% bioRxiv preprint (2015), DOI 10.1101/022384.
% In the abstract, literal tildes are written as {\textasciitilde} and the
% underscore as \_ so the field typesets without errors under LaTeX.
@article{Jin022384,
  author        = {Jin, Minliang and Liu, Haijun and He, Cheng and Fu, Junjie and Xiao, Yingjie and Wang, Yuebin and Xie, Weibo and Wang, Guoying and Yan, Jianbing},
  title         = {Maize pan-transcriptome provides novel insights into genome complexity and quantitative trait variation},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2015},
  elocation-id  = {022384},
  doi           = {10.1101/022384},
  url           = {https://www.biorxiv.org/content/early/2015/07/12/022384},
  eprint        = {https://www.biorxiv.org/content/early/2015/07/12/022384.full.pdf},
  abstract      = {Variation in gene expression contributes to the diversity of phenotype. The construction of the pan-transcriptome is especially necessary for species with complex genomes, such as maize. However, knowledge of the regulation mechanisms and functional consequences of the pan-transcriptome is limited. In this study, we identified 13,382 nuclear expression presence and absence variation candidates (ePAVs, expressed in 5\%{\textasciitilde}95\% lines; based on the reference genome) by re-analyzing the RNA sequencing data from the kernels (15 days after pollination) of 368 maize diverse inbreds. It was estimated that only {\textasciitilde}1\% of the ePAVs are explained by DNA sequence presence and absence variations (PAV). The ePAV genes tend to be regulated by distant eQTLs when compared with non-ePAV genes (called here core expression genes, expressed in more than 95\% lines). When the expression presence/absence status was used as the {\textquotedblleft}genotype{\textquotedblright} to perform genome-wide association study, 56 (0.42\%) ePAVs were significantly associated with 15 agronomic traits and 1,967 (14.74\%) with 526 metabolic traits, measured from the mature kernels. While the above was majorly based on the reference genome, by using a modified {\textquoteleft}assemble-then-align{\textquoteright} strategy, 2,355 high confidence novel sequences with a total length of 1.9Mb were found absent in the current B73 reference genome (v2). Ten randomly selected novel sequences were validated with genomic PCR. A simulation analysis suggested that the pan-transcriptome of the maize whole kernel is approaching a maximum value of 63,000 genes. Two novel validated sequences annotated as NBS\_LRR like genes were found to associate with flavonoid content and their homologs in rice were also found to affect flavonoids and disease-resistance. Novel sequences absent in the present reference genome might be functionally important and deserve more attentions. This study provides novel perspectives and resources to discover maize quantitative trait variations and help us to better understand the kernel regulation networks, thus enhancing maize breeding.},
}