@comment{
  bioRxiv preprint record; citation key kept as exported.
  NOTE(review): the abstract field continues past the abstract proper into
  what appears to be the opening text of the paper. Digits fused to words
  there (for example "lines1-4", "species5", "refs.6-10") look like the
  source's reference markers and are left untouched -- confirm before trimming.
  NOTE(review): eprint holds a PDF link rather than an identifier; kept as
  exported so the link is not lost.
}
@article{Andersson005447,
  author        = {Andersson, Robin and Andersen, Peter Refsing and Valen, Eivind and Core, Leighton and Bornholdt, Jette and Boyd, Mette and Jensen, Torben Heick and Sandelin, Albin},
  title         = {Nuclear stability and transcriptional directionality separate functionally distinct {RNA} species},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2014},
  elocation-id  = {005447},
  doi           = {10.1101/005447},
  url           = {https://www.biorxiv.org/content/early/2014/07/02/005447},
  eprint        = {https://www.biorxiv.org/content/early/2014/07/02/005447.full.pdf},
  abstract      = {Mammalian genomes are pervasively transcribed, yielding a complex transcriptome with high variability in composition and cellular abundance. While recent efforts have identified thousands of new long non-coding (lnc) RNAs and demonstrated a complex transcriptional repertoire produced by protein-coding (pc) genes, limited progress has been made in distinguishing functional RNA from spurious transcription events. This is partly due to present RNA classification, which is typically based on technical rather than biochemical criteria. Here we devise a strategy to systematically categorize human RNAs by their sensitivity to the ribonucleolytic RNA exosome complex and by the nature of their transcription initiation. These measures are surprisingly effective at correctly classifying annotated transcripts, including lncRNAs of known function. The approach also identifies hundreds of uncharacterized stable lncRNAs, hidden among a vast majority of unstable transcripts. The predictive power of the approach promises to streamline the functional analysis of known and novel RNAs. An estimated \~{}75\% of mammalian DNA yields RNA; at least when considering multiple cell lines1{\textendash}4. In human cells, only \~{}50\% of this material is accounted for by pre-mRNA and conventional stable RNA (tRNA, rRNA, sn/snoRNA); the remaining part constitutes a population of poorly characterized lncRNA species5. The latter are mainly cell type-restricted2, suggesting that unknown regulatory RNAs may be found in this population.
Among these, the intergenic (or intervening) lncRNAs (lincRNAs) have attracted attention due to successful functional characterization of a limited number of molecules (for recent reviews see refs.6{\textendash}10). Other lncRNAs include promoter upstream transcripts (PROMPTs), originating in antisense orientation from active protein-coding (pc) gene promoters11{\textendash}14 and RNAs produced from active enhancers14{\textendash}16 (eRNAs). Characterization of PROMPT and eRNA production has revealed that human pc gene promoters and enhancers can be divergently transcribed11,14,15,17{\textendash}19. A strand-bias in transcriptional directionality of pc gene promoters is apparent when considering stable RNA levels (i.e. seemingly producing robust amounts of mRNA in the sense direction and only little antisense PROMPT). This bias is established post-transcriptionally and governed by a decreased occurrence and utilization of early polyadenylation (pA) sites in the sense (mRNA) direction12,13. Such promoter-proximal pA sites trigger transcription termination and rapid transcript turnover by the 3{\textquoteright}-5{\textquoteright} exo- and endo-nucleolytic RNA exosome complex13. In general, many lncRNAs are suppressed post-transcriptionally by this mechanism20, considerably skewing their steady-state levels from what would be expected based on transcription initiation rates alone. Therefore, transcription units that are under evolutionary pressure to evade such termination and RNA decay will constitute prime candidates for producing functional lncRNAs requiring a certain copy number for their actions. Here we classify promoters of capped RNA species in HeLa cells by their transcriptional directionality, RNA exosome-sensitivity and abundance. We identify stable lncRNAs with the potential to function in trans and a population of annotated alternative promoters, which produce exosome-sensitive mRNAs.
We project that this strategy and resource of classified promoters and associated RNAs will guide annotation of functional candidates among novel and known transcripts.},
}