% Cross (2015), bioRxiv preprint published by Cold Spring Harbor Laboratory.
% The genus name in the title is brace-protected so sentence-casing styles keep
% its capital; "~" (approximately) in the abstract is written as
% {\textasciitilde} because a bare "~" is a non-breaking space in LaTeX.
% NOTE(review): `eprint` holds the full-text PDF URL as exported -- confirm
% whether the target style expects an eprint identifier here instead.
@article{Cross027797,
  author       = {Cross, Frederick R.},
  title        = {Tying down loose ends in the {Chlamydomonas} genome},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2015},
  elocation-id = {027797},
  doi          = {10.1101/027797},
  url          = {https://www.biorxiv.org/content/early/2015/09/29/027797},
  eprint       = {https://www.biorxiv.org/content/early/2015/09/29/027797.full.pdf},
  abstract     = {The Chlamydomonas genome has been sequenced, assembled and annotated to produce a rich resource for genetics and molecular biology in this well-studied model organism. The annotated genome is very rich in open reading frames upstream of the annotated coding sequence ({\textquoteleft}uORFs{\textquoteright}): almost three quarters of the assigned transcripts have at least one uORF, and frequently more than one. This is problematic with respect to the standard {\textquoteleft}scanning{\textquoteright} model for eukaryotic translation initiation. These uORFs can be grouped into three classes: class 1, initiating in-frame with the coding sequence (cds) (thus providing a potential in-frame N-terminal extension); class 2, initiating in the 5UT and terminating out-of-frame in the cds; and class 3, initiating and terminating within the 5UT. Multiple bioinformatics criteria (including analysis of Kozak consensus sequence agreement and BLASTP comparisons to the closely related Volvox genome, and statistical comparison to cds and to random-sequence controls) indicate that of {\textasciitilde}4000 class 1 uORFs, approximately half are likely in vivo translation initiation sites. The proposed resulting N-terminal extensions in many cases will sharply alter the predicted biochemical properties of the encoded proteins. These results suggest significant modifications in {\textasciitilde}2000 of the {\textasciitilde}20,000 transcript models with respect to translation initiation and encoded peptides. In contrast, class 2 uORFs may be subject to purifying selection, and the existent ones (surviving selection) are likely inefficiently translated.
Class 3 uORFs are remarkably similar to random sequence expectations with respect to size, number and composition and therefore may be largely selectively neutral; their very high abundance (found in more than half of transcripts, frequently with multiple uORFs per transcript) nevertheless suggests the possibility of translational regulation on a wide scale.},
}