% bioRxiv preprint (Cold Spring Harbor Laboratory), DOI 10.1101/067736.
% `eprint` carries the bare bioRxiv identifier (same value as `elocation-id`);
% the full-text PDF link is kept in the non-standard `pdf` field, which
% bibliography styles ignore.
@article{Wang067736,
  author        = {Wang, Xiaolong and Dong, Quanjiang and Chen, Gang and Zhang, Jianye and Liu, Yongqiang and Zhao, Jinqiao and Peng, Haibo and Wang, Yalei and Cai, Yujia and Wang, Xuxiang and Yang, Chao},
  title         = {Why are frameshift homologs widespread within and across species?},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2016},
  elocation-id  = {067736},
  doi           = {10.1101/067736},
  url           = {https://www.biorxiv.org/content/early/2016/08/25/067736},
  eprint        = {067736},
  eprinttype    = {bioRxiv},
  pdf           = {https://www.biorxiv.org/content/early/2016/08/25/067736.full.pdf},
  abstract      = {Frameshifted coding genes presumably yield truncated and dysfunctional proteins. We report that frameshift homologs, including frameshift orthologs and frameshift paralogs, are actually widespread within and across species. We proposed that protein coding genes have a ca-0.5 quasi-constant shiftability: given any protein coding sequence, at least 50\% of the amino acids remain conserved in a frameshifted protein sequence. In the natural genetic code, amino acid pairs assigned to frameshift codon substitutions are more conserved than those to random codon substitutions, and the frameshift tolerating ability of the natural genetic code ranks among the best 6\% of all compatible genetic codes. Hence, the shiftability of protein coding genes was mainly predefined by the standard genetic code, while additional sequence-level shiftability was achieved through biased usages of codons and codon pairs. We concluded that during early evolution the genetic code was symmetrically optimized for tolerate frameshifts, so that protein coding genes were endowed an inherent ability to tolerate frameshifting in both forward and backward directions.},
}