% bioRxiv preprint (Cold Spring Harbor Laboratory), cited by DOI.
% The link to the full-text PDF is kept in the non-standard pdf-url field,
% which styles ignore: the eprint field is meant for an archive identifier,
% and eprint-aware styles would otherwise print the PDF link as one.
@article{Wang067736,
  author         = {Wang, Xiaolong and Dong, Quanjiang and Chen, Gang and Zhang, Jianye and Liu, Yongqiang and Zhao, Jinqiao and Peng, Haibo and Wang, Yalei and Cai, Yujia and Wang, Xuxiang and Yang, Chao},
  title          = {Why are frameshift homologs widespread within and across species?},
  journal        = {bioRxiv},
  publisher      = {Cold Spring Harbor Laboratory},
  year           = {2017},
  elocation-id   = {067736},
  doi            = {10.1101/067736},
  url            = {https://www.biorxiv.org/content/early/2017/04/17/067736},
  pdf-url        = {https://www.biorxiv.org/content/early/2017/04/17/067736.full.pdf},
  abstract       = {Frameshifted coding genes yield truncated and dysfunctional proteins, frameshift mutations have been therefore considered as utterly harmful and of little importance for the evolution of novel proteins. However, frameshifted yet functional proteins and coding genes have been frequently observed. Here we report that frameshift homologs are widespread within a genome and across species. We showed that protein coding genes have a ca-0.5 quasi-constant shiftability: given any protein coding sequence, at least 50\% of the amino acids remain conserved in a frameshifted protein sequence. In the natural genetic code, amino acid pairs assigned to frameshift codon substitutions are more conservative than those to random codon substitutions, and the frameshift tolerability of the natural genetic code ranks among the best 6.3\% of all compatible genetic codes. Hence, the shiftability of coding genes was predefined by the genetic code, while additional sequence-level shiftability was achieved through biased usages of codons and codon pairs. We concluded that during early evolution the genetic code was optimized to tolerate frameshifting.},
}