% bioRxiv preprint record (Cold Spring Harbor Laboratory), DOI 10.1101/022343.
% Braced words in the title ({EC-PSI}, {Pfam}) keep their capitalisation under
% styles that convert titles to sentence case.
% NOTE(review): the eprint field holds a PDF link rather than an identifier; it
% is kept exactly as exported. Confirm how the target style prints it.
@article{Alborzi022343,
  author         = {Alborzi, Seyed Ziaeddin and Devignes, Marie-Dominique and Ritchie, David W.},
  title          = {{EC-PSI}: Associating Enzyme Commission Numbers with {Pfam} Domains},
  journal        = {bioRxiv},
  publisher      = {Cold Spring Harbor Laboratory},
  year           = {2015},
  elocation-id   = {022343},
  doi            = {10.1101/022343},
  url            = {https://www.biorxiv.org/content/early/2015/07/10/022343},
  eprint         = {https://www.biorxiv.org/content/early/2015/07/10/022343.full.pdf},
  abstract       = {With the growing number of protein structures in the protein data bank (PDB), there is a need to annotate these structures at the domain level in order to relate protein structure to protein function. Thanks to the SIFTS database, many PDB chains are now cross-referenced with Pfam domains and enzyme commission (EC) numbers. However, these annotations do not include any explicit relationship between individual Pfam domains and EC numbers. This article presents a novel statistical training-based method called EC-PSI that can automatically infer high confidence associations between EC numbers and Pfam domains directly from EC-chain associations from SIFTS and from EC-sequence associations from the SwissProt, and TrEMBL databases. By collecting and integrating these existing EC-chain/sequence annotations, our approach is able to infer a total of 8,329 direct EC-Pfam associations with an overall F-measure of 0.819 with respect to the manually curated InterPro database, which we treat here as a {\textquotedblleft}gold standard{\textquotedblright} reference dataset. Thus, compared to the 1,493 EC-Pfam associations in InterPro, our approach provides a way to find over six times as many high quality EC-Pfam associations completely automatically.},
}