% bioRxiv preprint by Wan and Zeng (2016). Citation key, DOI and landing-page URL are unchanged.
% eprint/eprinttype carry the bare bioRxiv identifier so eprint-aware styles do not
% print a raw URL as an archive id; the full-text PDF link is kept in the
% style-ignored pdf field.
@article{Wan086033,
  author       = {Wan, Fangping and Zeng, Jianyang (Michael)},
  title        = {Deep learning with feature embedding for compound-protein interaction prediction},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2016},
  elocation-id = {086033},
  doi          = {10.1101/086033},
  url          = {https://www.biorxiv.org/content/early/2016/11/07/086033},
  eprint       = {086033},
  eprinttype   = {bioRxiv},
  pdf          = {https://www.biorxiv.org/content/early/2016/11/07/086033.full.pdf},
  abstract     = {Accurately identifying compound-protein interactions in silico can deepen our understanding of the mechanisms of drug action and significantly facilitate the drug discovery and development process. Traditional similarity-based computational models for compound-protein interaction prediction rarely exploit the latent features from current available large-scale unlabelled compound and protein data, and often limit their usage on relatively small-scale datasets. We propose a new scheme that combines feature embedding (a technique of representation learning) with deep learning for predicting compound-protein interactions. Our method automatically learns the low-dimensional implicit but expressive features for compounds and proteins from the massive amount of unlabelled data. Combining effective feature embedding with powerful deep learning techniques, our method provides a general computational pipeline for accurate compound-protein interaction prediction, even when the interaction knowledge of compounds and proteins is entirely unknown. Evaluations on current large-scale databases of the measured compound-protein affinities, such as ChEMBL and BindingDB, as well as known drug-target interactions from DrugBank have demonstrated the superior prediction performance of our method, and suggested that it can offer a useful tool for drug development and drug repositioning.},
}