% bioRxiv preprint (Cold Spring Harbor Laboratory, 2013), DOI 10.1101/000984.
% The abstract field also carries the "Author Summary" section of the preprint.
% NOTE(review): `eprint` holds the full-text PDF link as exported rather than an
% archive identifier; kept unchanged to avoid dropping data. Consider an
% identifier plus `eprinttype` if the target style prints this field.
@article{Gagliano000984,
  author        = {Gagliano, Sarah A. and Barnes, Michael R. and Weale, Michael E. and Knight, Jo},
  title         = {A {Bayesian} Method to Incorporate Hundreds of Functional Characteristics with Association Evidence to Improve Variant Prioritization},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2013},
  elocation-id  = {000984},
  doi           = {10.1101/000984},
  url           = {https://www.biorxiv.org/content/early/2013/12/04/000984},
  eprint        = {https://www.biorxiv.org/content/early/2013/12/04/000984.full.pdf},
  abstract      = {The increasing quantity and quality of functional genomic information motivate the assessment and integration of these data with association data, including data originating from genome-wide association studies (GWAS). We used previously described GWAS signals ({\textquotedblleft}hits{\textquotedblright}) to train a regularized logistic model in order to predict SNP causality on the basis of a large multivariate functional dataset. We show how this model can be used to derive Bayes factors for integrating functional and association data into a combined Bayesian analysis. Functional characteristics were obtained from the Encyclopedia of DNA Elements (ENCODE), from published expression quantitative trait loci (eQTL), and from other sources of genome-wide characteristics. We trained the model using all GWAS signals combined, and also using phenotype specific signals for autoimmune, brain-related, cancer, and cardiovascular disorders. The non-phenotype specific and the autoimmune GWAS signals gave the most reliable results. We found SNPs with higher probabilities of causality from functional characteristics showed an enrichment of more significant p-values compared to all GWAS SNPs in three large GWAS studies of complex traits. We investigated the ability of our Bayesian method to improve the identification of true causal signals in a psoriasis GWAS dataset and found that combining functional data with association data improves the ability to prioritise novel hits. We used the predictions from the penalized logistic regression model to calculate Bayes factors relating to functional characteristics and supply these online alongside resources to integrate these data with association data. Author Summary: Large-scale genetic studies have had success identifying genes that play a role in complex traits. Advanced statistical procedures suggest that there are still genetic variants to be discovered, but these variants are difficult to detect. Incorporating biological information that affect the amount of protein or other product produced can be used to prioritise the genetic variants in order to identify which are likely to be causal. The method proposed here uses such biological characteristics to predict which genetic variants are most likely to be causal for complex traits.},
}