% Loh et al. (2016), bioRxiv preprint (Cold Spring Harbor Laboratory) introducing
% the Eagle2 reference-based phasing algorithm. Key kept as exported: Loh052308.
@article{Loh052308,
  author       = {Loh, Po-Ru and
                  Danecek, Petr and
                  Palamara, Pier Francesco and
                  Fuchsberger, Christian and
                  Reshef, Yakir A. and
                  Finucane, Hilary K. and
                  Schoenherr, Sebastian and
                  Forer, Lukas and
                  McCarthy, Shane and
                  Abecasis, Goncalo R. and
                  Durbin, Richard and
                  Price, Alkes L.},
  title        = {Reference-based phasing using the {Haplotype Reference Consortium} panel},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2016},
  elocation-id = {052308},
  doi          = {10.1101/052308},
  url          = {https://www.biorxiv.org/content/early/2016/07/07/052308},
  eprint       = {https://www.biorxiv.org/content/early/2016/07/07/052308.full.pdf},
  abstract     = {Haplotype phasing is a fundamental problem in medical and population genetics. Phasing is generally performed via statistical phasing within a genotyped cohort, an approach that can attain high accuracy in very large cohorts but attains lower accuracy in smaller cohorts. Here, we instead explore the paradigm of reference-based phasing. We introduce a new phasing algorithm, Eagle2, that attains high accuracy across a broad range of cohort sizes by efficiently leveraging information from large external reference panels (such as the Haplotype Reference Consortium, HRC) using a new data structure based on the positional Burrows--Wheeler transform. We demonstrate that Eagle2 attains a $\approx$20x speedup and $\approx$10\% increase in accuracy compared to reference-based phasing using SHAPEIT2. On European-ancestry samples, Eagle2 with the HRC panel achieves $>$2x the accuracy of 1000 Genomes-based phasing. Eagle2 is open source and freely available for HRC-based phasing via the Sanger Imputation Service and the Michigan Imputation Server.},
}