% bioRxiv preprint (Cold Spring Harbor Laboratory), DOI 10.1101/031484.
% The abstract's "Availability" and "Contact" lines are part of the original
% posting; the e-mail address is kept in its obfuscated "{at}" form.
@article{Arthur031484,
  author       = {Arthur, Rudy and O'Connell, Jared and Schulz-Trieglaff, Ole and Cox, Anthony J.},
  title        = {Rapid Genotype Refinement for Whole-Genome Sequencing Data using Multi-Variate Normal Distributions},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2015},
  elocation-id = {031484},
  doi          = {10.1101/031484},
  url          = {https://www.biorxiv.org/content/early/2015/11/12/031484},
  eprint       = {https://www.biorxiv.org/content/early/2015/11/12/031484.full.pdf},
  abstract     = {Whole-genome low-coverage sequencing has been combined with linkage-disequilibrium (LD) based genotype refinement to accurately and cost-effectively infer genotypes in large cohorts of individuals. Most genotype refinement methods are based on hidden Markov models, which are accurate but computationally expensive. We introduce an algorithm that models LD using a simple multivariate Gaussian distribution. The key feature of our algorithm is its speed, it is hundreds of times faster than other methods on the same data set and its scaling behaviour is linear in the number of samples. We demonstrate the performance of the method on both low-coverage and high-coverage samples. Availability: The source code is available at https://github.com/sequencing/marvin Contact: rarthur{at}illumina.com},
}