% bioRxiv preprint describing the mixMC framework (Le Cao et al., 2016).
% The citation key is plain ASCII: the exported key contained a space and
% LaTeX accent markup, which classic BibTeX cannot parse as a key.
% Authors use the "Last, First" form so the compound surname of the first
% author is kept together instead of being split at the last word.
% The eprint field still holds the full-text PDF link exactly as exported.
@article{LeCao044206,
  author        = {L{\^e} Cao, Kim-Anh and Costello, Mary-Ellen and Lakis, Vanessa Anne and Bartolo, Fran{\c{c}}ois and Chua, Xin-Yi and Brazeilles, R{\'e}mi and Rondeau, Pascale},
  title         = {{mixMC}: a multivariate statistical framework to gain insight into {Microbial} {Communities}},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2016},
  elocation-id  = {044206},
  doi           = {10.1101/044206},
  url           = {https://www.biorxiv.org/content/early/2016/03/16/044206},
  eprint        = {https://www.biorxiv.org/content/early/2016/03/16/044206.full.pdf},
  abstract      = {Culture independent techniques, such as shotgun metagenomics and 16S rRNA amplicon sequencing have dramatically changed the way we can examine microbial communities. Recently, changes in microbial community structure and dynamics have been associated with a growing list of human diseases. The identification and comparison of bacteria driving those changes requires the development of sound statistical tools, especially if microbial biomarkers are to be used in a clinical setting. We present mixMC, a novel multivariate data analysis framework for metagenomic biomarker discovery. mixMC accounts for the compositional nature of 16S data and enables detection of subtle differences when high inter-subject variability is present due to microbial sampling performed repeatedly on the same subjects but in multiple habitats. Through data dimension reduction the multivariate methods provide insightful graphical visualisations to characterise each type of environment in a detailed manner. We applied mixMC to 16S microbiome studies focusing on multiple body sites in healthy individuals, compared our results with existing statistical tools and illustrated added value of using multivariate methodologies to fully characterise and compare microbial communities. Abbreviations: 16S rRNA - 16S ribosomal RNA; CLR - Centered Log Ratio; CSS - Cumulative Sum Scaling; HMP - Human Microbiome Project; ILR - Isometric Log Ratio; OTU - Operational Taxonomy Unit; PCA - Principal Component Analysis; PCoA - Principal Coordinate Analysis; sPLS-DA - sparse Partial Least Squares Discriminant Analysis; TSS - Total Sum Scaling; ZIG - Zero Inflated Gaussian},
}