% Szefer et al. (2016), bioRxiv preprint, DOI 10.1101/088310.
% The ADNI consortium is entered as a brace-protected corporate author so that
% BibTeX does not split it into first/last name parts.
@article{Szefer088310,
  author       = {Szefer, Elena and Lu, Donghuan and Nathoo, Farouk and Beg, Mirza Faisal and Graham, Jinko and {for the Alzheimer{\textquoteright}s Disease Neuroimaging Initiative}},
  title        = {Multivariate association between single-nucleotide polymorphisms in {Alzgene} linkage regions and structural changes in the brain: discovery, refinement and validation},
  journal      = {bioRxiv},
  year         = {2016},
  publisher    = {Cold Spring Harbor Laboratory},
  elocation-id = {088310},
  doi          = {10.1101/088310},
  url          = {https://www.biorxiv.org/content/early/2016/11/17/088310},
  eprint       = {https://www.biorxiv.org/content/early/2016/11/17/088310.full.pdf},
  abstract     = {Both genetic variants and brain region abnormalities are recognized to play a role in cognitive decline. We explore the association between single-nucleotide polymorphisms (SNPs) in linkage regions for Alzheimer{\textquoteright}s disease and rates of decline in brain structure using data from the Alzheimer{\textquoteright}s Disease Neuroimaging Initiative (ADNI). In an initial discovery stage, we assessed the presence of linear association between the minor allele counts of 75,845 SNPs in the Alzgene linkage regions and estimated rates of change in structural MRI measurements for 56 brain regions using an RV test. In a second, refinement stage, we reduced the number of SNPs using a bootstrap-enhanced sparse canonical correlation analysis (SCCA) with a fixed tuning parameter. Each SNP was assigned an importance measure proportional to the number of times it was estimated to have a nonzero coefficient in repeated re-sampling from the ADNI-1 sample. We created refined lists of SNPs based on importance probabilities greater than 50\% and 90\%, respectively. In a third, validation stage, we assessed the multivariate association between these refined lists of SNPs and the rates of structural change in an independent dataset comprised of the ADNIGO and ADNI-2 study samples. There was strong statistical evidence for linear association between the SNPs in the Alzgene linkage regions and the 56 imaging phenotypes in both the ADNI-1 and ADNIGO/2 samples (p $<$ 0.0001). The bootstrap-enhanced SCCA identified 1,694 priority SNPs with importance probabilities $>$ 50\% and 22 SNPs with importance probabilities $>$ 90\%. The 1,694 prioritized SNPs were associated with imaging phenotypes in the ADNI-1 data (p $<$ 0.001) and this association was replicated in the ADNIGO/2 data (p = 0.0021). This manuscript presents an analysis that addresses challenges in current imaging genetics studies such as biased sampling designs, high-dimensional data with low-signal, and discovery and validation of association in multivariate analysis. Genes corresponding to priority SNPs having the highest contribution to the RV coefficient test statistic in the validation data have previously been implicated or hypothesized to be implicated in AD, including GCLC, IDE, and STAMBP1 and FAS. We hypothesize that the effect sizes of the 1,694 SNPs in the priority set are likely small, but further investigation within this set may advance understanding of the missing heritability in late-onset Alzheimer{\textquoteright}s disease.},
}