% bioRxiv preprint (2016) introducing scPLS. Citation key kept as exported so existing citations keep resolving.
@article{Chen045070,
  author        = {Chen, Mengjie and Zhou, Xiang},
  title         = {Controlling for confounding effects in single cell {RNA} sequencing studies using both control and target genes},
  journal       = {bioRxiv},
  year          = {2016},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {045070},
  doi           = {10.1101/045070},
  url           = {https://www.biorxiv.org/content/early/2016/09/14/045070},
  eprint        = {https://www.biorxiv.org/content/early/2016/09/14/045070.full.pdf},
  abstract      = {Single cell RNA sequencing (scRNAseq) technique is becoming increasingly popular for unbiased and high-resolutional transcriptome analysis of heterogeneous cell populations. Despite its many advantages, scRNAseq, like any other genomic sequencing technique, is susceptible to the influence of confounding effects. Controlling for confounding effects in scRNAseq data is a crucial step for proper data normalization and accurate downstream analysis. Several recent methodological studies have demonstrated the use of control genes for controlling for confounding effects in scRNAseq studies; the control genes are used to infer the confounding effects, which are then used to normalize target genes of primary interest. However, these methods can be suboptimal as they ignore the rich information contained in the target genes. Here, we develop an alternative statistical method, which we refer to as scPLS, for more accurate inference of confounding effects. Our method is based on partial least squares and models control and target genes jointly to better infer and control for confounding effects. To accompany our method, we also develop a new, block-wise expectation maximization algorithm for scalable inference. Our algorithm is an order of magnitude faster than standard ones, making scPLS applicable to hundreds of cells and hundreds of thousands of genes. With extensive simulations and comparisons with other methods, we demonstrate the effectiveness of scPLS. Finally, we apply scPLS to analyze two scRNAseq data sets to illustrate its benefits in removing technical confounding effects as well as for removing cell cycle effects.},
}