% bioRxiv preprint describing the Bartender barcode-clustering tool.
% The abstract uses {\textgreater} and {\textasciitilde} so that the literal
% ">" and "~" characters survive typesetting (a bare \> or ~ does not print them).
@article{Zhao068916,
  author        = {Zhao, Lu and Liu, Zhimin and Levy, Sasha F. and Wu, Song},
  title         = {{Bartender}: an ultrafast and accurate clustering algorithm to count barcode and amplicon reads},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2016},
  elocation-id  = {068916},
  doi           = {10.1101/068916},
  url           = {https://www.biorxiv.org/content/early/2016/08/10/068916},
  eprint        = {https://www.biorxiv.org/content/early/2016/08/10/068916.full.pdf},
  abstract      = {Barcode sequencing (bar-seq) is a high-throughput, and cost effective method to assay large numbers of lineages or genotypes in complex cell pools. Because of its advantages, applications for bar-seq are quickly growing {\textendash} from using neutral random barcodes to study the evolution of microbes or cancer, to using pseudo-barcodes, such as shRNAs, sgRNAs, or transposon insertion libraries, to simultaneously screen large numbers of cell perturbations. However, the computational pipelines for bar-seq have not been well developed. Available methods, which use prior information and/or simple brute-force comparisons, are slow and often result in overclustering artifacts that group distinct barcodes together. Here, we developed Bartender: an ultrafast and accurate clustering algorithm to detect barcodes and their abundances from raw next-generation sequencing data. To improve speed and reduce unnecessary pairwise comparisons, Bartender employs a divide-and-conquer strategy that intelligently sorts barcode reads into distinct bins before performing comparisons. To improve accuracy and reduce over-clustering artifacts, Bartender employs a modified two-sample proportion test that uses information on both the cluster sequence distances and cluster sizes to make merging decisions. Additionally, Bartender includes a {\textquotedblleft}multiple time point{\textquotedblright} mode, which matches barcode clusters between different clustering runs for seamless handling of time course data. For both simulated and real data, Bartender clusters millions of unique barcodes in a few minutes at high accuracy ({\textgreater}99.9\%), and is {\textasciitilde}100-fold faster than previous methods. Bartender is a set of simple-to-use command line tools that can be performed on a laptop. Availability: Bartender is available at no charge for non-commercial use at https://github.com/LaoZZZZZ/bartender-1.1.},
}