% bioRxiv preprint (2016): HyperLogLog cardinality estimation for k-mers in khmer.
% Author names use the "Last, Jr., First" form so BibTeX parses the suffix correctly.
@article{Irber056846,
  author        = {Irber, Jr., Luiz C. and Brown, C. Titus},
  title         = {Efficient cardinality estimation for k-mers in large {DNA} sequencing data sets},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2016},
  elocation-id  = {056846},
  doi           = {10.1101/056846},
  url           = {https://www.biorxiv.org/content/early/2016/06/07/056846},
  eprint        = {https://www.biorxiv.org/content/early/2016/06/07/056846.full.pdf},
  abstract      = {We present an open implementation of the HyperLogLog cardinality estimation sketch for counting fixed-length substrings of DNA strings ({\textquotedblleft}k-mers{\textquotedblright}). The HyperLogLog sketch implementation is in C++ with a Python interface, and is distributed as part of the khmer software package. khmer is freely available from https://github.com/dib-lab/khmer under a BSD License. The features presented here are included in version 1.4 and later.},
}