@comment{
  Alonso060277: bioRxiv preprint describing the aRNApipe RNA-seq pipeline.
  The pdf-url field is a non-standard annotation field (ignored by
  bibliography styles) that stores the link to the full-text PDF; the
  canonical identifiers for citation purposes are the doi and url fields.
}
@article{Alonso060277,
  author        = {Alonso, Arnald and
                   Lasseigne, Brittany N. and
                   Williams, Kelly and
                   Nielsen, Josh and
                   Ramaker, Ryne C. and
                   Hardigan, Andrew A. and
                   Johnston, Bobbi and
                   Roberts, Brian S. and
                   Cooper, Sara J. and
                   Marsal, Sara and
                   Myers, Richard M.},
  title         = {{aRNApipe}: A balanced, efficient and distributed pipeline for processing {RNA-seq} data in high performance computing environments},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2016},
  elocation-id  = {060277},
  doi           = {10.1101/060277},
  url           = {https://www.biorxiv.org/content/early/2016/06/22/060277},
  pdf-url       = {https://www.biorxiv.org/content/early/2016/06/22/060277.full.pdf},
  abstract      = {Summary: The wide range of RNA-seq applications and their high computational needs require the development of pipelines orchestrating the entire workflow and optimizing usage of available computational resources. We present aRNApipe, a project-oriented pipeline for processing of RNA-seq data in high performance cluster environments. aRNApipe is highly modular and can be easily migrated to any high performance computing (HPC) environment. The current applications included in aRNApipe combine the essential RNA-seq primary analyses, including quality control metrics, transcript alignment, count generation, transcript fusion identification, and sequence variant calling. aRNApipe is project-oriented and dynamic so users can easily update analyses to include or exclude samples or enable additional processing modules. Workflow parameters are easily set using a single configuration file that provides centralized tracking of all analytical processes. Finally, aRNApipe incorporates interactive web reports for sample tracking and a tool for managing the genome assemblies available to perform an analysis. Availability and documentation: https://github.com/HudsonAlpha/aRNAPipe Contact: rmyers{at}hudsonalpha.org Supplementary information: Supplementary data are available.},
}