% bioRxiv preprint, Norris et al. (2015), DOI 10.1101/014340.
% The abstract field ends with the abbreviation glossary (aD11, GLin, M08, ...) as exported.
@article{Norris014340,
  author        = {Norris, Ryan W. and Strope, Cory L. and McCandlish, David M. and Stoltzfus, Arlin},
  title         = {{Bayesian} priors for tree calibration: Evaluating two new approaches based on fossil intervals},
  journal       = {bioRxiv},
  year          = {2015},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {014340},
  doi           = {10.1101/014340},
  url           = {https://www.biorxiv.org/content/early/2015/01/24/014340},
  eprint        = {https://www.biorxiv.org/content/early/2015/01/24/014340.full.pdf},
  abstract      = {Background: Studies of diversification and trait evolution increasingly rely on combining molecular sequences and fossil dates to infer time-calibrated phylogenetic trees. Available calibration software provides many options for the shape of the prior probability distribution of ages at a node to be calibrated, but the question of how to assign a Bayesian prior from limited fossil data remains open.
    Results: We introduce two new methods for generating priors based upon (1) the interval between the two oldest fossils in a clade, i.e., the penultimate gap (PenG), and (2) the ghost lineage length (GLin), defined as the difference between the oldest fossils for each of two sister lineages. We show that PenG and GLin/2 are point estimates of the interval between the oldest fossil and the true age for the node. Furthermore, given either of these quantities, we derive a principled prior distribution for the true age. This prior is log-logistic, and can be implemented approximately in existing software. Using simulated data, we test these new methods against some other approaches.
    Conclusions: When implemented as approaches for assigning Bayesian priors, the PenG and GLin methods increase the accuracy of inferred divergence times, showing considerably more precision than the other methods tested, without significantly greater bias. When implemented as approaches to post-hoc scaling of a tree by linear regression, the PenG and GLin methods exhibit less bias than other methods tested. The new methods are simple to use and can be applied to a variety of studies that call for calibrated trees.
    aD11{\textendash} An abbreviated version of the method of Dornburg et al. (2011).
    GLin{\textendash} Ghost Lineage Length, the difference between the oldest fossil on one lineage, and the oldest fossil on a sister lineage. The GLin method uses GLin/2 in approximating the actual age of a node.
    M08{\textendash} Marshall{\textquoteright}s (2008) method.
    M08*{\textendash} Adaptation of Marshall{\textquoteright}s (2008) method for a Bayesian context.
    M08min{\textendash} Adaptation of Marshall{\textquoteright}s (2008) method for Bayesian context, preventing age estimates for a node that are more recent than known fossils at that node.
    Obs{\textendash} Observed age, the oldest known fossil belonging to a clade. The Obs method uses this value without correction as an estimate of the actual age of a node.
    PenG{\textendash} Penultimate Gap, the difference between the oldest fossil in a clade and the second oldest fossil. The PenG method uses this value when approximating the actual age of a node.
    UltG{\textendash} Ultimate Gap. The difference between the oldest known fossil and the actual origin of a clade.},
}