% Journal article introducing the EvoRator web server (Journal of Molecular Biology, vol. 434, no. 11, 2022).
% The copyright line from the export is kept in the non-printing `copyright` field, not in `note`.
% NOTE(review): `pages` holds the article number as it appears in the DOI suffix; confirm against the publisher record.
@article{febefdb0b68748c5990b2ad69d7bd74b,
  author    = {Nagar, Natan and {Ben Tal}, Nir and Pupko, Tal},
  title     = {{EvoRator}: Prediction of Residue-level Evolutionary Rates from Protein Structures Using Machine Learning},
  journal   = {Journal of Molecular Biology},
  volume    = {434},
  number    = {11},
  pages     = {167538},
  year      = {2022},
  month     = jun,
  day       = {15},
  publisher = {Academic Press},
  issn      = {0022-2836},
  doi       = {10.1016/j.jmb.2022.167538},
  language  = {English},
  keywords  = {ConSurf, gapped alignment, machine learning, orphan genes, protein evolution, protein function, protein structure},
  copyright = {{\textcopyright} 2022 Elsevier Ltd},
  abstract  = {Measuring evolutionary rates at the residue level is indispensable for gaining structural and functional insights into proteins. State-of-the-art tools for estimating rates take as input a large set of homologous proteins, a probabilistic model of evolution and a phylogenetic tree. However, a gap exists when only few or no homologous proteins can be found, e.g., orphan proteins. In addition, such tools do not take the three-dimensional (3D) structure of the protein into account. The association between the 3D structure and site-specific rates can be learned using machine-learning regression tools from a cohort of proteins for which both the structure and a large set of homologs exist. Here we present EvoRator, a user-friendly web server that implements a machine-learning regression algorithm to predict site-specific evolutionary rates from protein structures. We show that EvoRator outperforms predictions obtained using traditional physicochemical features, such as relative solvent accessibility and weighted contact number. We also demonstrate the application of EvoRator in three common scenarios that arise in protein evolution research: (1) orphan proteins for which no (or few) homologs exist; (2) When homologous sequences exist, our algorithm contrasts structure-based estimates of the evolutionary rates and the phylogeny-based estimates. This allows detecting sites that are likely conserved due to functional rather than structural constraints; (3) Algorithms that only rely on homologous sequence often fail to accurately measure the evolutionary rates of positions in gapped sequence alignments, which frequently occurs as a result of a clade-specific insertion. Our algorithm makes use of training data and known 3D structure of such gapped positions to predict their evolutionary rates. EvoRator is freely available for all users at: https://evorator.tau.ac.il/.},
}