@comment{Journal article record from a repository export. The exported
acknowledgement/funding text is kept verbatim in the annote field (not
printed by standard styles) rather than in note, so it does not appear in
the rendered reference.}
@article{d1a686c38f774b5397ac26ec7cdf0347,
  author    = {Callahan, Tiffany J. and Stefanski, Adrianne L. and Wyrwa, Jordan M. and
               Zeng, Chenjie and Ostropolets, Anna and Banda, Juan M. and
               Baumgartner, William A. and Boyce, Richard D. and Casiraghi, Elena and
               Coleman, Ben D. and Collins, Janine H. and Deakyne Davies, Sara J. and
               Feinstein, James A. and Lin, Asiyah Y. and Martin, Blake and
               Matentzoglu, Nicolas A. and Meeker, Daniella and Reese, Justin and
               Sinclair, Jessica and Taneja, Sanya B. and Trinkley, Katy E. and
               Vasilevsky, Nicole A. and Williams, Andrew E. and Zhang, Xingmin A. and
               Denny, Joshua C. and Ryan, Patrick B. and Hripcsak, George and
               Bennett, Tellen D. and Haendel, Melissa A. and Robinson, Peter N. and
               Hunter, Lawrence E. and Kahn, Michael G.},
  title     = {Ontologizing health systems data at scale: making translational discovery a reality},
  journal   = {npj Digital Medicine},
  year      = {2023},
  month     = may,
  day       = {19},
  volume    = {6},
  number    = {1},
  doi       = {10.1038/s41746-023-00830-x},
  issn      = {2398-6352},
  publisher = {Nature Publishing Group},
  language  = {English (US)},
  abstract  = {Common data models solve many challenges of standardizing electronic health record (EHR) data but are unable to semantically integrate all of the resources needed for deep phenotyping. Open Biological and Biomedical Ontology (OBO) Foundry ontologies provide computable representations of biological knowledge and enable the integration of heterogeneous data. However, mapping EHR data to OBO ontologies requires significant manual curation and domain expertise. We introduce OMOP2OBO, an algorithm for mapping Observational Medical Outcomes Partnership (OMOP) vocabularies to OBO ontologies. Using OMOP2OBO, we produced mappings for 92,367 conditions, 8611 drug ingredients, and 10,673 measurement results, which covered 68-99\% of concepts used in clinical practice when examined across 24 hospitals. When used to phenotype rare disease patients, the mappings helped systematically identify undiagnosed patients who might benefit from genetic testing. By aligning OMOP vocabularies to OBO ontologies our algorithm presents new opportunities to advance EHR-based deep phenotyping.},
  annote    = {KAUST Repository Item: Exported on 2023-05-26 Acknowledgements: This work was primarily supported by funding from the National Library of Medicine (NLM T15LM009451 and T15LM007079) to T.J.C. and in part by the National Center for Advancing Translational Sciences (NCATS U24TR002306) to M.A.H. and P.N.R., the National Human Genome Research Institute (NHGRI 5RM1HG010860) to M.A.H., P.N.R., N.A.V., and N.A.M., the NLM (R01LM013400) to L.E.H. and (R01LM006910) G.H., the Medical Research Council (MR/P02002X/1) to J.H.C., the National Heart, Lung, and Blood Institute (NHLBI 1K23HL161352) to K.E.T., the NHGRI (5U24HG011449-02) to P.N.R., and the Intramural Research Program of the NHGRI (ZIA HG200417) to J.C.D. and C.Z. The authors thank colleagues at the Health Data Compass warehouse, Children{\textquoteright}s Hospital Colorado Research Informatics team, and the OMOP2OBO and Machine Learning Working Groups at the National COVID Cohort Collaboration for piloting testing, extending, and improving the mappings. The authors would also like to thank Drs. Paul Schofield (University of Oxford) and members of Dr. Robert Hoehndorf{\textquoteright}s (King Abdullah University of Science and Technology) lab for their feedback on the mappings. This publication acknowledges KAUST support, but has no KAUST affiliated authors.},
}