% Journal article record (Nature Communications, vol. 11, no. 1, Feb 2020).
% The citation key is kept exactly as exported so existing \cite commands still resolve.
% The funding/copyright text lives in "annote" (ignored by the standard styles) rather
% than "note", so it is retained in the database without being typeset in the reference list.
@article{0601cdf4649046c68cdd1e42e15c5a64,
author = "Jiao, Wei and Atwal, Gurnit and Polak, Paz and Karlic, Rosa and Cuppen, Edwin and Danyi, Alexandra and {de Ridder}, Jeroen and {van Herpen}, Carla and Lolkema, Martijn P. and Steeghs, Neeltje and Getz, Gad and Morris, Quaid and Stein, Lincoln D.",
title = "A deep learning system accurately classifies primary and metastatic cancers using passenger mutation patterns",
journal = "Nature Communications",
volume = "11",
number = "1",
year = "2020",
month = feb,
day = "5",
doi = "10.1038/s41467-019-13825-8",
issn = "2041-1723",
publisher = "Nature Publishing Group",
language = "English",
keywords = "Cancer genomics, Cancer of unknown primary",
abstract = "In cancer, the primary tumour's organ of origin and histopathology are the strongest determinants of its clinical behaviour, but in 3% of cases a patient presents with a metastatic tumour and no obvious primary. Here, as part of the ICGC/TCGA Pan-Cancer Analysis of Whole Genomes (PCAWG) Consortium, we train a deep learning classifier to predict cancer type based on patterns of somatic passenger mutations detected in whole genome sequencing (WGS) of 2606 tumours representing 24 common cancer types produced by the PCAWG Consortium. Our classifier achieves an accuracy of 91% on held-out tumor samples and 88% and 83% respectively on independent primary and metastatic samples, roughly double the accuracy of trained pathologists when presented with a metastatic tumour without knowledge of the primary. Surprisingly, adding information on driver mutations reduced accuracy. Our results have clinical applicability, underscore how patterns of somatic passenger mutations encode the state of the cell of origin, and can inform future strategies to detect the source of circulating tumour DNA.",
annote = "Funding Information: We would like to thank Irina Kalatskaya, Quang Trinh, Jared Simpson, Katie Hoadley and David Louis for their helpful comments during preparation of this paper. We also gratefully acknowledge the assistance of Drs. Ludmil B. Alexandrov, Mi Ni Huang, Arnoud Boot, Steven Gallinger, Julie Wilson, Haiko J. Bloemendal, Laurens Beerepoot, Steven G. Rozen and Michael R. Stratton in providing independent WGS primary and metastatic tumour SNV profiles used for validation. We also thank W.J., L.S. and Q.M. supported by funding from the Province of Ontario, Canada. QM{\textquoteright}s research was supported by a gift from NVIDIA foundation, an advised fund of the Silicon Valley Community Foundation. RK was supported by the European Structural and Investment Funds grant for the Croatian National Centre of Research Excellence in Personalized Healthcare (contract \#KK.01.1.1.01.0010), Croatian National Centre of Research Excellence for Data Science and Advanced Cooperative Systems (contract KK.01.1.1.01.0009), the European Commission Seventh Framework Program (Integra-Life; grant 315997) and Croatian Science Foundation (grant IP-2014-09-6400). J.d.R. is supported by a NWO-Vidi grant (016.Vidi.178.023). We acknowledge the contributions of the many clinical networks across ICGC and TCGA who provided samples and data to the PCAWG Consortium, and the contributions of the Technical Working Group and the Germline Working Group of the PCAWG Consortium for collation, realignment and harmonised variant calling of the cancer genomes used in this study. We thank the patients and their families for their participation in the individual ICGC and TCGA projects. Publisher Copyright: {\textcopyright} 2020, The Author(s). Copyright: Copyright 2021 Elsevier B.V., All rights reserved.",
}