JOURNALS
2026
1.
Yunju Song; Hwan Choi; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Attention mechanism, Bioinformatics, Carcinogenicity, Deep learning, in silico, Systems biology
% Journal article: Song, Choi and Yoo (2026), Briefings in Bioinformatics 27(3).
% The pages field carries the article identifier (bbag296, matching the DOI suffix)
% rather than a page range. tppubtype is not a standard BibTeX field; it is kept
% as exported -- NOTE(review): presumably consumed by the publication-list tool
% that generated this entry; confirm before removing.
@article{Song2026,
  author    = {Song, Yunju and Choi, Hwan and Yoo, Sunyong},
  title     = {Carcinogenicity prediction via multi-task learning of cross-organ representations with attention mechanisms},
  journal   = {Briefings in Bioinformatics},
  volume    = {27},
  number    = {3},
  pages     = {bbag296},
  year      = {2026},
  date      = {2026-06-04},
  issn      = {1477-4054},
  doi       = {10.1093/bib/bbag296},
  url       = {https://academic.oup.com/bib/article/27/3/bbag296/8702471},
  urldate   = {2026-06-04},
  abstract  = {Cancer is caused by the uncontrolled growth and division of abnormal cells. In industrialized societies, chemical exposure is a leading cause of cancer. Since certain compounds induce cancer by damaging genes or affecting cellular metabolism, studying carcinogens is essential. However, previous studies used separate models for each organ and failed to capture carcinogenic features shared across organs, limiting generalization. Thus, this study developed a multi-task learning framework to predict organ-specific carcinogenicity in the liver, lung, stomach, and breast. This framework consisted of a shared layer and task-specific layers. The shared layer contains a graph attention network layer to make atom-level representations, along with parallel fully connected layers designed for each task combination. The resulting shared representations are passed to task-specific layers to predict organ-specific carcinogenicity. The training process followed stepwise learning, whereby the model was first trained using partially labeled data to capture cross-organ representations and determine initial weights. In the second step, fully labeled data for all organs were used for final training. The proposed multi-task model achieved superior performance in the liver, lung, and stomach tasks. Notably, it recorded the highest area under the receiver operating characteristic curve in the stomach task (0.7636), outperforming the single-task model (0.7055) and all comparative models (0.5527–0.7418). The highest area under the precision–recall curve was observed in the liver task (0.9646), surpassing the single-task model (0.9505) and all comparative models (0.9373–0.9621). We further analyzed molecules with high predicted carcinogenicity and identified critical substructures using an attention mechanism. This research can contribute to predicting organ-specific carcinogenicity of candidate chemicals in the early stages of drug development.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Attention mechanism, Bioinformatics, Carcinogenicity, Deep learning, in silico, Systems biology},
  pubstate  = {published},
  tppubtype = {article}
}
Cancer is caused by the uncontrolled growth and division of abnormal cells. In industrialized societies, chemical exposure is a leading cause of cancer. Since certain compounds induce cancer by damaging genes or affecting cellular metabolism, studying carcinogens is essential. However, previous studies used separate models for each organ and failed to capture carcinogenic features shared across organs, limiting generalization. Thus, this study developed a multi-task learning framework to predict organ-specific carcinogenicity in the liver, lung, stomach, and breast. This framework consisted of a shared layer and task-specific layers. The shared layer contains a graph attention network layer to make atom-level representations, along with parallel fully connected layers designed for each task combination. The resulting shared representations are passed to task-specific layers to predict organ-specific carcinogenicity. The training process followed stepwise learning, whereby the model was first trained using partially labeled data to capture cross-organ representations and determine initial weights. In the second step, fully labeled data for all organs were used for final training. The proposed multi-task model achieved superior performance in the liver, lung, and stomach tasks. Notably, it recorded the highest area under the receiver operating characteristic curve in the stomach task (0.7636), outperforming the single-task model (0.7055) and all comparative models (0.5527–0.7418). The highest area under the precision–recall curve was observed in the liver task (0.9646), surpassing the single-task model (0.9505) and all comparative models (0.9373–0.9621). We further analyzed molecules with high predicted carcinogenicity and identified critical substructures using an attention mechanism. This research can contribute to predicting organ-specific carcinogenicity of candidate chemicals in the early stages of drug development.