2025
Chaewon Kim; Sunyong Yoo
Abstract | Links | BibTeX | Tags: Bioinformatics, Generative model, Transcriptome
@conference{Kim2025,
  author     = {Chaewon Kim and Sunyong Yoo},
  title      = {Predicting Drug-Induced Transcriptional Responses Using Latent Diffusion Model},
  booktitle  = {The 19th International Conference on Data and Text Mining in Biomedical Informatics},
  year       = {2025},
  date       = {2025-12-17},
  url        = {https://bmil.jnu.ac.kr/wp-content/uploads/2025/12/Chaewon-Kim-Sunyong-Yoo-Predicting-Drug-Induced-Transcriptional-Responses-Using-Latent-Diffusion-Model.pdf},
  urldate    = {2025-12-17},
  abstract   = {Accurate prediction of drug-induced transcriptional responses is essential for drug discovery and precision medicine. Existing computational models, including encoder–decoder architectures and generative adversarial network-based approaches, achieve reasonable accuracy but often fail to account for gene–gene correlations and generalize to unseen conditions. Here, we present a latent diffusion model that combines a variational autoencoder (VAE) with a diffusion process. The VAE compresses gene expression (GE) profiles into a low-dimensional latent space, where the diffusion process learns the joint probability distribution of latent GE representations and their noisy intermediates. Learning these distributions allows the model to capture gene–gene correlations more effectively. Moreover, our model incorporates multiple perturbation conditions—including cell line, compound, dose, and time—to enhance generalization performance on unseen conditions. The reverse diffusion process is designed to predict both the mean and variance of the latent representations, which robustly enhances the fidelity of the generated GE profiles. The proposed model demonstrated the highest accuracy in reconstructing perturbed GE profiles compared to previous studies, achieving a root mean squared error (RMSE) of 1.340, a Pearson correlation coefficient of 0.832 and an R² score of 0.669. In addition, the proposed model demonstrated superior performance in preserving gene–gene correlation, as shown by correlation heatmaps, compared to existing approaches. To evaluate the biological relevance of generated transcriptional profiles, we conducted a half-maximal inhibitory concentration prediction task using the generated profiles as model inputs. Our model outperformed the baseline methods, achieving an RMSE of 1.335 and an R² score of 0.819.
In conclusion, we demonstrated the potential of diffusion-based generative models as reliable and versatile tools for modeling transcriptional responses, with implications for drug discovery and precision medicine applications.},
  keywords   = {Bioinformatics, Generative model, Transcriptome},
  pubstate   = {published},
  tppubtype  = {conference}
}
Subhin Seomun; Sunyong Yoo
Abstract | Links | BibTeX | Tags: ADME, Bioinformatics, Deep learning
@conference{Seomun2025,
  author     = {Subhin Seomun and Sunyong Yoo},
  title      = {Cross-species multi-task learning with molecular and ADME descriptors for liver microsomal metabolic stability},
  booktitle  = {The 19th International Conference on Data and Text Mining in Biomedical Informatics},
  year       = {2025},
  date       = {2025-12-17},
  url        = {https://bmil.jnu.ac.kr/wp-content/uploads/2025/12/Subhin-Seomun-Sunyong-Yoo-Cross-species-multi-task-learning-with-molecular-and-ADME-descriptors-for-liver-microsomal-metabolic-stability.pdf},
  urldate    = {2025-12-17},
  abstract   = {Liver microsomal stability is a key determinant of in vivo compound exposure and efficacy. Although metabolic stability has been extensively studied, linking substructure destabilizing features to absorption, distribution, metabolism, and excretion (ADME) properties remains challenging. Moreover, single-species, single-modality models often generalize poorly. To address these limitations, we propose a cross-species multi-task learning framework that integrates multi-modal molecular representations to predict liver microsomal stability. Specifically, the model leverages three complementary modalities: SMILES-derived fingerprints, molecular graphs, and in silico ADME descriptors. These modalities are learned in a shared network using data from multiple species and subsequently fused via attention mechanisms to form a shared molecular representation, which captures conserved structure-metabolism relationships common across species. Species-specific networks capture individual metabolic characteristics and stability predictions for human (HLM), rat (RLM), and mouse liver microsomal (MLM). Under stratified 10-fold cross-validation, mean AUROC was 0.770 ± 0.001 (HLM), 0.785 ± 0.001 (RLM), and 0.766 ± 0.001 (MLM). To understand the chemical basis of metabolic liability, we examined three multi-level perspectives. At the molecular property level, physicochemical descriptors related to enzyme interaction, permeability/transport, and the lipophilicity-polarity axis emerged as dominant predictive drivers. At the substructure level, to pinpoint specific sites of metabolic vulnerability, recurring destabilizing features were identified at alkenes and allylic/benzylic positions, while amide and carbamate carbonyl motifs conferred stability. To elucidate the underlying physicochemical mechanisms, these structural motifs were linked to systematic shifts in logP, solubility, blood-brain barrier propensity, and efflux liability.
Overall, these results indicate that the cross-species integrative model accurately predicts microsomal stability across human, rat, and mouse while providing chemically grounded explanations.},
  keywords   = {ADME, Bioinformatics, Deep learning},
  pubstate   = {published},
  tppubtype  = {conference}
}
Junyong Park; Sunyong Yoo
Abstract | Links | BibTeX | Tags: Bioinformatics, Generative model, Molecular design
@comment{NOTE(review): the author field spells the first author "Junyong" while the PDF file name in the url field spells it "Junyoung" -- confirm the correct romanization.}
@conference{Park2025b,
  author     = {Junyong Park and Sunyong Yoo},
  title      = {Novel Molecular Design via a Scaffold-Aware Transformer with Multi-Scale Attention Mechanisms},
  booktitle  = {The 19th International Conference on Data and Text Mining in Biomedical Informatics},
  year       = {2025},
  date       = {2025-12-17},
  url        = {https://bmil.jnu.ac.kr/wp-content/uploads/2025/12/Junyoung-Park-Sunyong-Yoo-Novel-Molecular-Design-via-a-Scaffold-Aware-Transformer-with-Multi-Scale-Attention-Mechanisms.pdf},
  urldate    = {2025-12-17},
  abstract   = {Recent advancements in artificial intelligence have demonstrated great potential in accelerating drug discovery by exploring vast chemical spaces and predicting molecular properties. However, conventional molecular generation models have limitations in reflecting desired molecular structures, as they often fail to incorporate specific structural constraints or target properties directly into the generation process. To overcome these limitations, we propose a novel framework that integrates a transformer-based generative model and a graph attention network-based predictive model. The generative model produces molecules with desired structural characteristics by explicitly incorporating scaffold information, while the predictive model estimates the biological activity of the generated molecules. A cyclic learning structure enables the generative and predictive models to interact iteratively, facilitating continuous evaluation and feedback during training. In addition, a multi-stage tournament selection with experience memory guides the subsequent training process. Our approach accelerates the identification of scaffold-consistent, high-affinity candidates by exploring novel chemical variations around a user-specified scaffold. Experimental results show that the proposed scaffold-aware transformer achieves competitive validity, uniqueness, and novelty, and effectively generates novel compounds with high predicted binding affinity for biological targets. An attention-based analysis extracts atom-level importance scores and highlights the substructures that contribute to the predicted binding affinity, providing interpretable insights into structure-activity relationships. This study provides a practical and interpretable tool for scaffold-conditioned molecular generation.},
  keywords   = {Bioinformatics, Generative model, Molecular design},
  pubstate   = {published},
  tppubtype  = {conference}
}
김채원; 정명현; 김민건; 유선용
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Bioinformatics, Drugs, Transcriptome
@conference{kim2025conditional,
  author     = {김채원 and 정명현 and 김민건 and 유선용},
  title      = {Conditional Diffusion Model 기반 약물로 인한 전사체 반응 예측},
  booktitle  = {2025 한국디지털콘텐츠학회 하계종합학술대회},
  publisher  = {한국디지털콘텐츠학회},
  year       = {2025},
  date       = {2025-07-04},
  url        = {https://bmil.jnu.ac.kr/wp-content/uploads/2025/07/김채원-Conditional-Diffusion-Model-기반-약물로-인한-전사체-반응-예측.pdf},
  urldate    = {2025-07-04},
  abstract   = {본 논문에서는 Conditional Diffusion Model 기반 교란 조건을 고려한 전사체 변화 예측 심층 생성 모델을 소개한다. 처리한 화합물 정보와 더불어 처리용량과 시간, 세포주의 기저 유전자 발현 정보를 사용함으로써 정밀한 전사체 변화 예측을 가능하게 한다. 따라서 본 모델이 생성한 전사체 변화 데이터를 활용함으로써 약물에 대한 이해도를 향상하고 신약 개발 및 정밀 의료 기술의 발전 등에 기여할 수 있는 가능성을 보여준다.},
  keywords   = {Artificial Intelligence, Bioinformatics, Drugs, Transcriptome},
  pubstate   = {published},
  tppubtype  = {conference}
}
2024
Yeabean Na; Junho Kim; Myung-Gyun Kang; Sunyong Yoo
Abstract | Links | BibTeX | Tags: Bioinformatics, Deep learning, Drugs
@conference{Yoo2024,
  author     = {Yeabean Na and Junho Kim and Myung-Gyun Kang and Sunyong Yoo},
  title      = {A Multimodal Deep Learning Approach for Predicting Drug Metabolism According to the CYP2D6 Genetic Variation},
  booktitle  = {The 18th International Conference on Data and Text Mining in Biomedical Informatics},
  year       = {2024},
  date       = {2024-01-02},
  url        = {https://dtmbio.net/},
  urldate    = {2024-01-02},
  abstract   = {Background Cytochrome P450 2D6 (CYP2D6) is involved in metabolizing up to 25% of the drugs commonly used in clinics. Characterized by high polymorphisms, CYP2D6 is one of the key pharmacogenes in pharmacogenomics. This genetic variability can lead to significant inter-patient differences in drug metabolism, resulting in differential therapeutic responses and adverse effects. However, conducting in vivo or in vitro experiments for each CYP2D6 variant across various drugs is time-consuming, ethically challenging, and expensive. Given these constraints, in silico modeling approaches for predicting the drug metabolism profiles of CYP2D6 variants are a critical necessity.
Methods A multimodal deep learning approach that combined CYP2D6 genotype data and drug structural information was used in this study. A Convolutional Neural Network (CNN) was used to encode the genotype data, and a Graph Convolutional Network (GCN) was used to decode the drug structures. These diverse data types were then integrated into a multimodal model to predict drug metabolism.
Results A comparative analysis was conducted between a CNN model utilizing solely the CYP2D6 genotype data and a multimodal model incorporating both genotype and drug-specific information. The multimodal approach demonstrated better performance across all evaluated metrics. An additional experiment predicting drug metabolism on unseen drug data also performed well.
Conclusions This model is anticipated to enhance the prediction of metabolic capacity in previously uncharacterized CYP2D6 variants, potentially reducing adverse drug reactions.},
  keywords   = {Bioinformatics, Deep learning, Drugs},
  pubstate   = {published},
  tppubtype  = {conference}
}
Methods A multimodal deep learning approach that combined CYP2D6 genotype data and drug structural information was used in this study. A Convolutional Neural Network (CNN) was used to encode the genotype data, and a Graph Convolutional Network (GCN) was used to decode the drug structures. These diverse data types were then integrated into a multimodal model to predict drug metabolism.
Results A comparative analysis was conducted between a CNN model utilizing solely the CYP2D6 genotype data and a multimodal model incorporating both genotype and drug-specific information. The multimodal approach demonstrated better performance across all evaluated metrics. An additional experiment predicting drug metabolism on unseen drug data also performed well.
Conclusions This model is anticipated to enhance the prediction of metabolic capacity in previously uncharacterized CYP2D6 variants, potentially reducing adverse drug reactions.
2022
Myeonghyeon Jeong; Sangjin Kim; Yewon Han; Jihyun Jeong; Dahwa Jung; Inyoung Choi; Sunyong Yoo
BibTeX | Tags: Attention mechanism, Bioinformatics, Deep learning
@conference{jeong2022attention,
  author     = {Myeonghyeon Jeong and Sangjin Kim and Yewon Han and Jihyun Jeong and Dahwa Jung and Inyoung Choi and Sunyong Yoo},
  title      = {Attention-based Deep Neural Network for Predicting Fetotoxicity},
  booktitle  = {The 10th International Conference on Big Data Applications and Services},
  publisher  = {The Korea Big Data Service Society},
  year       = {2022},
  date       = {2022-01-02},
  urldate    = {2022-01-02},
  keywords   = {Attention mechanism, Bioinformatics, Deep learning},
  pubstate   = {published},
  tppubtype  = {conference}
}
2015
Moonshik Shin; Sungyoung Yoo; Suhyun Ha; Kyungrin Noh; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: ADME, Bioinformatics, Natural product
@conference{shin2015identifying,
  author       = {Moonshik Shin and Sungyoung Yoo and Suhyun Ha and Kyungrin Noh and Doheon Lee},
  title        = {Identifying Potential Bioactive Compounds of Natural Products by Combining ADMET Prediction Methods},
  booktitle    = {Proceedings of the ACM Ninth International Workshop on Data and Text Mining in Biomedical Informatics},
  pages        = {19},
  publisher    = {ACM},
  organization = {CIKM},
  year         = {2015},
  date         = {2015-01-01},
  doi          = {10.1145/2811163.2811168},
  url          = {https://dl.acm.org/doi/abs/10.1145/2811163.2811168},
  urldate      = {2015-01-01},
  abstract     = {Herbs consist of various chemical compounds. Thus, identifying potential bioactive compounds from this diversity is an important task for studies in the herb, food and natural products. Even though various computational approaches are developed for predicting bioactive compounds, the prediction performances are diverse due to different methods and dataset. Therefore, there is urgent demand for an approach that connotes the previous methods and identify potential bioactive compounds with high accuracy. To meet the demand, we proposed a filtering strategy that identifies potential bioactive compounds by combining previously developed computational methods which predict ADMET, such as Human Intestinal Absorption (HIA) and Caco-2 permeability. Our approach was evaluated on 930 compounds that are known as bioactive compounds, which were extracted from literature, DrugBank and Dr. Duke's phytochemical databases. By applying our filtering strategy, 97.5% of the known bioactive compounds were correctly predicted as bioactive. We examined whether our approach can distinguish the potential bioactive compound from the non-potential bioactive compounds with Fisher's exact test, and a reasonable p-value (3.806 × 10⁻⁹) was derived. For the next step, we are planning to develop a machine-learning based method to improve our filtering approach.},
  keywords     = {ADME, Bioinformatics, Natural product},
  pubstate     = {published},
  tppubtype    = {conference}
}