CONFERENCES
2025
2.
Chaewon Kim; Sunyong Yoo
Abstract | Links | BibTeX | Tags: Bioinformatics, Generative model, Transcriptome
@inproceedings{Kim2025,
  title     = {Predicting Drug-Induced Transcriptional Responses Using Latent Diffusion Model},
  author    = {Kim, Chaewon and Yoo, Sunyong},
  booktitle = {The 19th International Conference on Data and Text Mining in Biomedical Informatics},
  year      = {2025},
  date      = {2025-12-17},
  url       = {https://bmil.jnu.ac.kr/wp-content/uploads/2025/12/Chaewon-Kim-Sunyong-Yoo-Predicting-Drug-Induced-Transcriptional-Responses-Using-Latent-Diffusion-Model.pdf},
  urldate   = {2025-12-17},
  abstract  = {Accurate prediction of drug-induced transcriptional responses is essential for drug discovery and precision medicine. Existing computational models, including encoder–decoder architectures and generative adversarial network-based approaches, achieve reasonable accuracy but often fail to account for gene–gene correlations and generalize to unseen conditions. Here, we present a latent diffusion model that combines a variational autoencoder (VAE) with a diffusion process. The VAE compresses gene expression (GE) profiles into a low-dimensional latent space, where the diffusion process learns the joint probability distribution of latent GE representations and their noisy intermediates. Learning these distributions allows the model to capture gene–gene correlations more effectively. Moreover, our model incorporates multiple perturbation conditions—including cell line, compound, dose, and time—to enhance generalization performance on unseen conditions. The reverse diffusion process is designed to predict both the mean and variance of the latent representations, which robustly enhances the fidelity of the generated GE profiles. The proposed model demonstrated the highest accuracy in reconstructing perturbed GE profiles compared to previous studies, achieving a root mean squared error (RMSE) of 1.340, a Pearson correlation coefficient of 0.832 and an R² score of 0.669. In addition, the proposed model demonstrated superior performance in preserving gene–gene correlation, as shown by correlation heatmaps, compared to existing approaches. To evaluate the biological relevance of generated transcriptional profiles, we conducted a half-maximal inhibitory concentration prediction task using the generated profiles as model inputs. Our model outperformed the baseline methods, achieving an RMSE of 1.335 and an R² score of 0.819. In conclusion, we demonstrated the potential of diffusion-based generative models as reliable and versatile tools for modeling transcriptional responses, with implications for drug discovery and precision medicine applications.},
  keywords  = {Bioinformatics, Generative model, Transcriptome},
  pubstate  = {published},
  tppubtype = {conference}
}
Accurate prediction of drug-induced transcriptional responses is essential for drug discovery and precision medicine. Existing computational models, including encoder–decoder architectures and generative adversarial network-based approaches, achieve reasonable accuracy but often fail to account for gene–gene correlations and generalize to unseen conditions. Here, we present a latent diffusion model that combines a variational autoencoder (VAE) with a diffusion process. The VAE compresses gene expression (GE) profiles into a low-dimensional latent space, where the diffusion process learns the joint probability distribution of latent GE representations and their noisy intermediates. Learning these distributions allows the model to capture gene–gene correlations more effectively. Moreover, our model incorporates multiple perturbation conditions—including cell line, compound, dose, and time—to enhance generalization performance on unseen conditions. The reverse diffusion process is designed to predict both the mean and variance of the latent representations, which robustly enhances the fidelity of the generated GE profiles. The proposed model demonstrated the highest accuracy in reconstructing perturbed GE profiles compared to previous studies, achieving a root mean squared error (RMSE) of 1.340, a Pearson correlation coefficient of 0.832 and an R² score of 0.669. In addition, the proposed model demonstrated superior performance in preserving gene–gene correlation, as shown by correlation heatmaps, compared to existing approaches. To evaluate the biological relevance of generated transcriptional profiles, we conducted a half-maximal inhibitory concentration prediction task using the generated profiles as model inputs. Our model outperformed the baseline methods, achieving an RMSE of 1.335 and an R² score of 0.819.
In conclusion, we demonstrated the potential of diffusion-based generative models as reliable and versatile tools for modeling transcriptional responses, with implications for drug discovery and precision medicine applications.
1.
Junyong Park; Sunyong Yoo
Abstract | Links | BibTeX | Tags: Bioinformatics, Generative model, Molecular design
@inproceedings{Park2025b,
  title     = {Novel Molecular Design via a Scaffold-Aware Transformer with Multi-Scale Attention Mechanisms},
  author    = {Park, Junyong and Yoo, Sunyong},
  booktitle = {The 19th International Conference on Data and Text Mining in Biomedical Informatics},
  year      = {2025},
  date      = {2025-12-17},
  url       = {https://bmil.jnu.ac.kr/wp-content/uploads/2025/12/Junyoung-Park-Sunyong-Yoo-Novel-Molecular-Design-via-a-Scaffold-Aware-Transformer-with-Multi-Scale-Attention-Mechanisms.pdf},
  abstract  = {Recent advancements in artificial intelligence have demonstrated great potential in accelerating drug discovery by exploring vast chemical spaces and predicting molecular properties. However, conventional molecular generation models have limitations in reflecting desired molecular structures, as they often fail to incorporate specific structural constraints or target properties directly into the generation process. To overcome these limitations, we propose a novel framework that integrates a transformer-based generative model and a graph attention network-based predictive model. The generative model produces molecules with desired structural characteristics by explicitly incorporating scaffold information, while the predictive model estimates the biological activity of the generated molecules. A cyclic learning structure enables the generative and predictive models to interact iteratively, facilitating continuous evaluation and feedback during training. In addition, a multi-stage tournament selection with experience memory guides the subsequent training process. Our approach accelerates the identification of scaffold-consistent, high-affinity candidates by exploring novel chemical variations around a user-specified scaffold. Experimental results show that the proposed scaffold-aware transformer achieves competitive validity, uniqueness, and novelty, and effectively generates novel compounds with high predicted binding affinity for biological targets. An attention-based analysis extracts atom-level importance scores and highlights the substructures that contribute to the predicted binding affinity, providing interpretable insights into structure-activity relationships. This study provides a practical and interpretable tool for scaffold-conditioned molecular generation.},
  keywords  = {Bioinformatics, Generative model, Molecular design},
  pubstate  = {published},
  tppubtype = {conference}
}
Recent advancements in artificial intelligence have demonstrated great potential in accelerating drug discovery by exploring vast chemical spaces and predicting molecular properties. However, conventional molecular generation models have limitations in reflecting desired molecular structures, as they often fail to incorporate specific structural constraints or target properties directly into the generation process. To overcome these limitations, we propose a novel framework that integrates a transformer-based generative model and a graph attention network-based predictive model. The generative model produces molecules with desired structural characteristics by explicitly incorporating scaffold information, while the predictive model estimates the biological activity of the generated molecules. A cyclic learning structure enables the generative and predictive models to interact iteratively, facilitating continuous evaluation and feedback during training. In addition, a multi-stage tournament selection with experience memory guides the subsequent training process. Our approach accelerates the identification of scaffold-consistent, high-affinity candidates by exploring novel chemical variations around a user-specified scaffold. Experimental results show that the proposed scaffold-aware transformer achieves competitive validity, uniqueness, and novelty, and effectively generates novel compounds with high predicted binding affinity for biological targets. An attention-based analysis extracts atom-level importance scores and highlights the substructures that contribute to the predicted binding affinity, providing interpretable insights into structure-activity relationships. This study provides a practical and interpretable tool for scaffold-conditioned molecular generation.