2025
Sunwoo Jung; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: ADR, Artificial Intelligence, Attention mechanism, Bioinformatics, DDI, Deep learning, Text mining
% Journal article, Computers in Biology and Medicine vol. 185. The key stays Jung2024 so existing citations keep resolving, although the issue is dated 2025.
@article{Jung2024,
  title     = {Interpretable prediction of drug-drug interactions via text embedding in biomedical literature},
  author    = {Sunwoo Jung and Sunyong Yoo},
  url       = {https://www.sciencedirect.com/science/article/pii/S0010482524015816},
  doi       = {10.1016/j.compbiomed.2024.109496},
  issn      = {0010-4825},
  year      = {2025},
  date      = {2025-02-01},
  urldate   = {2025-02-01},
  journal   = {Computers in Biology and Medicine},
  volume    = {185},
  pages     = {109496},
  abstract  = {Polypharmacy is a promising approach for treating diseases, especially those with complex symptoms. However, it can lead to unexpected drug-drug interactions (DDIs), potentially reducing efficacy and triggering adverse drug reactions (ADRs). Predicting the risk of DDIs is crucial for ensuring safe drug use, particularly by identifying the types of DDIs and the mechanisms involved. Therefore, this study used biomedical literature to proposed hierarchical attention-based deep learning models to predict DDIs and their types. The proposed model consists of two components: drug embedding and DDI prediction. The drug embedding module extracts representation vectors that effectively capture drug properties using sentence and sequence embedding methods. For sentence embedding, a pre-trained biomedical language model is used to map drug-related sentences into vector space. For sequence embedding, sentence embedding vectors are sequentially fed into bidirectional long short-term memory with a hierarchical attention network, enabling the analysis of sentences relevant to DDI prediction while accounting for the order of the sentences. Finally, DDI prediction is performed using a deep neural network based on the sequence embedding vectors of a drug pair. Our model achieved high performances in the accuracy (0.85–0.90), AUROC (0.98–0.99), and AUPR (0.63–0.95) performance across 164 DDI types. Additionally, the proposed model showed improvements in up to 11 % in AUROC, and 8 % in AUPR. Furthermore, model interprets predictions by leveraging attention mechanisms and drug similarity. The results indicated that the model considered various factors beyond similarity to predict DDIs. These findings may help prevent unforeseen medical accidents and reduce healthcare costs by predicting detailed drug interaction types.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {ADR, Artificial Intelligence, Attention mechanism, Bioinformatics, DDI, Deep learning, Text mining},
  pubstate  = {published},
  tppubtype = {article}
}
Dohyeon Lee; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Artificial Intelligence, Attention mechanism, Bioinformatics, Cardiotoxicity, Deep learning, Graph attention network
% Journal article, Journal of Cheminformatics vol. 17. The journal identifies papers by article number, which is recorded in pages (same layout as the other Journal of Cheminformatics entry in this file).
@article{Lee2025,
  title     = {hERGAT: predicting hERG blockers using graph attention mechanism through atom- and molecule-level interaction analyses},
  author    = {Dohyeon Lee and Sunyong Yoo},
  url       = {https://link.springer.com/article/10.1186/s13321-025-00957-x},
  doi       = {10.1186/s13321-025-00957-x},
  issn      = {1758-2946},
  year      = {2025},
  date      = {2025-01-28},
  urldate   = {2025-01-28},
  journal   = {Journal of Cheminformatics},
  volume    = {17},
  number    = {1},
  pages     = {11},
  abstract  = {The human ether-a-go-go-related gene (hERG) channel plays a critical role in the electrical activity of the heart, and its blockers can cause serious cardiotoxic effects. Thus, screening for hERG channel blockers is a crucial step in the drug development process. Many in silico models have been developed to predict hERG blockers, which can efficiently save time and resources. However, previous methods have found it hard to achieve high performance and to interpret the predictive results. To overcome these challenges, we have proposed hERGAT, a graph neural network model with an attention mechanism, to consider compound interactions on atomic and molecular levels. In the atom-level interaction analysis, we applied a graph attention mechanism (GAT) that integrates information from neighboring nodes and their extended connections. The hERGAT employs a gated recurrent unit (GRU) with the GAT to learn information between more distant atoms. To confirm this, we performed clustering analysis and visualized a correlation heatmap, verifying the interactions between distant atoms were considered during the training process. In the molecule-level interaction analysis, the attention mechanism enables the target node to focus on the most relevant information, highlighting the molecular substructures that play crucial roles in predicting hERG blockers. Through a literature review, we confirmed that highlighted substructures have a significant role in determining the chemical and biological characteristics related to hERG activity. Furthermore, we integrated physicochemical properties into our hERGAT model to improve the performance. Our model achieved an area under the receiver operating characteristic of 0.907 and an area under the precision-recall of 0.904, demonstrating its effectiveness in modeling hERG activity and offering a reliable framework for optimizing drug safety in early development stages.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Artificial Intelligence, Attention mechanism, Bioinformatics, Cardiotoxicity, Deep learning, Graph attention network},
  pubstate  = {published},
  tppubtype = {article}
}
박준영; 유선용
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Drugs, Transformer
% Journal article, vol. 26 no. 1. NOTE(review): the citation key is non-ASCII; it is kept so existing citations still resolve, but it presumably needs a Unicode-aware backend such as Biber - confirm the toolchain.
@article{박준영;유선용2025,
  title     = {화합물의 골격구조를 활용한 Transformer 기반 새로운 분자 설계},
  author    = {박준영 and 유선용},
  url       = {http://journal.dcs.or.kr/_common/do.php?a=full&b=12&bidx=3950&aidx=43776},
  doi       = {10.9728/dcs.2025.26.1.217},
  issn      = {1598-2009},
  year      = {2025},
  date      = {2025-01-01},
  urldate   = {2025-01-01},
  journal   = {디지털콘텐츠학회논문지},
  volume    = {26},
  number    = {1},
  pages     = {217--223},
  abstract  = {전통적인 신약 개발은 새로운 약물을 시장에 출시하기까지 많은 시간과 막대한 비용이 소요되며, 높은 실패율로 인해 효율성이 낮다는 문제가 있다. 이러한 문제를 해결하기 위해 생성 모델을 활용한 혁신적인 접근법이 주목받고 있다. 본 연구에서는 트랜스포머 디코더 구조를 기반으로 화합물의 구조 정보를 문자열로 학습하여 새로운 화합물 구조를 생성하는 모델을 제안한다. 특히, 화합물에서 추출한 골격 구조(scaffold)를 임베딩하여 모델 입력에 포함함으로써, 결합 및 원자 정보와 골격 구조를 동시에 처리하였다. 벤치마크 데이터셋을 사용한 평가 결과, 골격 구조 임베딩을 적용한 모델이 데이터셋 별로 유효성 지표에서 0.964, 0.986의 우수한 성능을 보였다. 본 연구는 분자 생성 모델에 골격 구조 임베딩을 도입함으로써, 화학적 규칙을 준수하는 분자를 효과적으로 생성할 수 있는 방법을 제시하였으며, 신약 개발 분야에서 AI 기반 분자 설계의 효율성을 높이는 데 기여할 것으로 기대된다.},
  keywords  = {Bioinformatics, Drugs, Transformer},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Hyeon Jae Lee; Kyeong Jin Kim; Soo-yeon Park; Kwanyong Choi; Jaeho Pyee; Sunyong Yoo; Ji Yeon Kim
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Gut permeability, Inflammatory bowel disease, Network analysis
% Journal article, Food Bioscience vol. 61 (2024), article 104833.
@article{lee2024enhancing,
  author    = {Hyeon Jae Lee and Kyeong Jin Kim and Soo-yeon Park and Kwanyong Choi and Jaeho Pyee and Sunyong Yoo and Ji Yeon Kim},
  title     = {Enhancing intestinal health with germinated oats: Bioinformatics and compound profiling insights into a novel approach for managing inflammatory bowel disease},
  journal   = {Food Bioscience},
  volume    = {61},
  pages     = {104833},
  year      = {2024},
  date      = {2024-10-01},
  publisher = {Elsevier},
  doi       = {10.1016/j.fbio.2024.104833},
  url       = {https://www.sciencedirect.com/science/article/pii/S221242922401263X},
  urldate   = {2024-10-01},
  abstract  = {Oats are widely recognized for their numerous health benefits, particularly regarding their anti-inflammatory properties. However, research exploring their specific effects on intestinal permeability and tight junction (TJ) integrity in the context of inflammatory bowel disease (IBD) has been limited. This study aimed to investigate the therapeutic efficacy of germinated oat extract (GOE) in managing IBD, a condition marked by persistent gastrointestinal inflammation and increasing global prevalence. The identified compounds were used to predict target biomarkers and mechanisms related to IBD via bioinformatics analysis and validated using in vitro models. In this study, we used network biology and chemical informatics approaches to predict target biomarkers and their molecular mechanisms. The predicted biomarkers were validated for their effectiveness using a cellular model of intestinal inflammation. The effectiveness of treatment with GOE was validated via in vitro studies, which demonstrated significant enhancement in transepithelial electrical resistance (TEER) and a reduction in fluorescein isothiocyanate (FITC) permeability. Analysis of the mRNA expression of IBD-associated biomarkers in Caco-2 cells demonstrated a significant increase in the mRNA levels of TJ proteins, including TJP1, TJP2, occludin, claudin-1 and claudin-3 compared to the inflammatory group. Furthermore, treatment with GOE markedly reduced the mRNA expression levels of proinflammatory cytokines such as TNF-α, IL-6, and CXCL8. The combination of COCONUT and chemical profiling analysis provided insights into the fundamental molecular mechanisms of GOE. These results underscore the potential of systematically using big data-driven network biology to analyze the effect of food components, highlighting GOE as a promising dietary intervention for IBD.},
  note      = {Correspondence to Ji Yeon Kim},
  keywords  = {Bioinformatics, Gut permeability, Inflammatory bowel disease, Network analysis},
  pubstate  = {published},
  tppubtype = {article},
}
Myeonghyeon Jeong; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Attention mechanism, Bioinformatics, Deep learning, Fetotoxicity, in silico, Interpretability
% Journal article, Molecular Informatics vol. 43 no. 6 (2024).
@article{jeong2024fetoml,
  author    = {Myeonghyeon Jeong and Sunyong Yoo},
  title     = {FetoML: Interpretable predictions of the fetotoxicity of drugs based on machine learning approaches},
  journal   = {Molecular Informatics},
  volume    = {43},
  number    = {6},
  pages     = {e202300312},
  year      = {2024},
  date      = {2024-03-03},
  publisher = {Wiley Online Library},
  issn      = {1868-1743},
  doi       = {10.1002/minf.202300312},
  url       = {https://onlinelibrary.wiley.com/doi/full/10.1002/minf.202300312},
  urldate   = {2024-03-03},
  abstract  = {Pregnant females may use medications to manage health problems that develop during pregnancy or that they had prior to pregnancy. However, using medications during pregnancy has a potential risk to the fetus. Assessing the fetotoxicity of drugs is essential to ensure safe treatments, but the current process is challenged by ethical issues, time, and cost. Therefore, the need for in silico models to efficiently assess the fetotoxicity of drugs has recently emerged. Previous studies have proposed successful machine learning models for fetotoxicity prediction and even suggest molecular substructures that are possibly associated with fetotoxicity risks or protective effects. However, the interpretation of the decisions of the models on fetotoxicity prediction for each drug is still insufficient. This study constructed machine learning-based models that can predict the fetotoxicity of drugs while providing explanations for the decisions. For this, permutation feature importance was used to identify the general features that the model made significant in predicting the fetotoxicity of drugs. In addition, features associated with fetotoxicity for each drug were analyzed using the attention mechanism. The predictive performance of all the constructed models was significantly high (AUROC: 0.854-0.974, AUPR: 0.890-0.975). Furthermore, we conducted literature reviews on the predicted important features and found that they were highly associated with fetotoxicity. We expect that our model will benefit fetotoxicity research by providing an evaluation of fetotoxicity risks for drugs or drug candidates, along with an interpretation of that prediction.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Attention mechanism, Bioinformatics, Deep learning, Fetotoxicity, in silico, Interpretability},
  pubstate  = {published},
  tppubtype = {article},
}
Sunyong Yoo; Myeonghyeon Jeong; Subhin Seomun; Kiseong Kim; Youngmahn Han
Abstract | Links | BibTeX | Dimensions | Tags: Amino acids, Attention mechanism, Bioinformatics, Coronaviruses, Deep learning, Immune system, Lymphocytes, Predictive models, Proteins, Transformer
% Journal article, IEEE/ACM Transactions on Computational Biology and Bioinformatics vol. 21 no. 3 (2024).
@article{yoo2024interpretable,
  title     = {Interpretable Prediction of SARS-CoV-2 Epitope-specific TCR Recognition Using a Pre-Trained Protein Language Model},
  author    = {Sunyong Yoo and Myeonghyeon Jeong and Subhin Seomun and Kiseong Kim and Youngmahn Han},
  url       = {https://ieeexplore.ieee.org/abstract/document/10443062},
  doi       = {10.1109/TCBB.2024.3368046},
  year      = {2024},
  date      = {2024-02-21},
  urldate   = {2024-02-21},
  journal   = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  volume    = {21},
  number    = {3},
  pages     = {428--438},
  publisher = {IEEE},
  abstract  = {The emergence of the novel coronavirus, designated as severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2), has posed a significant threat to public health worldwide. There has been progress in reducing hospitalizations and deaths due to SARS-CoV-2. However, challenges stem from the emergence of SARS-CoV-2 variants, which exhibit high transmission rates, increased disease severity, and the ability to evade humoral immunity. Epitope-specific T-cell receptor (TCR) recognition is key in determining the T-cell immunogenicity for SARS-CoV-2 epitopes. Although several data-driven methods for predicting epitope-specific TCR recognition have been proposed, they remain challenging due to the enormous diversity of TCRs and the lack of available training data. Self-supervised transfer learning has recently been proven useful for extracting information from unlabeled protein sequences, increasing the predictive performance of fine-tuned models, and using a relatively small amount of training data. This study presents a deep-learning model generated by fine-tuning pre-trained protein embeddings from a large corpus of protein sequences. The fine-tuned model showed markedly high predictive performance and outperformed the recent Gaussian process-based prediction model. The output attentions captured by the deep-learning model suggested critical amino acid positions in the SARS-CoV-2 epitope-specific TCRβ sequences that are highly associated with the viral escape of T-cell immune response.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Amino acids, Attention mechanism, Bioinformatics, Coronaviruses, Deep learning, Immune system, Lymphocytes, Predictive models, Proteins, Transformer},
  pubstate  = {published},
  tppubtype = {article}
}
Soyeon Lee; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Artificial Intelligence, Attention mechanism, Bioinformatics, Deep learning, Drug-induced liver injury, Feature importance, Hepatotoxicity, in silico
% Journal article, Journal of Cheminformatics vol. 16 no. 1 (2024), article 1.
@article{lee2024interdili,
  author    = {Soyeon Lee and Sunyong Yoo},
  title     = {InterDILI: interpretable prediction of drug-induced liver injury through permutation feature importance and attention mechanism},
  journal   = {Journal of Cheminformatics},
  volume    = {16},
  number    = {1},
  pages     = {1},
  year      = {2024},
  date      = {2024-01-03},
  publisher = {Springer},
  doi       = {10.1186/s13321-023-00796-8},
  url       = {https://link.springer.com/article/10.1186/s13321-023-00796-8},
  urldate   = {2024-01-03},
  abstract  = {Safety is one of the important factors constraining the distribution of clinical drugs on the market. Drug-induced liver injury (DILI) is the leading cause of safety problems produced by drug side effects. Therefore, the DILI risk of approved drugs and potential drug candidates should be assessed. Currently, in vivo and in vitro methods are used to test DILI risk, but both methods are labor-intensive, time-consuming, and expensive. To overcome these problems, many in silico methods for DILI prediction have been suggested. Previous studies have shown that DILI prediction models can be utilized as prescreening tools, and they achieved a good performance. However, there are still limitations in interpreting the prediction results. Therefore, this study focused on interpreting the model prediction to analyze which features could potentially cause DILI. For this, five publicly available datasets were collected to train and test the model. Then, various machine learning methods were applied using substructure and physicochemical descriptors as inputs and the DILI label as the output. The interpretation of feature importance was analyzed by recognizing the following general-to-specific patterns: (i) identifying general important features of the overall DILI predictions, and (ii) highlighting specific molecular substructures which were highly related to the DILI prediction for each compound. The results indicated that the model not only captured the previously known properties to be related to DILI but also proposed a new DILI potential substructural of physicochemical properties. The models for the DILI prediction achieved an area under the receiver operating characteristic (AUROC) of 0.88–0.97 and an area under the Precision-Recall curve (AUPRC) of 0.81–0.95. From this, we hope the proposed models can help identify the potential DILI risk of drug candidates at an early stage and offer valuable insights for drug development.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Artificial Intelligence, Attention mechanism, Bioinformatics, Deep learning, Drug-induced liver injury, Feature importance, Hepatotoxicity, in silico},
  pubstate  = {published},
  tppubtype = {article},
}
Yeabean Na; Junho Kim; Myung-Gyun Kang; Sunyong Yoo
Abstract | Links | BibTeX | Tags: Bioinformatics, Deep learning, Drugs
% Conference paper; the conference name is recorded in booktitle, which this entry type requires. The key stays Yoo2024 so existing citations keep resolving.
@conference{Yoo2024,
  title     = {A Multimodal Deep Learning Approach for Predicting Drug Metabolism According to the CYP2D6 Genetic Variation},
  author    = {Yeabean Na and Junho Kim and Myung-Gyun Kang and Sunyong Yoo},
  url       = {https://dtmbio.net/},
  year      = {2024},
  date      = {2024-01-02},
  urldate   = {2024-01-02},
  booktitle = {The 18th International Conference on Data and Text Mining in Biomedical Informatics},
  abstract  = {Background Cytochrome P450 2D6 (CYP2D6) is involved in metabolizing up to 25% of the drugs commonly used in clinics. Characterized by high polymorphisms, CYP2D6 is one of the key pharmacogenes in pharmacogenomics. This genetic variability can lead to significant inter-patient differences in drug metabolism, resulting in differential therapeutic responses and adverse effects. However, conducting in vivo or in vitro experiments for each CYP2D6 variant across various drugs is time-consuming, ethically challenging, and expensive. Given these constraints, In silico modeling approaches for predicting the drug metabolism profiles of CYP2D6 variants are a critical necessity.
Methods A multimodal deep learning approach that combined CYP2D6 genotype data and drug structural information was used in this study. A Convolutional Neural Network (CNN) was used to encode the genotype data, and a Graph Convolutional Network (GCN) was used to decode the drug structures. These diverse data types were then integrated into a multimodal model to predict drug metabolism.
Results A comparative analysis was conducted between a CNN model utilizing solely the CYP2D6 genotype data and a multimodal model incorporating both genotype and drug-specific information. The multimodal approach demonstrated better performance across all evaluated metrics. An additional experiment predicting drug metabolism on unseen drug data also performed well.
Conclusions This model is anticipated to enhance the prediction of metabolic capacity in previously uncharacterized CYP2D6 variants, potentially reducing adverse drug reactions.},
  keywords  = {Bioinformatics, Deep learning, Drugs},
  pubstate  = {published},
  tppubtype = {conference}
}
Methods A multimodal deep learning approach that combined CYP2D6 genotype data and drug structural information was used in this study. A Convolutional Neural Network (CNN) was used to encode the genotype data, and a Graph Convolutional Network (GCN) was used to decode the drug structures. These diverse data types were then integrated into a multimodal model to predict drug metabolism.
Results A comparative analysis was conducted between a CNN model utilizing solely the CYP2D6 genotype data and a multimodal model incorporating both genotype and drug-specific information. The multimodal approach demonstrated better performance across all evaluated metrics. An additional experiment predicting drug metabolism on unseen drug data also performed well.
Conclusions This model is anticipated to enhance the prediction of metabolic capacity in previously uncharacterized CYP2D6 variants, potentially reducing adverse drug reactions.
이도현; 유선용
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Cardiotoxicity, Graph attention network
% Journal article, vol. 25 no. 10 (2024). NOTE(review): nokey looks like an auto-generated placeholder key; it is kept unchanged so existing citations still resolve.
@article{nokey,
  title     = {메시지 패싱 그래프 기반 딥러닝 모델을 활용한 화합물의 심장독성 예측},
  author    = {이도현 and 유선용},
  url       = {https://www.dbpia.co.kr/journal/articleDetail?nodeId=NODE11956044},
  doi       = {10.9728/dcs.2024.25.10.2961},
  issn      = {1598-2009},
  year      = {2024},
  date      = {2024-01-01},
  urldate   = {2024-01-01},
  journal   = {디지털콘텐츠학회논문지},
  volume    = {25},
  number    = {10},
  pages     = {2961--2968},
  publisher = {한국디지털콘텐츠학회},
  abstract  = {hERG 채널은 심장의 전기 활동에 필수적이며, 이 채널을 차단하는 물질은 심각한 심장 독성 효과를 일으킬 수 있다. 인실리코 예측 모델은 hERG 차단제를 효율적으로 선별할 수 있어 시간과 자원을 절약할 수 있다. 이전 접근법은 예측 결과를 해석하고 분자 구조-기능 관계를 이해하는 데 어렵다. 본 연구에서는 공개 데이터베이스로부터 화합물을 수집하여 12,920개의 데이터셋을 구축 하였다. 화합물의 그래프 구조를 고려하는 그래프 신경망(GNN) 가운데 메시지 패싱 신경망(MPNN)을 활용하여 특징 벡터를 추출하고, 이를 구조적ㆍ물리화학적 특성과 결합하여 최종 hERG 차단제를 예측하였다. 해당 모델은 AUROC는 0.864 (±0.009), AUPR은 0.907 (±0.010)의 성능을 달성하였다. 실험 결과, 제안된 모델은 그래프 특징 벡터를 통합하여 분자 특성을 효과적으로 반영하고 분자 간의 관계를 예측하여 hERG 차단제를 예측할 수 있음을 시사한다. 본 연구는 약물 개발과정에서 예비 도구로 활용되어 심장독성을 조기에 평가할 수 있을 것이다.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Bioinformatics, Cardiotoxicity, Graph attention network},
  pubstate  = {published},
  tppubtype = {article}
}
2023
Jinmyung Jung; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Breast cancer, Feature importance, Gene expression, Machine learning, Metastasis marker
% Journal article, Genes vol. 14 no. 9 (2023), article 1820.
@article{jung2023identification,
  author    = {Jinmyung Jung and Sunyong Yoo},
  title     = {Identification of Breast Cancer Metastasis Markers from Gene Expression Profiles Using Machine Learning Approaches},
  journal   = {Genes},
  volume    = {14},
  number    = {9},
  pages     = {1820},
  year      = {2023},
  date      = {2023-09-20},
  publisher = {MDPI},
  doi       = {10.3390/genes14091820},
  url       = {https://www.mdpi.com/2073-4425/14/9/1820},
  urldate   = {2023-09-20},
  abstract  = {Cancer metastasis accounts for approximately 90% of cancer deaths, and elucidating markers in metastasis is the first step in its prevention. To characterize metastasis marker genes (MGs) of breast cancer, XGBoost models that classify metastasis status were trained with gene expression profiles from TCGA. Then, a metastasis score (MS) was assigned to each gene by calculating the inner product between the feature importance and the AUC performance of the models. As a result, 54, 202, and 357 genes with the highest MS were characterized as MGs by empirical p-value cutoffs of 0.001, 0.005, and 0.01, respectively. The three sets of MGs were compared with those from existing metastasis marker databases, which provided significant results in most comparisons (p-value < 0.05). They were also significantly enriched in biological processes associated with breast cancer metastasis. The three MGs, SPPL2C, KRT23, and RGS7, showed highly significant results (p-value < 0.01) in the survival analysis. The MGs that could not be identified by statistical analysis (e.g., GOLM1, ELAVL1, UBP1, and AZGP1), as well as the MGs with the highest MS (e.g., ZNF676, FAM163B, LDOC2, IRF1, and STK40), were verified via the literature. Additionally, we checked how close the MGs were to each other in the protein–protein interaction networks. We expect that the characterized markers will help understand and prevent breast cancer metastasis.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Bioinformatics, Breast cancer, Feature importance, Gene expression, Machine learning, Metastasis marker},
  pubstate  = {published},
  tppubtype = {article},
}
2022
Myeonghyeon Jeong; Sangjin Kim; Yewon Han; Jihyun Jeong; Dahwa Jung; Inyoung Choi; Sunyong Yoo
BibTeX | Tags: Attention mechanism, Bioinformatics, Deep learning
% Conference paper (2022). It has its own key because the placeholder key nokey is already taken by an earlier article entry, and BibTeX discards a repeated key.
@conference{jeong2022attention,
  title     = {Attention-based Deep Neural Network for Predicting Fetotoxicity},
  author    = {Myeonghyeon Jeong and Sangjin Kim and Yewon Han and Jihyun Jeong and Dahwa Jung and Inyoung Choi and Sunyong Yoo},
  year      = {2022},
  date      = {2022-01-02},
  urldate   = {2022-01-02},
  booktitle = {The 10th International Conference on Big Data Applications and Services},
  publisher = {The Korea Big Data Service Society},
  keywords  = {Attention mechanism, Bioinformatics, Deep learning},
  pubstate  = {published},
  tppubtype = {conference}
}
2020
Sunyong Yoo; Hyung Chae Yang; Seongyeong Lee; Jaewook Shin; Seyoung Min; Eunjoo Lee; Minkeun Song; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Chemical property, Deep learning, Molecular interaction, Natural product, Network analysis, Text mining
% Journal article, Frontiers in Pharmacology vol. 11 (2020), article 584875. The citation key is the article DOI.
@article{10.3389/fphar.2020.584875,
  author    = {Sunyong Yoo and Hyung Chae Yang and Seongyeong Lee and Jaewook Shin and Seyoung Min and Eunjoo Lee and Minkeun Song and Doheon Lee},
  title     = {A Deep Learning-Based Approach for Identifying the Medicinal Uses of Plant-Derived Natural Compounds},
  journal   = {Frontiers in Pharmacology},
  volume    = {11},
  pages     = {584875},
  year      = {2020},
  date      = {2020-01-01},
  issn      = {1663-9812},
  doi       = {10.3389/fphar.2020.584875},
  url       = {https://www.frontiersin.org/journals/pharmacology/articles/10.3389/fphar.2020.584875},
  urldate   = {2020-01-01},
  abstract  = {Medicinal plants and their extracts have been used as important sources for drug discovery. In particular, plant-derived natural compounds, including phytochemicals, antioxidants, vitamins, and minerals, are gaining attention as they promote health and prevent disease. Although several in vitro methods have been developed to confirm the biological activities of natural compounds, there is still considerable room to reduce time and cost. To overcome these limitations, several in silico methods have been proposed for conducting large-scale analysis, but they are still limited in terms of dealing with incomplete and heterogeneous natural compound data. Here, we propose a deep learning-based approach to identify the medicinal uses of natural compounds by exploiting massive and heterogeneous drug and natural compound data. The rationale behind this approach is that deep learning can effectively utilize heterogeneous features to alleviate incomplete information. Based on latent knowledge, molecular interactions, and chemical property features, we generated 686 dimensional features for 4,507 natural compounds and 2,882 approved and investigational drugs. The deep learning model was trained using the generated features and verified drug indication information. When the features of natural compounds were applied as input to the trained model, potential efficacies were successfully predicted with high accuracy, sensitivity, and specificity.},
  keywords  = {Bioinformatics, Chemical property, Deep learning, Molecular interaction, Natural product, Network analysis, Text mining},
  pubstate  = {published},
  tppubtype = {article},
}
2018
Sunyong Yoo; Kwansoo Kim; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Chemical property, Ethnopharmacology, Herbal medicine, Molecular analysis, Network analysis, Phytochemical
% Journal article, Nutrients vol. 10 no. 8 (2018), article 1042.
@article{yoo2018discovering,
  author    = {Sunyong Yoo and Kwansoo Kim and Hojung Nam and Doheon Lee},
  title     = {Discovering health benefits of phytochemicals with integrated analysis of the molecular network, chemical properties and ethnopharmacological evidence},
  journal   = {Nutrients},
  volume    = {10},
  number    = {8},
  pages     = {1042},
  year      = {2018},
  date      = {2018-08-08},
  publisher = {MDPI},
  doi       = {10.3390/nu10081042},
  url       = {https://www.mdpi.com/2072-6643/10/8/1042},
  urldate   = {2018-08-08},
  abstract  = {Identifying the health benefits of phytochemicals is an essential step in drug and functional food development. While many in vitro screening methods have been developed to identify the health effects of phytochemicals, there is still room for improvement because of high cost and low productivity. Therefore, researchers have alternatively proposed in silico methods, primarily based on three types of approaches; utilizing molecular, chemical or ethnopharmacological information. Although each approach has its own strength in analyzing the characteristics of phytochemicals, previous studies have not considered them all together. Here, we apply an integrated in silico analysis to identify the potential health benefits of phytochemicals based on molecular analysis and chemical properties as well as ethnopharmacological evidence. From the molecular analysis, we found an average of 415.6 health effects for 591 phytochemicals. We further investigated ethnopharmacological evidence of phytochemicals and found that on average 129.1 (31%) of the predicted health effects had ethnopharmacological evidence. Lastly, we investigated chemical properties to confirm whether they are orally bio-available, drug available or effective on certain tissues. The evaluation results indicate that the health effects can be predicted more accurately by cooperatively considering the molecular analysis, chemical properties and ethnopharmacological evidence.},
  keywords  = {Bioinformatics, Chemical property, Ethnopharmacology, Herbal medicine, Molecular analysis, Network analysis, Phytochemical},
  pubstate  = {published},
  tppubtype = {article},
}
Kyungrin Noh; Sunyong Yoo; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Medicinal Compound, Metabolite, Natural product
% Journal article, BMC Bioinformatics vol. 19 (2018). NOTE(review): number 205 may be the article number rather than an issue number - confirm against the publisher record.
@article{noh2018systematic,
  title     = {A systematic approach to identify therapeutic effects of natural products based on human metabolite information},
  author    = {Kyungrin Noh and Sunyong Yoo and Doheon Lee},
  url       = {https://link.springer.com/article/10.1186/s12859-018-2196-0},
  doi       = {10.1186/s12859-018-2196-0},
  year      = {2018},
  date      = {2018-06-13},
  urldate   = {2018-06-13},
  journal   = {BMC Bioinformatics},
  volume    = {19},
  number    = {205},
  pages     = {49--55},
  publisher = {Springer},
  abstract  = {Background
Natural products have been widely investigated in the drug development field. Their traditional use cases as medicinal agents and their resemblance of our endogenous compounds show the possibility of new drug development. Many researchers have focused on identifying therapeutic effects of natural products, yet the resemblance of natural products and human metabolites has been rarely touched.
Methods
We propose a novel method which predicts therapeutic effects of natural products based on their similarity with human metabolites. In this study, we compare the structure, target and phenotype similarities between natural products and human metabolites to capture molecular and phenotypic properties of both compounds. With the generated similarity features, we train support vector machine model to identify similar natural product and human metabolite pairs. The known functions of human metabolites are then mapped to the paired natural products to predict their therapeutic effects.
Results
With our selected three feature sets, structure, target and phenotype similarities, our trained model successfully paired similar natural products and human metabolites. When applied to the natural product derived drugs, we could successfully identify their indications with high specificity and sensitivity. We further validated the found therapeutic effects of natural products with the literature evidence.
Conclusions
These results suggest that our model can match natural products to similar human metabolites and provide possible therapeutic effects of natural products. By utilizing the similar human metabolite information, we expect to find new indications of natural products which could not be covered by previous in silico methods.},
  keywords  = {Bioinformatics, Medicinal Compound, Metabolite, Natural product},
  pubstate  = {published},
  tppubtype = {article}
}
Natural products have been widely investigated in the drug development field. Their traditional use cases as medicinal agents and their resemblance of our endogenous compounds show the possibility of new drug development. Many researchers have focused on identifying therapeutic effects of natural products, yet the resemblance of natural products and human metabolites has been rarely touched.
Methods
We propose a novel method which predicts therapeutic effects of natural products based on their similarity with human metabolites. In this study, we compare the structure, target and phenotype similarities between natural products and human metabolites to capture molecular and phenotypic properties of both compounds. With the generated similarity features, we train support vector machine model to identify similar natural product and human metabolite pairs. The known functions of human metabolites are then mapped to the paired natural products to predict their therapeutic effects.
Results
With our selected three feature sets, structure, target and phenotype similarities, our trained model successfully paired similar natural products and human metabolites. When applied to the natural product derived drugs, we could successfully identify their indications with high specificity and sensitivity. We further validated the found therapeutic effects of natural products with the literature evidence.
Conclusions
These results suggest that our model can match natural products to similar human metabolites and provide possible therapeutic effects of natural products. By utilizing the similar human metabolite information, we expect to find new indications of natural products which could not be covered by previous in silico methods.
Sunyong Yoo; Kyungrin Noh; Moonshik Shin; Junseok Park; Kwang-Hyung Lee; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: ADR, Bioinformatics, Drugs, Network analysis
@article{yoo2018silico,
  title     = {In silico profiling of systemic effects of drugs to predict unexpected interactions},
  author    = {Yoo, Sunyong and Noh, Kyungrin and Shin, Moonshik and Park, Junseok and Lee, Kwang-Hyung and Nam, Hojung and Lee, Doheon},
  url       = {https://www.nature.com/articles/s41598-018-19614-5},
  doi       = {10.1038/s41598-018-19614-5},
  year      = {2018},
  date      = {2018-01-25},
  urldate   = {2018-01-25},
  journal   = {Scientific Reports},
  volume    = {8},
  number    = {1},
  pages     = {1612},
  publisher = {Nature Publishing Group UK},
  address   = {London},
  abstract  = {Identifying unexpected drug interactions is an essential step in drug development. Most studies focus on predicting whether a drug pair interacts or is effective on a certain disease without considering the mechanism of action (MoA). Here, we introduce a novel method to infer effects and interactions of drug pairs with MoA based on the profiling of systemic effects of drugs. By investigating propagated drug effects from the molecular and phenotypic networks, we constructed profiles of 5,441 approved and investigational drugs for 3,833 phenotypes. Our analysis indicates that highly connected phenotypes between drug profiles represent the potential effects of drug pairs and the drug pairs with strong potential effects are more likely to interact. When applied to drug interactions with verified effects, both therapeutic and adverse effects have been successfully identified with high specificity and sensitivity. Finally, tracing drug interactions in molecular and phenotypic networks allows us to understand the MoA.},
  keywords  = {ADR, Bioinformatics, Drugs, Network analysis},
  pubstate  = {published},
  tppubtype = {article}
}
2016
Jongsoo Keum; Sunyong Yoo; Doheon Lee; Hojung Nam
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Database, Herbal medicine, Target proteins
@article{keum2016prediction,
  title     = {Prediction of compound-target interactions of natural products using large-scale drug and protein information},
  author    = {Keum, Jongsoo and Yoo, Sunyong and Lee, Doheon and Nam, Hojung},
  url       = {https://link.springer.com/article/10.1186/s12859-016-1081-y},
  doi       = {10.1186/s12859-016-1081-y},
  year      = {2016},
  date      = {2016-07-28},
  urldate   = {2016-07-28},
  journal   = {BMC Bioinformatics},
  volume    = {17},
  number    = {219},
  pages     = {417--425},
  publisher = {Springer},
  abstract  = {Background
Verifying the proteins that are targeted by compounds of natural herbs will be helpful to select natural herb-based drug candidates. However, this entails a great deal of effort to clarify the interaction throughout in vitro or in vivo experiments. In this light, in silico prediction of the interactions between compounds and target proteins can help ease the efforts.
Results
In this study, we performed in silico predictions of herbal compound target identification. First, data related to compounds, target proteins, and interactions between them are taken from the DrugBank database. Then we characterized six classes of compound-target interaction in humans including G-protein-coupled receptors (GPCRs), ion channel, enzymes, receptors, transporters, and other proteins. Also, classification-prediction models that predict the interactions between compounds and target proteins through a machine learning method were constructed using these matrices. As a result, AUC values of six classes are 0.94, 0.93, 0.90, 0.89, 0.91, and 0.76 respectively. Finally, the interactions of compounds from natural products were predicted using the constructed classification models. Furthermore, from our predicted results, we confirmed that several important disease related proteins were predicted as targets of natural herbal compounds.
Conclusions
We constructed classification-prediction models that predict the interactions between compounds and target proteins. The constructed models showed good prediction performances, and numbers of potential natural compounds target proteins were predicted from our results.},
  keywords  = {Bioinformatics, Database, Herbal medicine, Target proteins},
  pubstate  = {published},
  tppubtype = {article}
}
Verifying the proteins that are targeted by compounds of natural herbs will be helpful for selecting natural herb-based drug candidates. However, this entails a great deal of effort to clarify the interactions through in vitro or in vivo experiments. In this light, in silico prediction of the interactions between compounds and target proteins can help reduce this effort.
Results
In this study, we performed in silico predictions of herbal compound target identification. First, data related to compounds, target proteins, and interactions between them are taken from the DrugBank database. Then we characterized six classes of compound-target interaction in humans including G-protein-coupled receptors (GPCRs), ion channel, enzymes, receptors, transporters, and other proteins. Also, classification-prediction models that predict the interactions between compounds and target proteins through a machine learning method were constructed using these matrices. As a result, AUC values of six classes are 0.94, 0.93, 0.90, 0.89, 0.91, and 0.76 respectively. Finally, the interactions of compounds from natural products were predicted using the constructed classification models. Furthermore, from our predicted results, we confirmed that several important disease related proteins were predicted as targets of natural herbal compounds.
Conclusions
We constructed classification-prediction models that predict the interactions between compounds and target proteins. The constructed models showed good prediction performance, and a number of potential target proteins of natural compounds were predicted from our results.
2015
Moonshik Shin; Sungyoung Yoo; Suhyun Ha; Kyungrin Noh; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: ADME, Bioinformatics, Natural product
@conference{shin2015identifying,
  title     = {Identifying Potential Bioactive Compounds of Natural Products by Combining {ADMET} Prediction Methods},
  author    = {Shin, Moonshik and Yoo, Sungyoung and Ha, Suhyun and Noh, Kyungrin and Lee, Doheon},
  url       = {https://dl.acm.org/doi/abs/10.1145/2811163.2811168},
  doi       = {10.1145/2811163.2811168},
  year      = {2015},
  date      = {2015-01-01},
  urldate   = {2015-01-01},
  booktitle = {Proceedings of the ACM Ninth International Workshop on Data and Text Mining in Biomedical Informatics},
  pages     = {19},
  publisher = {CIKM},
  abstract  = {Herbs consist of various chemical compounds. Thus, identifying potential bioactive compounds from those diversity is an important task for studies in the herb, food and natural products. Even though various computational approaches are developed for predicting bioactive compounds, the prediction performances are diverse due to different methods and dataset. Therefore, there is urgent demand for an approach that connotes the previous methods and identify potential bioactive compounds with high accuracy. To meet the demand, we proposed a filtering strategy that identifies potential bioactive compounds by combining previously developed computational methods which predict ADMET, such as Human Intestinal Absorption (HIA) and Caco-2 permeability. Our approach was evaluated on 930 compounds that are known as bioactive compounds, which were extracted from literature, DrugBank and Dr. Dukes phytochemical databases. By applying our filtering strategy, 97.5% of the known bioactive compounds were correctly predicted as bioactive. We examined whether our approach can distinguish the potential bioactive compound from the non-potential bioactive compounds with Fishers' exact test, and a reasonable p-value (3.806 x 10-9) was derived. For the next step, we are planning to develop a machine-learning based method to improve our filtering approach.},
  keywords  = {ADME, Bioinformatics, Natural product},
  pubstate  = {published},
  tppubtype = {conference}
}