PUBLICATIONS
2024
2.
Soyeon Lee; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Artificial Intelligence, Attention mechanism, Bioinformatics, Deep learning, Drug-induced liver injury, Feature importance, Hepatotoxicity, in silico
@article{lee2024interdili,
  title = {{InterDILI}: interpretable prediction of drug-induced liver injury through permutation feature importance and attention mechanism},
  author = {Lee, Soyeon and Yoo, Sunyong},
  url = {https://link.springer.com/article/10.1186/s13321-023-00796-8},
  doi = {10.1186/s13321-023-00796-8},
  year = {2024},
  date = {2024-01-03},
  urldate = {2024-01-03},
  journal = {Journal of Cheminformatics},
  volume = {16},
  number = {1},
  pages = {1},
  publisher = {Springer},
  abstract = {Safety is one of the important factors constraining the distribution of clinical drugs on the market. Drug-induced liver injury (DILI) is the leading cause of safety problems produced by drug side effects. Therefore, the DILI risk of approved drugs and potential drug candidates should be assessed. Currently, in vivo and in vitro methods are used to test DILI risk, but both methods are labor-intensive, time-consuming, and expensive. To overcome these problems, many in silico methods for DILI prediction have been suggested. Previous studies have shown that DILI prediction models can be utilized as prescreening tools, and they achieved a good performance. However, there are still limitations in interpreting the prediction results. Therefore, this study focused on interpreting the model prediction to analyze which features could potentially cause DILI. For this, five publicly available datasets were collected to train and test the model. Then, various machine learning methods were applied using substructure and physicochemical descriptors as inputs and the DILI label as the output. The interpretation of feature importance was analyzed by recognizing the following general-to-specific patterns: (i) identifying general important features of the overall DILI predictions, and (ii) highlighting specific molecular substructures which were highly related to the DILI prediction for each compound. The results indicated that the model not only captured the previously known properties to be related to DILI but also proposed a new DILI potential substructural of physicochemical properties. The models for the DILI prediction achieved an area under the receiver operating characteristic (AUROC) of 0.88–0.97 and an area under the Precision-Recall curve (AUPRC) of 0.81–0.95. From this, we hope the proposed models can help identify the potential DILI risk of drug candidates at an early stage and offer valuable insights for drug development.},
  note = {Correspondence to Sunyong Yoo},
  keywords = {Artificial Intelligence, Attention mechanism, Bioinformatics, Deep learning, Drug-induced liver injury, Feature importance, Hepatotoxicity, in silico},
  pubstate = {published},
  tppubtype = {article}
}
Safety is one of the important factors constraining the distribution of clinical drugs on the market. Drug-induced liver injury (DILI) is the leading cause of safety problems produced by drug side effects. Therefore, the DILI risk of approved drugs and potential drug candidates should be assessed. Currently, in vivo and in vitro methods are used to test DILI risk, but both methods are labor-intensive, time-consuming, and expensive. To overcome these problems, many in silico methods for DILI prediction have been suggested. Previous studies have shown that DILI prediction models can be utilized as prescreening tools, and they achieved a good performance. However, there are still limitations in interpreting the prediction results. Therefore, this study focused on interpreting the model prediction to analyze which features could potentially cause DILI. For this, five publicly available datasets were collected to train and test the model. Then, various machine learning methods were applied using substructure and physicochemical descriptors as inputs and the DILI label as the output. The interpretation of feature importance was analyzed by recognizing the following general-to-specific patterns: (i) identifying general important features of the overall DILI predictions, and (ii) highlighting specific molecular substructures which were highly related to the DILI prediction for each compound. The results indicated that the model not only captured the previously known properties to be related to DILI but also proposed a new DILI potential substructural of physicochemical properties. The models for the DILI prediction achieved an area under the receiver operating characteristic (AUROC) of 0.88–0.97 and an area under the Precision-Recall curve (AUPRC) of 0.81–0.95. From this, we hope the proposed models can help identify the potential DILI risk of drug candidates at an early stage and offer valuable insights for drug development.
2023
1.
이소연; 유선용
Abstract | Links | BibTeX | Dimensions | Tags: Hepatotoxicity, Machine learning
@article{이소연2023기계학습을,
  title = {기계학습을 활용한 화합물의 약인성 간 손상 예측 방법 연구},
  author = {이소연 and 유선용},
  url = {https://www.dbpia.co.kr/pdf/pdfView.do?nodeId=NODE11519759&googleIPSandBox=false&mark=0&minRead=10&ipRange=false&b2cLoginYN=false&icstClss=010000&isPDFSizeAllowed=true&nodeHistoryTotalCnt=2&accessgl=Y&language=ko_KR&hasTopBanner=true},
  doi = {10.5626/JOK.2023.50.9.777},
  issn = {2383-6296},
  year = {2023},
  date = {2023-01-01},
  urldate = {2023-01-01},
  journal = {정보과학회논문지},
  volume = {50},
  number = {9},
  pages = {777--783},
  abstract = {약인성 간 손상은 임상시험용 의약품이 시장에 유통되는 것을 막는 요인 중 하나이다. 따라서 사전에 화합물의 약인성 간 손상 위험 평가가 필요하다. 안전성을 평가하기 위해 생체 내 (in vivo) 및 시험관 내 시험 방법(in vitro)이 사용되지만 이들은 시간과 비용이 많이 든다. 본 연구에서는 위의 문제를 극복하고자 random forest, light gradient boosting machine, logistic regression 모델을 제안한다. 모델은 입력으로 화합물의 분자 구조와 물리화학적 특징을 사용하고 출력으로 약인성 간 손상을 예측한다. 최적의 모델은 평가 지표에서 전반적으로 좋은 성능을 보인 random forest였다. 본 연구에서 제안된 모델은 신약 후보물질의 잠재적인 간 손상을 미리 파악함으로써 신약 개발 과정에 도움을 줄 수 있을 것으로 기대된다.},
  note = {Correspondence to Sunyong Yoo},
  keywords = {Hepatotoxicity, Machine learning},
  pubstate = {published},
  tppubtype = {article}
}
약인성 간 손상은 임상시험용 의약품이 시장에 유통되는 것을 막는 요인 중 하나이다. 따라서 사전에 화합물의 약인성 간 손상 위험 평가가 필요하다. 안전성을 평가하기 위해 생체 내 (in vivo) 및 시험관 내 시험 방법(in vitro)이 사용되지만 이들은 시간과 비용이 많이 든다. 본 연구에서는 위의 문제를 극복하고자 random forest, light gradient boosting machine, logistic regression 모델을 제안한다. 모델은 입력으로 화합물의 분자 구조와 물리화학적 특징을 사용하고 출력으로 약인성 간 손상을 예측한다. 최적의 모델은 평가 지표에서 전반적으로 좋은 성능을 보인 random forest였다. 본 연구에서 제안된 모델은 신약 후보물질의 잠재적인 간 손상을 미리 파악함으로써 신약 개발 과정에 도움을 줄 수 있을 것으로 기대된다.