2024
이도현; 유선용
Abstract | Links | BibTeX | Tags: Cardiotoxicity, Machine learning
% Conference paper (Korean): machine-learning-based prediction of the cardiotoxicity of compounds.
% The proceedings title is stored only in booktitle; the page range uses the BibTeX double hyphen.
@conference{이도현2024기계학습,
  author    = {이도현 and 유선용},
  title     = {기계학습 기반 화합물의 심장독성 예측 연구},
  booktitle = {한국정보과학회 학술발표논문집},
  pages     = {825--827},
  publisher = {한국정보과학회},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://www.dbpia.co.kr/pdf/pdfView.do?nodeId=NODE11862000&googleIPSandBox=false&mark=0&minRead=5&ipRange=false&b2cLoginYN=false&icstClss=010000&isPDFSizeAllowed=true&accessgl=Y&language=ko_KR&hasTopBanner=true},
  urldate   = {2024-01-01},
  abstract  = {인간 에테르-아-고-고 관련 유전자(hERG) 채널은 심장의 전기적 활동을 조절하는 데 중요한 역할을 한다. 이 채널을 차단하는 약물은 심각한 심장독성을 일으킬 수 있는데, 기존의 안전성 검사는 많은 시간과 비용을 요구한다는 단점이 있다. 이 문제를 해결하기 위해, 본 연구에서는 in silico 방법을 이용하여 hERG 차단제를 예측함으로써 심장독성을 파악하는 모델을 제안한다. 화합물의 구조적 정보를 파악하기 위해 ECFP(Extended Connectivity Fingerprint)를 사용하여 변환하였고, 물리화학적 특성 또한 추출하였고, 추출한 데이터를 기반으로 기계학습 모델을 구축하였다. 이 접근법은 심장독성을 유발할 수 있는 신약 후보 물질을 효과적으로 선별할 수 있게 한다. 결과적으로, 이 연구는 안전하고 효율적인 후보 물질의 발굴에 중요한 기여를 할 것으로 기대된다.},
  keywords  = {Cardiotoxicity, Machine learning},
  pubstate  = {published},
  tppubtype = {conference},
}
2023
Jinmyung Jung; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Breast cancer, Feature importance, Gene expression, Machine learning, Metastasis marker
% Journal article: Genes, volume 14, number 9 (2023).
@article{jung2023identification,
  author    = {Jinmyung Jung and Sunyong Yoo},
  title     = {Identification of Breast Cancer Metastasis Markers from Gene Expression Profiles Using Machine Learning Approaches},
  journal   = {Genes},
  volume    = {14},
  number    = {9},
  pages     = {1820},
  publisher = {MDPI},
  year      = {2023},
  date      = {2023-09-20},
  doi       = {10.3390/genes14091820},
  url       = {https://www.mdpi.com/2073-4425/14/9/1820},
  urldate   = {2023-09-20},
  abstract  = {Cancer metastasis accounts for approximately 90% of cancer deaths, and elucidating markers in metastasis is the first step in its prevention. To characterize metastasis marker genes (MGs) of breast cancer, XGBoost models that classify metastasis status were trained with gene expression profiles from TCGA. Then, a metastasis score (MS) was assigned to each gene by calculating the inner product between the feature importance and the AUC performance of the models. As a result, 54, 202, and 357 genes with the highest MS were characterized as MGs by empirical p-value cutoffs of 0.001, 0.005, and 0.01, respectively. The three sets of MGs were compared with those from existing metastasis marker databases, which provided significant results in most comparisons (p-value < 0.05). They were also significantly enriched in biological processes associated with breast cancer metastasis. The three MGs, SPPL2C, KRT23, and RGS7, showed highly significant results (p-value < 0.01) in the survival analysis. The MGs that could not be identified by statistical analysis (e.g., GOLM1, ELAVL1, UBP1, and AZGP1), as well as the MGs with the highest MS (e.g., ZNF676, FAM163B, LDOC2, IRF1, and STK40), were verified via the literature. Additionally, we checked how close the MGs were to each other in the protein–protein interaction networks. We expect that the characterized markers will help understand and prevent breast cancer metastasis.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Bioinformatics, Breast cancer, Feature importance, Gene expression, Machine learning, Metastasis marker},
  pubstate  = {published},
  tppubtype = {article},
}
Myeonghyeon Jeong; Sunyong Yoo
Links | BibTeX | Tags: Machine learning
% Conference paper presented at DTMBIO 2023 (FetoML).
% booktitle holds the bare conference name: bibliography styles add the leading "In" themselves.
% NOTE(review): the citation key "nokey" looks like a placeholder; confirm it is unique in the full file.
@conference{nokey,
  author    = {Myeonghyeon Jeong and Sunyong Yoo},
  title     = {{FetoML}: Interpretable predictions of the fetotoxicity of drugs based on machine learning approaches},
  booktitle = {17th International Conference on Data and Text Mining in Biomedical Informatics},
  pages     = {20},
  publisher = {DTMBIO},
  year      = {2023},
  date      = {2023-01-02},
  url       = {https://dtmbio.net/},
  urldate   = {2023-01-02},
  keywords  = {Machine learning},
  pubstate  = {published},
  tppubtype = {conference},
}
Myeonghyeon Jeong; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Machine learning
% Journal article: Journal of Life Science, volume 33, number 6 (2023).
% The page range uses the BibTeX double hyphen rather than a literal en dash.
@article{jeong2023predicting,
  author    = {Myeonghyeon Jeong and Sunyong Yoo},
  title     = {Predicting the Fetotoxicity of Drugs Using Machine Learning},
  journal   = {Journal of Life Science},
  volume    = {33},
  number    = {6},
  pages     = {490--497},
  publisher = {Korean Society of Life Science},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.5352/JLS.2023.33.6.490},
  url       = {https://koreascience.kr/article/JAKO202320150261638.page},
  urldate   = {2023-01-01},
  abstract  = {Pregnant women may need to take medications to treat preexisting diseases or diseases that develop during pregnancy. However, some drugs may be fetotoxic and lead to, for example, teratogenicity and growth retardation. Predicting the fetotoxicity of drugs is thus important for the health of the mother and fetus. The fetotoxicity of many drugs has not been established because various challenges hinder the ability of researchers to determine their fetotoxicity. The need exists for in silico-based fetotoxicity assessment models, as they can modernize the testing paradigm, improve predictability, and reduce the use of animals and the costs of fetotoxicity testing. In this study, we collected data on the fetotoxicity of drugs and constructed fetotoxicity prediction models based on various machine learning algorithms. We optimized the models for more precise predictions by tuning the hyperparameters. We then performed quantitative performance evaluations. The results indicated that the constructed machine learning-based models had high performance (AUROC >0.85, AUPR >0.9) in fetotoxicity prediction. We also analyzed the feature importance of our model's predictions, which could be leveraged to identify the specific features of drugs that are strongly associated with fetotoxicity. The proposed model can be used to prescreen drugs and drug candidates at a lower cost and in less time. It provides a predictive score for fetotoxicity risk, which may be beneficial in the design of studies on fetotoxicity in human pregnancy.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Machine learning},
  pubstate  = {published},
  tppubtype = {article},
}
이소연; 유선용
Abstract | Links | BibTeX | Dimensions | Tags: Hepatotoxicity, Machine learning
% Journal article (Korean): machine-learning prediction of drug-induced liver injury of compounds.
% The page range uses the BibTeX double hyphen; the abstract starts directly at its first sentence.
@article{이소연2023기계학습을,
  author    = {이소연 and 유선용},
  title     = {기계학습을 활용한 화합물의 약인성 간 손상 예측 방법 연구},
  journal   = {정보과학회논문지},
  volume    = {50},
  number    = {9},
  pages     = {777--783},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.5626/JOK.2023.50.9.777},
  issn      = {2383-6296},
  url       = {https://www.dbpia.co.kr/pdf/pdfView.do?nodeId=NODE11519759&googleIPSandBox=false&mark=0&minRead=10&ipRange=false&b2cLoginYN=false&icstClss=010000&isPDFSizeAllowed=true&nodeHistoryTotalCnt=2&accessgl=Y&language=ko_KR&hasTopBanner=true},
  urldate   = {2023-01-01},
  abstract  = {약인성 간 손상은 임상시험용 의약품이 시장에 유통되는 것을 막는 요인 중 하나이다. 따라서 사전에 화합물의 약인성 간 손상 위험 평가가 필요하다. 안전성을 평가하기 위해 생체 내 (in vivo) 및 시험관 내 시험 방법(in vitro)이 사용되지만 이들은 시간과 비용이 많이 든다. 본 연구에서는 위의 문제를 극복하고자 random forest, light gradient boosting machine, logistic regression 모델을 제안한다. 모델은 입력으로 화합물의 분자 구조와 물리화학적 특징을 사용하고 출력으로 약인성 간 손상을 예측한다. 최적의 모델은 평가 지표에서 전반적으로 좋은 성능을 보인 random forest였다. 본 연구에서 제안된 모델은 신약 후보물질의 잠재적인 간 손상을 미리 파악함으로써 신약 개발 과정에 도움을 줄 수 있을 것으로 기대된다.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Hepatotoxicity, Machine learning},
  pubstate  = {published},
  tppubtype = {article},
}
2022
Seonwoo Jung; Min-Keun Song; Eunjoo Lee; Sejin Bae; Yeon-Yong Kim; Doheon Lee; Myoung Jin Lee; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Atrial fibrillation, Attention mechanism, Deep learning, Machine learning, Medical informatics, National health insurance service, Stroke
% Journal article: Frontiers in Bioscience-Landmark, volume 27, number 3 (2022).
% The url is the article address without marketing query parameters.
% The abstract is structured: each section heading sits on its own line.
@article{jung2022predicting,
  author    = {Seonwoo Jung and Min-Keun Song and Eunjoo Lee and Sejin Bae and Yeon-Yong Kim and Doheon Lee and Myoung Jin Lee and Sunyong Yoo},
  title     = {Predicting ischemic stroke in patients with atrial fibrillation using machine learning},
  journal   = {Frontiers in Bioscience-Landmark},
  volume    = {27},
  number    = {3},
  pages     = {80},
  publisher = {IMR Press},
  year      = {2022},
  date      = {2022-03-04},
  doi       = {10.31083/j.fbl2703080},
  url       = {https://www.imrpress.com/journal/FBL/27/3/10.31083/j.fbl2703080/htm},
  urldate   = {2022-03-04},
  abstract  = {Background
Atrial fibrillation (AF) is a well-known risk factor for stroke. Predicting the risk is important to prevent the first and secondary attacks of cerebrovascular diseases by determining early treatment. This study aimed to predict the ischemic stroke in AF patients based on the massive and complex Korean National Health Insurance (KNHIS) data through a machine learning approach.
Methods
We extracted 65-dimensional features, including demographics, health examination, and medical history information, of 754,949 patients with AF from KNHIS. Logistic regression was used to determine whether the extracted features had a statistically significant association with ischemic stroke occurrence. Then, we constructed the ischemic stroke prediction model using an attention-based deep neural network. The extracted features were used as input, and the occurrence of ischemic stroke after the diagnosis of AF was the output used to train the model.
Results
We found 48 features significantly associated with ischemic stroke occurrence through regression analysis (p-value < 0.001). When the proposed deep learning model was applied to 150,989 AF patients, it was confirmed that the occurrence ischemic stroke was predicted to be higher AUROC (AUROC = 0.727 ± 0.003) compared to CHA2DS2-VASc score (AUROC = 0.651 ± 0.007) and other machine learning methods.
Conclusions
As part of preventive medicine, this study could help AF patients prepare for ischemic stroke prevention based on predicted stoke associated features and risk scores.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Atrial fibrillation, Attention mechanism, Deep learning, Machine learning, Medical informatics, National health insurance service, Stroke},
  pubstate  = {published},
  tppubtype = {article},
}
Background
Atrial fibrillation (AF) is a well-known risk factor for stroke. Predicting the risk is important to prevent the first and secondary attacks of cerebrovascular diseases by determining early treatment. This study aimed to predict the ischemic stroke in AF patients based on the massive and complex Korean National Health Insurance (KNHIS) data through a machine learning approach.
Methods
We extracted 65-dimensional features, including demographics, health examination, and medical history information, of 754,949 patients with AF from KNHIS. Logistic regression was used to determine whether the extracted features had a statistically significant association with ischemic stroke occurrence. Then, we constructed the ischemic stroke prediction model using an attention-based deep neural network. The extracted features were used as input, and the occurrence of ischemic stroke after the diagnosis of AF was the output used to train the model.
Results
We found 48 features significantly associated with ischemic stroke occurrence through regression analysis (p-value < 0.001). When the proposed deep learning model was applied to 150,989 AF patients, it was confirmed that the occurrence ischemic stroke was predicted to be higher AUROC (AUROC = 0.727 ± 0.003) compared to CHA2DS2-VASc score (AUROC = 0.651 ± 0.007) and other machine learning methods.
Conclusions
As part of preventive medicine, this study could help AF patients prepare for ischemic stroke prevention based on predicted stoke associated features and risk scores.
2021
정선우; 이민지; 유선용
Abstract | Links | BibTeX | Dimensions | Tags: Machine learning, Medical informatics
% Journal article (Korean): machine-learning-based stroke risk prediction using public big data.
% The page range uses the BibTeX double hyphen rather than a literal en dash.
@article{정선우2021공공빅데이터를,
  author    = {정선우 and 이민지 and 유선용},
  title     = {공공빅데이터를 활용한 기계학습 기반 뇌졸중 위험도 예측},
  journal   = {한국항행학회논문지},
  volume    = {25},
  number    = {1},
  pages     = {96--101},
  publisher = {한국항행학회},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.12673/jant.2021.25.1.96},
  url       = {https://kiss.kstudy.com/Detail/Ar?key=3863715},
  urldate   = {2021-01-01},
  abstract  = {본 논문은 빅데이터를 이용하여 심방세동 환자의 뇌졸중 발병을 예측하는 기계 학습 모델을 제시한다. 학습 데이터로는 국민 건강 보험공단에서 제공하는 대한민국 전수에 해당하는 심방세동 환자의 정보를 수집하였다. 수집된 정보는 인구사회학, 과거 병력, 건강검진을 포함한 68개 독립변수로 구성된다. 본 연구의 목표는 기존 심방세동 환자의 뇌졸중 위험도 예측에 사용되던 통계적 모델 (CHADS2, CHA2DS2-VASc)의 성능을 검증하고 기계 학습 모델을 적용하여 기존 모델보다 높은 정확도를 가지는 모델을 제시하는 것이다. 제안하는 모델의 정확도, AUROC (area under the receiver operating characteristic)를 검증한 결과 제안하는 기계 학습 기반의 모형이 심방세동 환자의 뇌졸중 위험도를 사용한 모델이 기존의 통계적 모델보다 높은 정확도, 민감도, 특이도를 가지는 것을 확인할 수 있었다.},
  keywords  = {Machine learning, Medical informatics},
  pubstate  = {published},
  tppubtype = {article},
}
2018
Sunyong Yoo; Suhyun Ha; Moonshik Shin; Kyungrin Noh; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Database, Drugs, Ethnopharmacology, Machine learning
% Journal article: IEEE Access, volume 6 (2018).
% The page range uses the BibTeX double hyphen rather than a literal en dash.
@article{yoo2018data,
  author    = {Sunyong Yoo and Suhyun Ha and Moonshik Shin and Kyungrin Noh and Hojung Nam and Doheon Lee},
  title     = {A data-driven approach for identifying medicinal combinations of natural products},
  journal   = {IEEE Access},
  volume    = {6},
  pages     = {58106--58118},
  publisher = {IEEE},
  year      = {2018},
  date      = {2018-10-05},
  doi       = {10.1109/ACCESS.2018.2874089},
  url       = {https://ieeexplore.ieee.org/abstract/document/8482294},
  urldate   = {2018-10-05},
  abstract  = {Combinations of natural products have been used as important sources of disease treatments. Existing databases contain information about prescriptions, herbs, and compounds and their relationships with phenotypes, but they do not have information on the use of combinations of natural product compounds. In this paper, we identified large-scale associations between natural product combinations and phenotypes by applying an association rule mining technique to integrated information on herbal medicine, combination drugs, functional foods, molecular compounds, and target genes. The rationale behind this approach is that natural products commonly found in medicinal multicomponent mixtures have statistically significant associations with the therapeutic effects of the multicomponent mixtures. Based on a molecular network analysis and an external literature validation, we show that the inferred associations are valuable information for identifying medicinal combinations of natural products since they have statistically significant closeness proximity in the molecular layer and have much experimental evidence. All results are available through the workbench site at http://biosoft.kaist.ac.kr/coconut to facilitate the investigation of the medicinal use of natural products and their combinations.},
  keywords  = {Database, Drugs, Ethnopharmacology, Machine learning},
  pubstate  = {published},
  tppubtype = {article},
}