2021
Jinmyung Jung; Yongdeuk Hwang; Hongryul Ahn; Sunjae Lee; Sunyong Yoo
Abstract | Links | BibTeX | Dimensions | Tags: Cancer therapeutics, Genetic interaction, Network analysis, Refining process
@article{jung2021precise,
  title      = {Precise Characterization of Genetic Interactions in Cancer via Molecular Network Refining Processes},
  author     = {Jinmyung Jung and Yongdeuk Hwang and Hongryul Ahn and Sunjae Lee and Sunyong Yoo},
  url        = {https://www.mdpi.com/1422-0067/22/20/11114},
  doi        = {10.3390/ijms222011114},
  year       = {2021},
  date       = {2021-01-01},
  urldate    = {2021-01-01},
  journal    = {International Journal of Molecular Sciences},
  volume     = {22},
  number     = {20},
  pages      = {11114},
  publisher  = {MDPI},
  abstract   = {Genetic interactions (GIs), such as the synthetic lethal interaction, are promising therapeutic targets in precision medicine. However, despite extensive efforts to characterize GIs by large-scale perturbation screening, considerable false positives have been reported in multiple studies. We propose a new computational approach for improved precision in GI identification by applying constraints that consider actual biological phenomena. In this study, GIs were characterized by assessing mutation, loss of function, and expression profiles in the DEPMAP database. The expression profiles were used to exclude loss-of-function data for nonexpressed genes in GI characterization. More importantly, the characterized GIs were refined based on Kyoto Encyclopedia of Genes and Genomes (KEGG) or protein–protein interaction (PPI) networks, under the assumption that genes genetically interacting with a certain mutated gene are adjacent in the networks. As a result, the initial GIs characterized with CRISPR and RNAi screenings were refined to 65 and 23 GIs based on KEGG networks and to 183 and 142 GIs based on PPI networks. The evaluation of refined GIs showed improved precision with respect to known synthetic lethal interactions. The refining process also yielded a synthetic partner network (SPN) for each mutated gene, which provides insight into therapeutic strategies for the mutated genes; specifically, exploring the SPN of mutated BRAF revealed ELAVL1 as a potential target for treating BRAF-mutated cancer, as validated by previous research. We expect that this work will advance cancer therapeutic research.},
  keywords   = {Cancer therapeutics, Genetic interaction, Network analysis, Refining process},
  pubstate   = {published},
  tppubtype  = {article}
}
2020
Junseok Park; Seongkuk Park; Kwangmin Kim; Woochang Hwang; Sunyong Yoo; Gwan-su Yi; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Clinical trial, Medical informatics
@article{park2020interactive,
  title      = {An interactive retrieval system for clinical trial studies with context-dependent protocol elements},
  author     = {Junseok Park and Seongkuk Park and Kwangmin Kim and Woochang Hwang and Sunyong Yoo and Gwan-su Yi and Doheon Lee},
  url        = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0238290},
  doi        = {10.1371/journal.pone.0238290},
  year       = {2020},
  date       = {2020-09-18},
  urldate    = {2020-09-18},
  journal    = {PLOS ONE},
  volume     = {15},
  number     = {9},
  pages      = {e0238290},
  publisher  = {Public Library of Science},
  address    = {San Francisco, CA, USA},
  abstract   = {A well-defined protocol for a clinical trial guarantees a successful outcome report. When designing the protocol, most researchers refer to electronic databases and extract protocol elements using a keyword search. However, state-of-the-art database systems only offer text-based searches for user-entered keywords. In this study, we present a database system with a context-dependent and protocol-element-selection function for successfully designing a clinical trial protocol. To do this, we first introduce a database for a protocol retrieval system constructed from individual protocol data extracted from 184,634 clinical trials and 13,210 frame structures of clinical trial protocols. The database contains a variety of semantic information that allows the filtering of protocols during the search operation. Based on the database, we developed a web application called the clinical trial protocol database system (CLIPS; available at https://corus.kaist.edu/clips). This system enables an interactive search by utilizing protocol elements. To enable an interactive search for combinations of protocol elements, CLIPS provides optional next element selection according to the previous element in the form of a connected tree. The validation results show that our method achieves better performance than that of existing databases in predicting phenotypic features.},
  keywords   = {Clinical trial, Medical informatics},
  pubstate   = {published},
  tppubtype  = {article}
}
Junseok Park; Seongkuk Park; Gwangmin Kim; Kwangmin Kim; Jaegyun Jung; Sunyong Yoo; Gwan-Su Yi; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Clinical trial, Medical informatics
@article{park2020reliable,
  title      = {Reliable data collection in participatory trials to assess digital healthcare applications},
  author     = {Junseok Park and Seongkuk Park and Gwangmin Kim and Kwangmin Kim and Jaegyun Jung and Sunyong Yoo and Gwan-Su Yi and Doheon Lee},
  url        = {https://ieeexplore.ieee.org/abstract/document/9054970},
  doi        = {10.1109/ACCESS.2020.2985122},
  year       = {2020},
  date       = {2020-01-01},
  urldate    = {2020-01-01},
  journal    = {IEEE Access},
  volume     = {8},
  pages      = {79472--79490},
  publisher  = {IEEE},
  abstract   = {The number of digital healthcare mobile applications in the market is exponentially increasing owing to the development of mobile networks and widespread usage of smartphones. However, only few of these applications have been adequately validated. Like many mobile applications, in general, the use of healthcare applications is considered safe; thus, developers and end users can easily exchange them in the marketplace. However, existing platforms are unsuitable for collecting reliable data for evaluating the effectiveness of the applications. Moreover, these platforms reflect only the perspectives of developers and experts, and not of end users. For instance, typical clinical trial data collection methods are not appropriate for participant-driven assessment of healthcare applications because of their complexity and high cost. Thus, we identified the need for a participant-driven data collection platform for end users that is interpretable, systematic, and sustainable, as a first step to validate the effectiveness of the applications. To collect reliable data in the participatory trial format, we defined distinct stages for data preparation, storage, and sharing. The interpretable data preparation consists of a protocol database system and semantic feature retrieval method that allow a person without professional knowledge to create a protocol. The systematic data storage stage includes calculation of the collected data reliability weight. For sustainable data collection, we integrated a weight method and a future reward distribution function. We validated the methods through statistical tests involving 718 human participants. The results of a validation experiment demonstrate that the compared methods differ significantly and prove that the choice of an appropriate method is essential for reliable data collection, to facilitate effectiveness validation of digital healthcare applications.
Furthermore, we created a Web-based system for our pilot platform to collect reliable data in an integrated pipeline. We compared the platform features using existing clinical and pragmatic trial data collection platforms.},
  keywords   = {Clinical trial, Medical informatics},
  pubstate   = {published},
  tppubtype  = {article}
}
Sunyong Yoo; Hyung Chae Yang; Seongyeong Lee; Jaewook Shin; Seyoung Min; Eunjoo Lee; Minkeun Song; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Chemical property, Deep learning, Molecular interaction, Natural product, Network analysis, Text mining
@article{10.3389/fphar.2020.584875,
  author     = {Sunyong Yoo and Hyung Chae Yang and Seongyeong Lee and Jaewook Shin and Seyoung Min and Eunjoo Lee and Minkeun Song and Doheon Lee},
  title      = {A Deep Learning-Based Approach for Identifying the Medicinal Uses of Plant-Derived Natural Compounds},
  journal    = {Frontiers in Pharmacology},
  volume     = {11},
  pages      = {584875},
  year       = {2020},
  date       = {2020-01-01},
  urldate    = {2020-01-01},
  issn       = {1663-9812},
  doi        = {10.3389/fphar.2020.584875},
  url        = {https://www.frontiersin.org/journals/pharmacology/articles/10.3389/fphar.2020.584875},
  keywords   = {Bioinformatics, Chemical property, Deep learning, Molecular interaction, Natural product, Network analysis, Text mining},
  abstract   = {Medicinal plants and their extracts have been used as important sources for drug discovery. In particular, plant-derived natural compounds, including phytochemicals, antioxidants, vitamins, and minerals, are gaining attention as they promote health and prevent disease. Although several in vitro methods have been developed to confirm the biological activities of natural compounds, there is still considerable room to reduce time and cost. To overcome these limitations, several in silico methods have been proposed for conducting large-scale analysis, but they are still limited in terms of dealing with incomplete and heterogeneous natural compound data. Here, we propose a deep learning-based approach to identify the medicinal uses of natural compounds by exploiting massive and heterogeneous drug and natural compound data. The rationale behind this approach is that deep learning can effectively utilize heterogeneous features to alleviate incomplete information. Based on latent knowledge, molecular interactions, and chemical property features, we generated 686 dimensional features for 4,507 natural compounds and 2,882 approved and investigational drugs. The deep learning model was trained using the generated features and verified drug indication information. When the features of natural compounds were applied as input to the trained model, potential efficacies were successfully predicted with high accuracy, sensitivity, and specificity.},
  pubstate   = {published},
  tppubtype  = {article}
}
2018
Sunyong Yoo; Suhyun Ha; Moonshik Shin; Kyungrin Noh; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Database, Drugs, Ethnopharmacology, Machine learning
@article{yoo2018data,
  title      = {A data-driven approach for identifying medicinal combinations of natural products},
  author     = {Sunyong Yoo and Suhyun Ha and Moonshik Shin and Kyungrin Noh and Hojung Nam and Doheon Lee},
  url        = {https://ieeexplore.ieee.org/abstract/document/8482294},
  doi        = {10.1109/ACCESS.2018.2874089},
  year       = {2018},
  date       = {2018-10-05},
  urldate    = {2018-10-05},
  journal    = {IEEE Access},
  volume     = {6},
  pages      = {58106--58118},
  publisher  = {IEEE},
  abstract   = {Combinations of natural products have been used as important sources of disease treatments. Existing databases contain information about prescriptions, herbs, and compounds and their relationships with phenotypes, but they do not have information on the use of combinations of natural product compounds. In this paper, we identified large-scale associations between natural product combinations and phenotypes by applying an association rule mining technique to integrated information on herbal medicine, combination drugs, functional foods, molecular compounds, and target genes. The rationale behind this approach is that natural products commonly found in medicinal multicomponent mixtures have statistically significant associations with the therapeutic effects of the multicomponent mixtures. Based on a molecular network analysis and an external literature validation, we show that the inferred associations are valuable information for identifying medicinal combinations of natural products since they have statistically significant closeness proximity in the molecular layer and have much experimental evidence. All results are available through the workbench site at http://biosoft.kaist.ac.kr/coconut to facilitate the investigation of the medicinal use of natural products and their combinations.},
  keywords   = {Database, Drugs, Ethnopharmacology, Machine learning},
  pubstate   = {published},
  tppubtype  = {article}
}
Sunyong Yoo; Kwansoo Kim; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Chemical property, Ethnopharmacology, Herbal medicine, Molecular analysis, Network analysis, Phytochemical
@article{yoo2018discovering,
  author     = {Sunyong Yoo and Kwansoo Kim and Hojung Nam and Doheon Lee},
  title      = {Discovering health benefits of phytochemicals with integrated analysis of the molecular network, chemical properties and ethnopharmacological evidence},
  journal    = {Nutrients},
  volume     = {10},
  number     = {8},
  pages      = {1042},
  publisher  = {MDPI},
  year       = {2018},
  date       = {2018-08-08},
  urldate    = {2018-08-08},
  doi        = {10.3390/nu10081042},
  url        = {https://www.mdpi.com/2072-6643/10/8/1042},
  keywords   = {Bioinformatics, Chemical property, Ethnopharmacology, Herbal medicine, Molecular analysis, Network analysis, Phytochemical},
  abstract   = {Identifying the health benefits of phytochemicals is an essential step in drug and functional food development. While many in vitro screening methods have been developed to identify the health effects of phytochemicals, there is still room for improvement because of high cost and low productivity. Therefore, researchers have alternatively proposed in silico methods, primarily based on three types of approaches; utilizing molecular, chemical or ethnopharmacological information. Although each approach has its own strength in analyzing the characteristics of phytochemicals, previous studies have not considered them all together. Here, we apply an integrated in silico analysis to identify the potential health benefits of phytochemicals based on molecular analysis and chemical properties as well as ethnopharmacological evidence. From the molecular analysis, we found an average of 415.6 health effects for 591 phytochemicals. We further investigated ethnopharmacological evidence of phytochemicals and found that on average 129.1 (31%) of the predicted health effects had ethnopharmacological evidence. Lastly, we investigated chemical properties to confirm whether they are orally bio-available, drug available or effective on certain tissues. The evaluation results indicate that the health effects can be predicted more accurately by cooperatively considering the molecular analysis, chemical properties and ethnopharmacological evidence.},
  pubstate   = {published},
  tppubtype  = {article}
}
Sunyong Yoo; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Ethnopharmacology, Natural product, Network analysis
@article{yoo2018phenotype,
  title      = {Phenotype-oriented network analysis for discovering pharmacological effects of natural compounds},
  author     = {Sunyong Yoo and Hojung Nam and Doheon Lee},
  url        = {https://www.nature.com/articles/s41598-018-30138-w},
  doi        = {10.1038/s41598-018-30138-w},
  year       = {2018},
  date       = {2018-08-03},
  urldate    = {2018-08-03},
  journal    = {Scientific Reports},
  volume     = {8},
  number     = {1},
  pages      = {11667},
  publisher  = {Nature Publishing Group},
  address    = {London, UK},
  abstract   = {Although natural compounds have provided a wealth of leads and clues in drug development, the process of identifying their pharmacological effects is still a challenging task. Over the last decade, many in vitro screening methods have been developed to identify the pharmacological effects of natural compounds, but they are still costly processes with low productivity. Therefore, in silico methods, primarily based on molecular information, have been proposed. However, large-scale analysis is rarely considered, since many natural compounds do not have molecular structure and target protein information. Empirical knowledge of medicinal plants can be used as a key resource to solve the problem, but this information is not fully exploited and is used only as a preliminary tool for selecting plants for specific diseases. Here, we introduce a novel method to identify pharmacological effects of natural compounds from herbal medicine based on phenotype-oriented network analysis. In this study, medicinal plants with similar efficacy were clustered by investigating hierarchical relationships between the known efficacy of plants and 5,021 phenotypes in the phenotypic network. We then discovered significantly enriched natural compounds in each plant cluster and mapped the averaged pharmacological effects of the plant cluster to the natural compounds. This approach allows us to predict unexpected effects of natural compounds that have not been found by molecular analysis. When applied to verified medicinal compounds, our method successfully identified their pharmacological effects with high specificity and sensitivity.},
  keywords   = {Ethnopharmacology, Natural product, Network analysis},
  pubstate   = {published},
  tppubtype  = {article}
}
Kyungrin Noh; Sunyong Yoo; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Medicinal Compound, Metabolite, Natural product
@article{noh2018systematic,
  title      = {A systematic approach to identify therapeutic effects of natural products based on human metabolite information},
  author     = {Kyungrin Noh and Sunyong Yoo and Doheon Lee},
  url        = {https://link.springer.com/article/10.1186/s12859-018-2196-0},
  doi        = {10.1186/s12859-018-2196-0},
  year       = {2018},
  date       = {2018-06-13},
  urldate    = {2018-06-13},
  journal    = {BMC Bioinformatics},
  volume     = {19},
  number     = {205},
  pages      = {49--55},
  publisher  = {Springer},
  abstract   = {Background
Natural products have been widely investigated in the drug development field. Their traditional use cases as medicinal agents and their resemblance of our endogenous compounds show the possibility of new drug development. Many researchers have focused on identifying therapeutic effects of natural products, yet the resemblance of natural products and human metabolites has been rarely touched.
Methods
We propose a novel method which predicts therapeutic effects of natural products based on their similarity with human metabolites. In this study, we compare the structure, target and phenotype similarities between natural products and human metabolites to capture molecular and phenotypic properties of both compounds. With the generated similarity features, we train support vector machine model to identify similar natural product and human metabolite pairs. The known functions of human metabolites are then mapped to the paired natural products to predict their therapeutic effects.
Results
With our selected three feature sets, structure, target and phenotype similarities, our trained model successfully paired similar natural products and human metabolites. When applied to the natural product derived drugs, we could successfully identify their indications with high specificity and sensitivity. We further validated the found therapeutic effects of natural products with the literature evidence.
Conclusions
These results suggest that our model can match natural products to similar human metabolites and provide possible therapeutic effects of natural products. By utilizing the similar human metabolite information, we expect to find new indications of natural products which could not be covered by previous in silico methods.},
  keywords   = {Bioinformatics, Medicinal Compound, Metabolite, Natural product},
  pubstate   = {published},
  tppubtype  = {article}
}
Natural products have been widely investigated in the drug development field. Their traditional use cases as medicinal agents and their resemblance of our endogenous compounds show the possibility of new drug development. Many researchers have focused on identifying therapeutic effects of natural products, yet the resemblance of natural products and human metabolites has been rarely touched.
Methods
We propose a novel method which predicts therapeutic effects of natural products based on their similarity with human metabolites. In this study, we compare the structure, target and phenotype similarities between natural products and human metabolites to capture molecular and phenotypic properties of both compounds. With the generated similarity features, we train support vector machine model to identify similar natural product and human metabolite pairs. The known functions of human metabolites are then mapped to the paired natural products to predict their therapeutic effects.
Results
With our selected three feature sets, structure, target and phenotype similarities, our trained model successfully paired similar natural products and human metabolites. When applied to the natural product derived drugs, we could successfully identify their indications with high specificity and sensitivity. We further validated the found therapeutic effects of natural products with the literature evidence.
Conclusions
These results suggest that our model can match natural products to similar human metabolites and provide possible therapeutic effects of natural products. By utilizing the similar human metabolite information, we expect to find new indications of natural products which could not be covered by previous in silico methods.
Sunyong Yoo; Kyungrin Noh; Moonshik Shin; Junseok Park; Kwang-Hyung Lee; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: ADR, Bioinformatics, Drugs, Network analysis
@article{yoo2018silico,
  title      = {In silico profiling of systemic effects of drugs to predict unexpected interactions},
  author     = {Sunyong Yoo and Kyungrin Noh and Moonshik Shin and Junseok Park and Kwang-Hyung Lee and Hojung Nam and Doheon Lee},
  url        = {https://www.nature.com/articles/s41598-018-19614-5},
  doi        = {10.1038/s41598-018-19614-5},
  year       = {2018},
  date       = {2018-01-25},
  urldate    = {2018-01-25},
  journal    = {Scientific Reports},
  volume     = {8},
  number     = {1},
  pages      = {1612},
  publisher  = {Nature Publishing Group},
  address    = {London, UK},
  abstract   = {Identifying unexpected drug interactions is an essential step in drug development. Most studies focus on predicting whether a drug pair interacts or is effective on a certain disease without considering the mechanism of action (MoA). Here, we introduce a novel method to infer effects and interactions of drug pairs with MoA based on the profiling of systemic effects of drugs. By investigating propagated drug effects from the molecular and phenotypic networks, we constructed profiles of 5,441 approved and investigational drugs for 3,833 phenotypes. Our analysis indicates that highly connected phenotypes between drug profiles represent the potential effects of drug pairs and the drug pairs with strong potential effects are more likely to interact. When applied to drug interactions with verified effects, both therapeutic and adverse effects have been successfully identified with high specificity and sensitivity. Finally, tracing drug interactions in molecular and phenotypic networks allows us to understand the MoA.},
  keywords   = {ADR, Bioinformatics, Drugs, Network analysis},
  pubstate   = {published},
  tppubtype  = {article}
}
2016
Jongsoo Keum; Sunyong Yoo; Doheon Lee; Hojung Nam
Abstract | Links | BibTeX | Dimensions | Tags: Bioinformatics, Database, Herbal medicine, Target proteins
@article{keum2016prediction,
  title      = {Prediction of compound-target interactions of natural products using large-scale drug and protein information},
  author     = {Jongsoo Keum and Sunyong Yoo and Doheon Lee and Hojung Nam},
  url        = {https://link.springer.com/article/10.1186/s12859-016-1081-y},
  doi        = {10.1186/s12859-016-1081-y},
  year       = {2016},
  date       = {2016-07-28},
  urldate    = {2016-07-28},
  journal    = {BMC Bioinformatics},
  volume     = {17},
  number     = {219},
  pages      = {417--425},
  publisher  = {Springer},
  abstract   = {Background
Verifying the proteins that are targeted by compounds of natural herbs will be helpful to select natural herb-based drug candidates. However, this entails a great deal of effort to clarify the interaction throughout in vitro or in vivo experiments. In this light, in silico prediction of the interactions between compounds and target proteins can help ease the efforts.
Results
In this study, we performed in silico predictions of herbal compound target identification. First, data related to compounds, target proteins, and interactions between them are taken from the DrugBank database. Then we characterized six classes of compound-target interaction in humans including G-protein-coupled receptors (GPCRs), ion channel, enzymes, receptors, transporters, and other proteins. Also, classification-prediction models that predict the interactions between compounds and target proteins through a machine learning method were constructed using these matrices. As a result, AUC values of six classes are 0.94, 0.93, 0.90, 0.89, 0.91, and 0.76 respectively. Finally, the interactions of compounds from natural products were predicted using the constructed classification models. Furthermore, from our predicted results, we confirmed that several important disease related proteins were predicted as targets of natural herbal compounds.
Conclusions
We constructed classification-prediction models that predict the interactions between compounds and target proteins. The constructed models showed good prediction performances, and numbers of potential natural compounds target proteins were predicted from our results.},
  keywords   = {Bioinformatics, Database, Herbal medicine, Target proteins},
  pubstate   = {published},
  tppubtype  = {article}
}
Verifying the proteins that are targeted by compounds of natural herbs will be helpful to select natural herb-based drug candidates. However, this entails a great deal of effort to clarify the interaction throughout in vitro or in vivo experiments. In this light, in silico prediction of the interactions between compounds and target proteins can help ease the efforts.
Results
In this study, we performed in silico predictions of herbal compound target identification. First, data related to compounds, target proteins, and interactions between them are taken from the DrugBank database. Then we characterized six classes of compound-target interaction in humans including G-protein-coupled receptors (GPCRs), ion channel, enzymes, receptors, transporters, and other proteins. Also, classification-prediction models that predict the interactions between compounds and target proteins through a machine learning method were constructed using these matrices. As a result, AUC values of six classes are 0.94, 0.93, 0.90, 0.89, 0.91, and 0.76 respectively. Finally, the interactions of compounds from natural products were predicted using the constructed classification models. Furthermore, from our predicted results, we confirmed that several important disease related proteins were predicted as targets of natural herbal compounds.
Conclusions
We constructed classification-prediction models that predict the interactions between compounds and target proteins. The constructed models showed good prediction performances, and numbers of potential natural compounds target proteins were predicted from our results.
2015
Moonshik Shin; Sunyong Yoo; Suhyun Ha; Kyungrin Noh; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: ADME, Bioinformatics, Natural product
@conference{shin2015identifying,
  title        = {Identifying Potential Bioactive Compounds of Natural Products by Combining {ADMET} Prediction Methods},
  author       = {Moonshik Shin and Sunyong Yoo and Suhyun Ha and Kyungrin Noh and Doheon Lee},
  url          = {https://dl.acm.org/doi/abs/10.1145/2811163.2811168},
  doi          = {10.1145/2811163.2811168},
  year         = {2015},
  date         = {2015-01-01},
  urldate      = {2015-01-01},
  booktitle    = {Proceedings of the ACM Ninth International Workshop on Data and Text Mining in Biomedical Informatics},
  pages        = {19},
  publisher    = {ACM},
  organization = {CIKM},
  abstract     = {Herbs consist of various chemical compounds. Thus, identifying potential bioactive compounds from those diversity is an important task for studies in the herb, food and natural products. Even though various computational approaches are developed for predicting bioactive compounds, the prediction performances are diverse due to different methods and dataset. Therefore, there is urgent demand for an approach that connotes the previous methods and identify potential bioactive compounds with high accuracy. To meet the demand, we proposed a filtering strategy that identifies potential bioactive compounds by combining previously developed computational methods which predict ADMET, such as Human Intestinal Absorption (HIA) and Caco-2 permeability. Our approach was evaluated on 930 compounds that are known as bioactive compounds, which were extracted from literature, DrugBank and Dr. Dukes phytochemical databases. By applying our filtering strategy, 97.5% of the known bioactive compounds were correctly predicted as bioactive. We examined whether our approach can distinguish the potential bioactive compound from the non-potential bioactive compounds with Fishers' exact test, and a reasonable p-value (3.806 x 10-9) was derived. For the next step, we are planning to develop a machine-learning based method to improve our filtering approach.},
  keywords     = {ADME, Bioinformatics, Natural product},
  pubstate     = {published},
  tppubtype    = {conference}
}
2014
Suhyun Ha; Sunyong Yoo; Moonshik Shin; Jin Sook Kwak; Oran Kwon; Min Chang Choi; Keon Wook Kang; Hojung Nam; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags: Ethnopharmacology, Natural product
@conference{ha2014integrative,
  title        = {Integrative Database for Exploring Compound Combinations of Natural Products for Medical Effects},
  author       = {Suhyun Ha and Sunyong Yoo and Moonshik Shin and Jin Sook Kwak and Oran Kwon and Min Chang Choi and Keon Wook Kang and Hojung Nam and Doheon Lee},
  url          = {https://dl.acm.org/doi/abs/10.1145/2665970.2665986},
  doi          = {10.1145/2665970.2665986},
  year         = {2014},
  date         = {2014-01-01},
  urldate      = {2014-01-01},
  booktitle    = {Proceedings of the ACM 8th International Workshop on Data and Text Mining in Bioinformatics},
  pages        = {41},
  publisher    = {ACM},
  organization = {CIKM},
  abstract     = {Natural products used in dietary supplements, complementary and alternative medicine (CAM) and conventional medicine are composites of multiple chemical compounds. These chemical compounds potentially offer an extensive source for drug discovery with accumulated knowledge of efficacy and safety. However, existing natural product related databases have drawbacks in both standardization and structuralization of information. Therefore, in this work, we construct an integrated database of natural products by mapping the prescription, herb, compound, and phenotype information to international identifiers and structuralizing the efficacy information through database integration and text-mining methods. We expect that the constructed database could serve as a fundamental resource for the natural products research.},
  keywords     = {Ethnopharmacology, Natural product},
  pubstate     = {published},
  tppubtype    = {conference}
}
2012
Moonshik Shin; Sunyong Yoo; Kwang H Lee; Doheon Lee
Abstract | Links | BibTeX | Dimensions | Tags:
@conference{shin2012electronic,
  title        = {Electronic medical records privacy preservation through k-anonymity clustering method},
  author       = {Moonshik Shin and Sunyong Yoo and Kwang H Lee and Doheon Lee},
  url          = {https://ieeexplore.ieee.org/abstract/document/6505046},
  doi          = {10.1109/SCIS-ISIS.2012.6505046},
  year         = {2012},
  date         = {2012-01-01},
  urldate      = {2012-01-01},
  booktitle    = {The 6th International Conference on Soft Computing and Intelligent Systems, and The 13th International Symposium on Advanced Intelligence Systems},
  pages        = {1119--1124},
  publisher    = {IEEE},
  organization = {IEEE},
  abstract     = {Electronic Medical Records (EMRs) enable the sharing of patient medical data whenever it is needed and also are used as a tool for building new medical technology and patient recommendation systems. Since EMRs include patients' private data, access is restricted to researchers. Thus, an anonymizing technique is necessary that keeps patients' private data safe while not damaging useful medical information. k-member clustering anonymization approaches k-anonymization as a clustering issue. The objective of the k-member clustering problem is to gather records that will minimize the data distortion during data generalization. Most of the previous clustering techniques include random seed selection. However, randomly selecting a cluster seed will provide inconsistent performance. The authors propose a k-member cluster seed selection algorithm (KMCSSA) that is distinct from the previous clustering methods. Instead of randomly selecting a cluster seed, the proposed method selects the seed based on the closeness centrality to provide consistent information loss (IL) and to reduce the information distortion. An adult database from University of California Irvine Machine Learning Repository was used for the experiment. By comparing the proposed method with two previous methods, the experimental results shows that KMCSSA provides superior performance with respect to information loss. The authors provide a privacy protection algorithm that derives consistent information loss and reduces the overall information distortion.},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {conference}
}
Sunyong Yoo; Moonshik Shin; Doheon Lee; others
Abstract | Links | BibTeX | Dimensions | Tags: k-anonymity, l-diversity, Medical informatics
@article{yoo2012approach,
title = {An approach to reducing information loss and achieving diversity of sensitive attributes in k-anonymity methods},
author = {Sunyong Yoo and Moonshik Shin and Doheon Lee and others},
url = {https://www.i-jmr.org/2012/2/e14},
doi = {10.2196/ijmr.2140},
year = {2012},
date = {2012-01-01},
urldate = {2012-01-01},
journal = {Interactive Journal of Medical Research},
volume = {1},
number = {2},
pages = {e14},
publisher = {JMIR Publications Inc.},
address = {Toronto, Canada},
abstract = {Electronic Health Records (EHRs) enable the sharing of patients’ medical data. Since EHRs include patients’ private data, access by researchers is restricted. Therefore k-anonymity is necessary to keep patients’ private data safe without damaging useful medical information. However, k-anonymity cannot prevent sensitive attribute disclosure. An alternative, l-diversity, has been proposed as a solution to this problem and is defined as: each Q-block (ie, each set of rows corresponding to the same value for identifiers) contains at least l well-represented values for each sensitive attribute. While l-diversity protects against sensitive attribute disclosure, it is limited in that it focuses only on diversifying sensitive attributes.
The aim of the study is to develop a k-anonymity method that not only minimizes information loss but also achieves diversity of the sensitive attribute.
This paper proposes a new privacy protection method that uses conditional entropy and mutual information. This method considers both information loss as well as diversity of sensitive attributes. Conditional entropy can measure the information loss by generalization, and mutual information is used to achieve the diversity of sensitive attributes. This method can offer appropriate Q-blocks for generalization.
We used the adult database from the UCI Machine Learning Repository and found that the proposed method can greatly reduce information loss compared with a recent l-diversity study. It can also achieve the diversity of sensitive attributes by counting the number of Q-blocks that have leaks of diversity.
This study provides a privacy protection method that can improve data utility and protect against sensitive attribute disclosure. The method is viable and should be of interest for further privacy protection in EHR applications.},
keywords = {k-anonymity, l-diversity, Medical informatics},
pubstate = {published},
tppubtype = {article}
}
The aim of the study is to develop a k-anonymity method that not only minimizes information loss but also achieves diversity of the sensitive attribute.
This paper proposes a new privacy protection method that uses conditional entropy and mutual information. This method considers both information loss as well as diversity of sensitive attributes. Conditional entropy can measure the information loss by generalization, and mutual information is used to achieve the diversity of sensitive attributes. This method can offer appropriate Q-blocks for generalization.
We used the adult database from the UCI Machine Learning Repository and found that the proposed method can greatly reduce information loss compared with a recent l-diversity study. It can also achieve the diversity of sensitive attributes by counting the number of Q-blocks that have leaks of diversity.
This study provides a privacy protection method that can improve data utility and protect against sensitive attribute disclosure. The method is viable and should be of interest for further privacy protection in EHR applications.