PUBLICATIONS
2024
1.
Sunyong Yoo; Myeonghyeon Jeong; Subhin Seomun; Kiseong Kim; Youngmahn Han
Abstract | Links | BibTeX | Dimensions | Tags: Amino acids, Attention mechanism, Bioinformatics, Coronaviruses, Deep learning, Immune system, Lymphocytes, Predictive models, Proteins, Transformer
@comment{Journal article. "number" holds the issue within the volume; the page range uses the "--" form; acronyms in the title are brace-protected so sentence-casing styles keep their capitalisation.}
@article{yoo2024interpretable,
  title     = {Interpretable Prediction of {SARS-CoV-2} Epitope-specific {TCR} Recognition Using a Pre-Trained Protein Language Model},
  author    = {Yoo, Sunyong and Jeong, Myeonghyeon and Seomun, Subhin and Kim, Kiseong and Han, Youngmahn},
  url       = {https://ieeexplore.ieee.org/abstract/document/10443062},
  doi       = {10.1109/TCBB.2024.3368046},
  year      = {2024},
  date      = {2024-02-21},
  urldate   = {2024-02-21},
  journal   = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  volume    = {21},
  number    = {3},
  pages     = {428--438},
  publisher = {IEEE},
  abstract  = {The emergence of the novel coronavirus, designated as severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2), has posed a significant threat to public health worldwide. There has been progress in reducing hospitalizations and deaths due to SARS-CoV-2. However, challenges stem from the emergence of SARS-CoV-2 variants, which exhibit high transmission rates, increased disease severity, and the ability to evade humoral immunity. Epitope-specific T-cell receptor (TCR) recognition is key in determining the T-cell immunogenicity for SARS-CoV-2 epitopes. Although several data-driven methods for predicting epitope-specific TCR recognition have been proposed, they remain challenging due to the enormous diversity of TCRs and the lack of available training data. Self-supervised transfer learning has recently been proven useful for extracting information from unlabeled protein sequences, increasing the predictive performance of fine-tuned models, and using a relatively small amount of training data. This study presents a deep-learning model generated by fine-tuning pre-trained protein embeddings from a large corpus of protein sequences. The fine-tuned model showed markedly high predictive performance and outperformed the recent Gaussian process-based prediction model. The output attentions captured by the deep-learning model suggested critical amino acid positions in the SARS-CoV-2 epitope-specific TCRβ sequences that are highly associated with the viral escape of T-cell immune response.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Amino acids, Attention mechanism, Bioinformatics, Coronaviruses, Deep learning, Immune system, Lymphocytes, Predictive models, Proteins, Transformer},
  pubstate  = {published},
  tppubtype = {article}
}
The emergence of the novel coronavirus, designated as severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2), has posed a significant threat to public health worldwide. There has been progress in reducing hospitalizations and deaths due to SARS-CoV-2. However, challenges stem from the emergence of SARS-CoV-2 variants, which exhibit high transmission rates, increased disease severity, and the ability to evade humoral immunity. Epitope-specific T-cell receptor (TCR) recognition is key in determining the T-cell immunogenicity for SARS-CoV-2 epitopes. Although several data-driven methods for predicting epitope-specific TCR recognition have been proposed, they remain challenging due to the enormous diversity of TCRs and the lack of available training data. Self-supervised transfer learning has recently been proven useful for extracting information from unlabeled protein sequences, increasing the predictive performance of fine-tuned models, and using a relatively small amount of training data. This study presents a deep-learning model generated by fine-tuning pre-trained protein embeddings from a large corpus of protein sequences. The fine-tuned model showed markedly high predictive performance and outperformed the recent Gaussian process-based prediction model. The output attentions captured by the deep-learning model suggested critical amino acid positions in the SARS-CoV-2 epitope-specific TCRβ sequences that are highly associated with the viral escape of T-cell immune response.