JOURNALS

2024

Sunyong Yoo; Myeonghyeon Jeong; Subhin Seomun; Kiseong Kim; Youngmahn Han
Interpretable Prediction of SARS-CoV-2 Epitope-specific TCR Recognition Using a Pre-Trained Protein Language Model. [Journal Article] [SCI (JCR10%)]
In: IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 21, iss. 3, pp. 428-438, 2024, (Correspondence to Sunyong Yoo).

Abstract | Links | BibTeX | Dimensions | Tags: Amino acids, Attention mechanism, Bioinformatics, Coronaviruses, Deep learning, Immune system, Lymphocytes, Predictive models, Proteins, Transformer

% Journal article record for Yoo et al. (2024).
% Both `year` and `date` are kept on purpose: classic BibTeX styles read `year`,
% while biblatex reads `date` (and takes it over `year` when both are present).
% `tppubtype` is not a standard BibTeX/biblatex field; standard styles ignore it.
@article{yoo2024interpretable,
  author    = {Yoo, Sunyong and Jeong, Myeonghyeon and Seomun, Subhin and Kim, Kiseong and Han, Youngmahn},
  title     = {Interpretable Prediction of {SARS-CoV-2} Epitope-specific {TCR} Recognition Using a Pre-Trained Protein Language Model},
  journal   = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  volume    = {21},
  number    = {3},
  pages     = {428--438},
  publisher = {IEEE},
  year      = {2024},
  date      = {2024-02-21},
  doi       = {10.1109/TCBB.2024.3368046},
  url       = {https://ieeexplore.ieee.org/abstract/document/10443062},
  urldate   = {2024-02-21},
  abstract  = {The emergence of the novel coronavirus, designated as severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2), has posed a significant threat to public health worldwide. There has been progress in reducing hospitalizations and deaths due to SARS-CoV-2. However, challenges stem from the emergence of SARS-CoV-2 variants, which exhibit high transmission rates, increased disease severity, and the ability to evade humoral immunity. Epitope-specific T-cell receptor (TCR) recognition is key in determining the T-cell immunogenicity for SARS-CoV-2 epitopes. Although several data-driven methods for predicting epitope-specific TCR recognition have been proposed, they remain challenging due to the enormous diversity of TCRs and the lack of available training data. Self-supervised transfer learning has recently been proven useful for extracting information from unlabeled protein sequences, increasing the predictive performance of fine-tuned models, and using a relatively small amount of training data. This study presents a deep-learning model generated by fine-tuning pre-trained protein embeddings from a large corpus of protein sequences. The fine-tuned model showed markedly high predictive performance and outperformed the recent Gaussian process-based prediction model. The output attentions captured by the deep-learning model suggested critical amino acid positions in the SARS-CoV-2 epitope-specific TCRβ sequences that are highly associated with the viral escape of T-cell immune response.},
  note      = {Correspondence to Sunyong Yoo},
  keywords  = {Amino acids, Attention mechanism, Bioinformatics, Coronaviruses, Deep learning, Immune system, Lymphocytes, Predictive models, Proteins, Transformer},
  pubstate  = {published},
  tppubtype = {article}
}