JOURNALS
2025
1. Md Sanzid Bin Hossain; Hwan Choi; Zhishan Guo; Sunyong Yoo; Min-Keun Song; Hyunjun Shin; Dexter Hadley: Knowledge transfer-driven estimation of knee moments and ground reaction forces from smartphone videos via temporal-spatial modeling of augmented joint kinematics. In: PLOS One, vol. 20, no. 11, pp. e0335257, 2025.
@article{Hossain2025,
title = {Knowledge transfer-driven estimation of knee moments and ground reaction forces from smartphone videos via temporal-spatial modeling of augmented joint kinematics},
author = {Md Sanzid Bin Hossain and Hwan Choi and Zhishan Guo and Sunyong Yoo and Min-Keun Song and Hyunjun Shin and Dexter Hadley},
url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0335257},
doi = {10.1371/journal.pone.0335257},
issn = {1932-6203},
year = {2025},
date = {2025-11-07},
urldate = {2025-11-07},
journal = {PLOS One},
volume = {20},
number = {11},
pages = {e0335257},
abstract = {The knee adduction and flexion moments provide critical information about knee joint health, while 3D ground reaction forces (GRFs) characterize the forces and energy involved in maneuvering the entire human body. Existing methods of acquiring joint moments and GRFs require expensive equipment and time-consuming pre-processing, and offer limited accessibility. This study tackles these limitations by using only smartphone videos to estimate joint moments and 3D GRFs accurately. We also propose augmenting the joint kinematics by deriving two additional modalities, 2D joint center velocity and acceleration, from the 2D joint center positions acquired from the videos. These augmented kinematics allow a multi-modal fusion module to learn the importance of inter-modal interactions. Additionally, we utilize recurrent neural networks and graph convolutional networks to perform temporal-spatial modeling of joint center dynamics for enhanced accuracy. To overcome another challenge of video-based estimation, the lack of inertial information about body segments, we propose multi-modal knowledge transfer, in which a video-only student model is trained from a teacher model that integrates both video and inertial measurement unit (IMU) data. The student model significantly reduces the normalized root mean square error (NRMSE) from 5.71 to 4.68 and increases the Pearson correlation coefficient (PCC) from 0.929 to 0.951. These results demonstrate that knowledge transfer, augmentation of joint kinematics for multi-modal fusion, and temporal-spatial modeling significantly enhance smartphone video-based estimation, offering a potentially cost-effective alternative to traditional motion capture for clinical assessments, rehabilitation, and sports applications.},
keywords = {Systems biology},
pubstate = {published},
tppubtype = {article}
}
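The kinematic augmentation described in the abstract, deriving 2D joint center velocity and acceleration from the 2D joint center positions, amounts to temporal differentiation of the keypoint trajectories. A minimal NumPy sketch, assuming a fixed video frame rate and a (frames, joints, 2) array layout; both are illustrative assumptions, not details taken from the paper:

import numpy as np

def augment_kinematics(positions, fps=30.0):
    # positions: (T, J, 2) array of 2D joint centers over T video frames.
    # The frame rate and array layout are assumptions for illustration.
    dt = 1.0 / fps
    velocity = np.gradient(positions, dt, axis=0)      # central differences, length T preserved
    acceleration = np.gradient(velocity, dt, axis=0)
    return positions, velocity, acceleration

# Example: 100 frames of 17 keypoints (a common pose-estimation layout).
pos = np.random.rand(100, 17, 2)
p, v, a = augment_kinematics(pos)
print(p.shape, v.shape, a.shape)  # (100, 17, 2) each

Each derived modality can then enter the multi-modal fusion module as its own input stream alongside the raw positions.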
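The multi-modal knowledge transfer follows the familiar teacher-student pattern: a teacher that sees both video and IMU data supervises a video-only student. A sketch of one training step in PyTorch; the model interfaces, the MSE-based soft-target loss, and the weight alpha are assumptions, since the paper's exact distillation objective is not reproduced here:

import torch
import torch.nn as nn

def distillation_step(student, teacher, video_feats, imu_feats, targets, alpha=0.5):
    # student: video-only model; teacher: video + IMU model (interfaces assumed).
    mse = nn.MSELoss()
    with torch.no_grad():  # the teacher stays frozen during transfer
        teacher_pred = teacher(video_feats, imu_feats)
    student_pred = student(video_feats)
    # Ground-truth regression loss on moments/GRFs plus a soft-target term
    # that passes on the teacher's IMU-informed predictions; alpha is illustrative.
    return (1 - alpha) * mse(student_pred, targets) + alpha * mse(student_pred, teacher_pred)

def nrmse(y_true, y_pred):
    # RMSE normalized by the ground-truth range, one common convention for the
    # reported NRMSE; the paper's exact normalizer is not stated in the abstract.
    rmse = torch.sqrt(torch.mean((y_true - y_pred) ** 2))
    return rmse / (y_true.max() - y_true.min())

def pcc(y_true, y_pred):
    # Pearson correlation between predicted and measured 1-D signals.
    return torch.corrcoef(torch.stack([y_true, y_pred]))[0, 1]

Gains like the reported NRMSE drop from 5.71 to 4.68 would be measured with metrics of this kind on held-out trials.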