@inproceedings{dc49bc3284fd4427a7a7f6cd9f274b17,
title = "Learning individual styles of conversational gesture",
abstract = "Human speech is often accompanied by hand and arm gestures. We present a method for cross-modal translation from 'in-the-wild' monologue speech of a single speaker to their conversational gesture motion. We train on unlabeled videos for which we only have noisy pseudo ground truth from an automatic pose detection system. Our proposed model significantly outperforms baseline methods in a quantitative comparison. To support research toward obtaining a computational understanding of the relationship between gesture and speech, we release a large video dataset of person-specific gestures.",
keywords = "Face, Gesture, and Body Pose; Datasets and Evaluation; Deep Learning; Image and Video Synthesis",
author = "Shiry Ginosar and Amir Bar and Gefen Kohavi and Caroline Chan and Andrew Owens and Jitendra Malik",
note = "Publisher Copyright: {\textcopyright} 2019 IEEE; 32nd IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2019; Conference date: 16-06-2019 through 20-06-2019",
year = "2019",
month = jun,
doi = "10.1109/CVPR.2019.00361",
language = "English",
series = "Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition",
publisher = "IEEE Computer Society",
pages = "3492--3501",
booktitle = "Proceedings - 2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2019",
address = "United States",
}