% Conference paper "Altogether: Image Captioning via Re-aligning Alt-text" (EMNLP 2024).
% Cleaned up from an automated export:
%   - given names use plain brace groups; the export had \{...\}, which LaTeX
%     typesets as literal brace characters around the name
%   - all author/editor names are written in the unambiguous "Last, First" form
%   - language and address were exported in Hebrew; they are translated to
%     English so the entry stays ASCII-safe for classic BibTeX
%   - the series field repeated booktitle verbatim and was dropped so styles
%     that print both do not show the proceedings title twice
%   - spaces missing after sentence-ending periods in the abstract are restored
% NOTE(review): address carries the country given by the export; confirm whether
% a publisher city is wanted here instead.
@inproceedings{fbf0d89fd8d244c8942069f6819c7dd3,
  author    = {Xu, Hu and Huang, {Po Yao} and Tan, {Xiaoqing Ellen} and Yeh, {Ching Feng} and Kahn, Jacob and Jou, Christine and Ghosh, Gargi and Levy, Omer and Zettlemoyer, Luke and Yih, {Wen Tau} and Li, {Shang Wen} and Xie, Saining and Feichtenhofer, Christoph},
  title     = {Altogether: Image Captioning via Re-aligning Alt-text},
  booktitle = {{EMNLP} 2024 - 2024 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  pages     = {19302--19318},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2024},
  doi       = {10.18653/v1/2024.emnlp-main.1075},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 2024 Conference on Empirical Methods in Natural Language Processing, EMNLP 2024 ; Conference date: 12-11-2024 Through 16-11-2024},
  abstract  = {This paper focuses on creating synthetic data to improve the quality of image captions. Existing works typically have two shortcomings. First, they caption images from scratch, ignoring existing alt-text metadata, and second, lack transparency if the captioners' training data (e.g. GPT) is unknown. In this paper, we study a principled approach Altogether based on the key idea to edit and re-align existing alt-texts associated with the images. To generate training data, we perform human annotation where annotators start with the existing alt-text and realign it to the image content in multiple rounds, consequently constructing captions with rich visual concepts. This differs from prior work that carries out human annotation as a one-time description task solely based on images and annotator knowledge. We train a captioner on this data that generalizes the process of realigning alt-texts at scale. Our results show our Altogether approach leads to richer image captions that also improve text-to-image generation and zero-shot image classification tasks.},
}