% Conference paper in the MMM 2025 proceedings (series: Lecture Notes in Computer Science).
% The DOI is stored bare (no LaTeX escapes); language and address are given in English.
% NOTE(review): address holds a country, not the publisher's city -- confirm the city.
% NOTE(review): this record carries no series volume number -- add "volume" once confirmed.
@inproceedings{a6976f98dbe54938a0bb19432e5b1665,
  author    = {Kong, Yongqiang and Wang, Yunhong and Li, Annan},
  title     = {Saliency Based Data Augmentation for Few-Shot Video Action Recognition},
  booktitle = {{MultiMedia} Modeling - 31st International Conference on Multimedia Modeling, {MMM} 2025, Proceedings},
  editor    = {Ide, Ichiro and Kompatsiaris, Ioannis and Xu, Changsheng and Yanai, Keiji and Chu, Wei-Ta and Nitta, Naoko and Riegler, Michael and Yamasaki, Toshihiko},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {367--380},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2025},
  doi       = {10.1007/978-981-96-2064-7_27},
  isbn      = {9789819620630},
  language  = {English},
  keywords  = {Action recognition, Data augmentation, Few-shot learning, Saliency},
  abstract  = {Despite the progress made in few-shot video action recognition, existing methods still struggle to achieve satisfactory performance when support samples are limited (e.g., 1-shot task). This paper proposes to augment training samples without relying on additional supervision and labor costs, aiming at improving generalizability of learned representations. We introduce a novel self-supervised salient object detection model which results in frame-level saliency and background features of videos. A shared encoder is employed to fuse saliency and background information from different videos. Both intra- and inter-class fusion are performed, in which the latter is controlled by prior probability to avoid semantic ambiguities. This way actually corresponds to augment training data in feature space. The saliency-background representations formed from query and support videos are used to construct class prototypes through Temporal-Relational CrossTransformers. Experimental results on four standard benchmarks demonstrate that the proposed method outperforms state-of-the-arts under various few-shot settings, particularly excelling in the 1-shot case.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.; 31st International Conference on Multimedia Modeling, MMM 2025 ; Conference date: 08-01-2025 Through 10-01-2025},
}