% Conference paper in the PRCV 2024 proceedings, cleaned up from a database export.
% Review notes:
%   - language and address were exported as Chinese strings; they are now
%     given in English so the record is ASCII-safe for classic BibTeX.
%   - address currently names a country, not the publisher city -- TODO confirm
%     the city against the publisher record.
%   - doi is stored bare (no escaped underscore); url-aware styles typeset it.
%   - abstract and note are kept verbatim from the export.
@inproceedings{08862352a2784204b51e7e2bcac690cf,
  author    = {Xian, Xiaoyu and Qin, Jinghui and Shi, Yukai and Tian, Daxin and Lin, Liang},
  title     = {Exploring Out-of-Distribution Scene Text Recognition for Driving Scenes with Hybrid Test-Time Adaptation},
  booktitle = {Pattern Recognition and Computer Vision - 7th {Chinese} Conference, {PRCV} 2024, Proceedings},
  editor    = {Lin, Zhouchen and Zha, Hongbin and Cheng, Ming-Ming and He, Ran and Liu, Cheng-Lin and Ubul, Kurban and Silamu, Wushouer and Zhou, Jie},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2025},
  pages     = {65--80},
  isbn      = {9789819784868},
  doi       = {10.1007/978-981-97-8487-5_5},
  language  = {English},
  keywords  = {Data Augmentation, Driving Scenes, Out-of-Distribution, Scene Text Recognition},
  abstract  = {Scene Text Recognition (STR) in dynamic driving scenes is important for recognizing real-world kilometer marker to facilitate the scheduling and operation of industrial scenes. For example, the location information of the train affects the safe and reliable operation of the transportation, which can be effectively determined by identifying the kilometer markers with STR technology. However, most of the existing STR models make the independent and identically distributed (i.i.d) assumption that all the training data and test data are drawn from the same data distribution. Although satisfactory performance is achieved under i.i.d assumption, existing STR models remain notoriously weak at generalization on out-of-distribution (o.o.d) data, making a system unreliable and unsafe. To validate this phenomenon, we attempt to propose a new hybrid test-time adaptation (HTTA) to improve the performance of an STR model on o.o.d test data. Previously, test-time adaptation methods are targeted at classification models and do not consider the multi-step reasoning characteristic of sequence learning tasks. In HTTA, we deploy multiple semantically-reserved image augmentation and design a semantically-consistent auxiliary task to present a continual adaptation. Additionally, we construct a new Real-world Subway Kilometer Marker (RSKM) dataset for an out-of-distribution STR practice under dynamic driving scenes. We conduct extensive experiments on RSKM by embedding our HTTA into multiple classical STR methods to show the effectiveness. The experiment results show that our semantically-consistent augmentation and HTTA significantly improve the generalization performance on o.o.d STR practice.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.; 7th Chinese Conference on Pattern Recognition and Computer Vision, PRCV 2024 ; Conference date: 18-10-2024 Through 20-10-2024},
}