% Conference paper published in the ICAICA 2023 proceedings (IEEE).
% All field values are ASCII/English so the entry is safe for classic 8-bit BibTeX.
@inproceedings{1190f93e52944eb6ae435b08ee74ecf7,
  author    = {Cai, Yannan and Wei, Zhenzhong},
  title     = {Empirical Study of Unsupervised Pre-Training in {CNN} and {Transformer} Based Visual Tracking},
  booktitle = {2023 5th International Conference on Artificial Intelligence and Computer Applications, {ICAICA} 2023},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2023},
  pages     = {291--295},
  doi       = {10.1109/ICAICA58456.2023.10405496},
  language  = {English},
  keywords  = {CNN, Transformer, unsupervised, visual tracking},
  abstract  = {Deep learning-based visual object tracking has seen the emergence of CNN-based and Transformer-based algorithms built upon the Siamese-based pipeline to pursue robustness and accuracy. However, the performance gap between them requires high-quality and large-scale labeled data for sufficient training. In this work, we design an unsupervised pre-training scheme based on data augmentation to reduce the dependence on expensive labeled data. The core step is the object localization pretext task, which randomly crops the object and pastes it onto several background images. Moreover, we apply the method to both CNN-based and Transformer-based visual trackers. Extensive experiments on public datasets demonstrate that our method outperforms prevailing unsupervised trackers on large-scale benchmarks such as LaSOT and TrackingNet. Additionally, a simple strategy of freezing the CNN backbone during Transformer-based pre-training proves to be effective.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE. Conference date: 28--30 November 2023},
}