% Conference paper record (NeurIPS 2022, "Q-ViT"). Localised and non-ASCII field
% values are normalised to English/ASCII so the entry also works with classic 8-bit BibTeX.
@inproceedings{37706e3c0a4c486292c760e4c4f9d2ca,
  author    = {Li, Yanjing and Xu, Sheng and Zhang, Baochang and Cao, Xianbin and Gao, Peng and Guo, Guodong},
  title     = {{Q-ViT}: Accurate and Fully Quantized Low-bit Vision Transformer},
  booktitle = {Advances in Neural Information Processing Systems 35 - 36th Conference on Neural Information Processing Systems, NeurIPS 2022},
  series    = {Advances in Neural Information Processing Systems},
  editor    = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.},
  publisher = {Neural information processing systems foundation},
  address   = {United States},
  year      = {2022},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2022 Neural information processing systems foundation. All rights reserved.; 36th Conference on Neural Information Processing Systems, NeurIPS 2022 ; Conference date: 28-11-2022 Through 09-12-2022},
  abstract  = {The large pre-trained vision transformers (ViTs) have demonstrated remarkable performance on various visual tasks, but suffer from expensive computational and memory cost problems when deployed on resource-constrained devices. Among the powerful compression approaches, quantization extremely reduces the computation and memory consumption by low-bit parameters and bit-wise operations. However, low-bit ViTs remain largely unexplored and usually suffer from a significant performance drop compared with the real-valued counterparts. In this work, through extensive empirical analysis, we first identify the bottleneck for severe performance drop comes from the information distortion of the low-bit quantized self-attention map. We then develop an information rectification module (IRM) and a distribution guided distillation (DGD) scheme for fully quantized vision transformers (Q-ViT) to effectively eliminate such distortion, leading to a fully quantized ViTs. We evaluate our methods on popular DeiT and Swin backbones. Extensive experimental results show that our method achieves a much better performance than the prior arts. For example, our Q-ViT can theoretically accelerates the ViT-S by 6.14$\times$ and achieves about 80.9\% Top-1 accuracy, even surpassing the full-precision counterpart by 1.0\% on ImageNet dataset. Our codes and models are attached on https://github.com/YanjingLi0202/Q-ViT.},
}