% ACBatch conference paper (IEEE INFOCOM 2025).
% The citation key looks like an auto-generated export ID; it is kept unchanged
% so that existing citations keep resolving.
% NOTE(review): "address" is presumably the publisher's country as exported
% (translated from the original non-English value) -- confirm, or replace with the city.
@inproceedings{99c95446ad894a22b8b97c7f0d3d15ab,
  author    = {Yang, Ziming and Zheng, Zichuan and Deng, Liyou and Zhang, Shan and Wang, Zhiyuan and Luo, Hongbin},
  title     = {{ACBatch}: Adaptive and Cooperative Batching for Edge Inference},
  booktitle = {{INFOCOM} 2025 - {IEEE} Conference on Computer Communications},
  series    = {Proceedings - {IEEE} {INFOCOM}},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2025},
  doi       = {10.1109/INFOCOM55648.2025.11044583},
  language  = {english},
  keywords  = {batching, cooperative edge computing, edge inference, traffic steering},
  abstract  = {Batching is a key technique in deep learning inference that enhances computational efficiency. Although widely applied in the cloud, batching may suffer from longer batch latency at edge servers due to highly dynamic task arrivals. In this paper, we propose an Adaptive and Cooperative Batching (ACBatch) framework for edge inference, wherein temporal adaptive batching and spatial task steering are jointly devised to balance the trade-off between batch latency and computational efficiency. To this end, a batch efficiency model is built to quantify the relationship between computational efficiency and batch size based on empirical measurements across diverse computing platforms and mainstream neural networks. Then, an optimization problem is formulated to minimize the completion time of a task sequence under ACBatch. For the simplified single-server case, the problem exhibits an optimal substructure and is solved by our proposed Dynamic Programming-based Adaptive Batching algorithm. For the general multi-server case, the optimization of ACBatch is proved NP-hard, and we propose the Multi-Server Cooperative Batching algorithm by iteratively optimizing batching and steering. Real-trace experiments show that ACBatch achieves an average improvement of 89.17\% in completion time and 76.52\% in latency compared to state-of-the-art methods.},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE Conference on Computer Communications, INFOCOM 2025 ; Conference date: 19-05-2025 Through 22-05-2025},
}