% Conference paper: CoGNN, scheduling of concurrent GNN training tasks on GPUs (SC 2022).
% The citation key is kept unchanged so that existing \cite commands still resolve.
% Acronyms in the title are brace-protected so sentence-casing styles keep their capitals.
% NOTE(review): "address" carries a country, as in the original record; BibTeX convention
% is the publisher's city -- confirm and replace if the city is known.
@inproceedings{3d70733a629a4a59ae016a9cd4a64e5e,
  author    = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Zhang, Ruizhe and Dun, Ming and Li, Mingzhen and Liu, Xiaoyan and Xiao, Wencong and Li, Yong and Luan, Zhongzhi and Qian, Depei},
  title     = {{CoGNN}: Efficient Scheduling for Concurrent {GNN} Training on {GPUs}},
  booktitle = {Proceedings of {SC} 2022},
  series    = {International Conference for High Performance Computing, Networking, Storage and Analysis, SC},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2022},
  doi       = {10.1109/SC41404.2022.00044},
  keywords  = {Concurrent Training, Estimation Model, GPU, Graph Neural Networks, Task Scheduling},
  abstract  = {Graph neural networks (GNNs) suffer from low GPU utilization due to frequent memory accesses. Existing concurrent training mechanisms cannot be directly adapted to GNNs because they fail to consider the impact of input irregularity. This requires pre-profiling the memory footprint of concurrent tasks based on input dimensions to ensure successful co-location on GPU. Moreover, massive training tasks generated from scenarios such as hyper-parameter tuning require flexible scheduling strategies. To address these problems, we propose CoGNN that enables efficient management of GNN training tasks on GPUs. Specifically, the CoGNN organizes the tasks in a queue and estimates the memory consumption of each task based on cost functions at operator basis. In addition, the CoGNN implements scheduling policies to generate task groups, which are iteratively submitted for execution. The experiment results show that the CoGNN can achieve shorter completion and queuing time for training tasks from diverse GNN models.},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 2022 International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2022 ; Conference date: 13-11-2022 Through 18-11-2022},
}