% Conference paper in the ICA3PP 2019 proceedings (Springer LNCS series).
% The citation key is an opaque export identifier; it is kept as-is so that
% existing \cite commands keep resolving.
@inproceedings{9d97a53dbf9d4cadbf6677e54f54c310,
  author    = {Wei Li and Shengjie Hu and Di Wang and Tianba Chen and Yunchun Li},
  title     = {{SPM}: Modeling {Spark} Task Execution Time from the Sub-stage Perspective},
  booktitle = {Algorithms and Architectures for Parallel Processing - 19th International Conference, ICA3PP 2019, Proceedings},
  editor    = {Sheng Wen and Albert Zomaya and Yang, Laurence T.},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer},
  address   = {Germany},
  year      = {2020},
  pages     = {3--10},
  isbn      = {9783030389604},
  doi       = {10.1007/978-3-030-38961-1_1},
  language  = {English},
  keywords  = {Network emulation, Regression model, Spark, Task-level execution time prediction},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 19th International Conference on Algorithms and Architectures for Parallel Processing, ICA3PP 2019 ; Conference date: 09-12-2019 Through 11-12-2019},
  abstract  = {Tasks are the basic unit of Spark application scheduling, and its execution is affected by various configurations of Spark cluster. Therefore, the prediction of task execution time is a challenging job. In this paper, we analyze the features of task execution procedure on different stages, and propose the method of prediction of each sub-stage execution time. Moreover, the correlative time overheads of GC and shuffle spill are analyzed in detail. As a result, we propose SPM, a task-level execution time prediction model. SPM can be used to predict the task execution time of each stage according to the input data size and configuration of parallelism. We further apply SPM to the Spark network emulation tool SNemu, which can determine the start time of each shuffle procedure for emulation effectively. Experimental results show that the prediction method can achieve high accuracy in a variety of Spark benchmarks on Hibench.},
}