% Conference paper from the ADMA 2020 proceedings (research-portal export, cleaned up).
% - Names are stored in "Last, First" form so the parser never has to guess the split.
% - The DOI is stored bare (no LaTeX escaping); DOI-aware styles handle the underscore.
% - language/address were localised CJK strings in the export; they are given in
%   English/ASCII here so the entry works with classic 8-bit BibTeX as well as Biber.
% - address carries the publisher country exactly as exported; the publisher city is
%   not present in the exported data. TODO confirm the city and replace if required.
% - note is kept verbatim from the export (copyright line and conference dates).
@inproceedings{4c921e1e6961458cb781a13c647858ad,
  author    = {Zhang, Ruitong and Liu, Lianzhong and Zhang, Jiaofu and Huang, Zihang and Yang, Caiwei and Zhao, Liangxuan and Xu, Tongge},
  title     = {Hierarchical and Pairwise Document Embedding for Plagiarism Detection},
  booktitle = {Advanced Data Mining and Applications - 16th International Conference, {ADMA} 2020, Proceedings},
  editor    = {Yang, Xiaochun and Wang, Chang-Dong and Islam, Md. Saiful and Zhang, Zheng},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2020},
  pages     = {148--156},
  doi       = {10.1007/978-3-030-65390-3_12},
  isbn      = {9783030653897},
  language  = {english},
  keywords  = {BERT, LSA, Plagiarism detection},
  abstract  = {The rapid development of the Internet, especially the application of search engines and machine translation, makes it easier to copy texts. Most existing text plagiarism detection methods are not capable of dealing with the increasing number of plagiarism sources and the increasingly ambiguous plagiarized texts. In this paper, we pay attention to the task of large-scale text deduplication, and propose a multi-level distributed text computing model, which improves the checking speed through multi-level latent semantic analysis, and combines BERT to judge plagiarized text more accurately. In order to further verify the model, we also combined the latest fuzzy plagiarism technology to construct a three-level data set. The experimental results show that our model performs well when plagiarism data increases and plagiarism ambiguity increases.},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 16th International Conference on Advanced Data Mining and Applications, ADMA 2020 ; Conference date: 12-11-2020 Through 14-11-2020},
}