% Conference paper in the PCM 2018 proceedings (Lecture Notes in Computer
% Science series). The citation key is the exporter-generated identifier and
% is kept unchanged so existing \cite commands keep resolving.
% The doi is stored bare (no LaTeX escaping), and the language/address values
% are given in English so the entry stays ASCII-safe for classic BibTeX.
% NOTE(review): address holds a country rather than the publisher's city;
% confirm the place of publication against the volume's front matter.
@inproceedings{6839be60ab7e48b491286298abc3cf20,
  author    = {Yu, Jing and Lu, Yuhang and Qin, Zengchang and Zhang, Weifeng and Liu, Yanbing and Tan, Jianlong and Guo, Li},
  title     = {Modeling text with graph convolutional network for cross-modal information retrieval},
  booktitle = {Advances in Multimedia Information Processing -- {PCM} 2018 - 19th Pacific-Rim Conference on Multimedia, 2018, Proceedings},
  editor    = {Ngo, Chong-Wah and Hong, Richang and Wang, Meng and Cheng, Wen-Huang and Yamasaki, Toshihiko},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2018},
  pages     = {223--234},
  isbn      = {9783030007751},
  doi       = {10.1007/978-3-030-00776-8_21},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Switzerland AG 2018.; 19th Pacific-Rim Conference on Multimedia, PCM 2018 ; Conference date: 21-09-2018 Through 22-09-2018},
  abstract  = {Cross-modal information retrieval aims to find heterogeneous data of various modalities from a given query of one modality. The main challenge is to map different modalities into a common semantic space, in which distance between concepts in different modalities can be well modeled. For cross-modal information retrieval between images and texts, existing work mostly uses off-the-shelf Convolutional Neural Network (CNN) for image feature extraction. For texts, word-level features such as bag-of-words or word2vec are employed to build deep learning models to represent texts. Besides word-level semantics, the semantic relations between words are also informative but less explored. In this paper, we model texts by graphs using similarity measure based on word2vec. A dual-path neural network model is proposed for couple feature learning in cross-modal information retrieval. One path utilizes Graph Convolutional Network (GCN) for text modeling based on graph representations. The other path uses a neural network with layers of nonlinearities for image modeling based on off-the-shelf features. The model is trained by a pairwise similarity loss function to maximize the similarity of relevant text-image pairs and minimize the similarity of irrelevant pairs. Experimental results show that the proposed model outperforms the state-of-the-art methods significantly, with 17\% improvement on accuracy for the best case.},
}