@comment{
  Conference paper "CUS3D" from the 2024 IEEE International Conference on
  Multimedia and Expo (ICME 2024).
  - The citation key is kept unchanged so existing citations keep resolving.
  - Acronyms in title, booktitle and series are brace-protected so that
    sentence-casing styles do not lowercase them.
  - language and address were stored as non-ASCII (Chinese) strings and are
    now given in English. NOTE(review): address normally holds the publisher
    city; only the country is available here -- confirm and refine.
  - The abstract wording is kept as exported; only the quotation-mark
    escapes and the missing spaces after them were repaired.
}
@inproceedings{ba128db0d4174326a265cdf5831ab16e,
  author    = {Yu, Fuyang and Tian, Runze and Wang, Zhen and Wang, Xiaochuan and Liang, Xiaohui},
  title     = {{CUS3D}: {CLIP}-Based Unsupervised {3D} Segmentation via Object-Level Denoise},
  booktitle = {2024 {IEEE} International Conference on Multimedia and Expo, {ICME} 2024},
  series    = {Proceedings - {IEEE} International Conference on Multimedia and Expo},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2024},
  doi       = {10.1109/ICME57554.2024.10687429},
  language  = {English},
  keywords  = {Knowledge Distillation, Multimodal, Open-vocabulary, Point Cloud, Unsupervised Semantic Segmentation},
  abstract  = {To ease the difficulty of acquiring annotation labels in 3D data, a common method is using unsupervised and open-vocabulary semantic segmentation, which leverage 2D CLIP semantic knowledge. In this paper, unlike previous research that ignores the ``noise'' raised during feature projection from 2D to 3D, we propose a novel distillation learning framework named CUS3D. In our approach, an object-level denosing projection module is designed to screen out the ``noise'' and ensure more accurate 3D feature. Based on the obtained features, a multimodal distillation learning module is designed to align the 3D feature with CLIP semantic feature space with object-centered constrains to achieve advanced unsupervised semantic segmentation. We conduct comprehensive experiments in both unsupervised and open-vocabulary segmentation, and the results consistently showcase the superiority of our model in achieving advanced unsupervised segmentation results and its effectiveness in open-vocabulary segmentation.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE; 2024 IEEE International Conference on Multimedia and Expo, ICME 2024; Conference dates: 2024-07-15 to 2024-07-19},
}