% PatchRefiner (Li, Bhat, Wonka): conference paper in the ECCV 2024 proceedings,
% published in the Lecture Notes in Computer Science series, pages 250--267.
% NOTE(review): "address" below holds a country, not the publisher's city -- confirm the city and update.
% NOTE(review): no LNCS "volume" number is recorded for this entry -- add it once verified.
@inproceedings{11c19b84e6cc44f985606adf06ca714e,
title = "{PatchRefiner}: Leveraging Synthetic Data for Real-Domain High-Resolution Monocular Metric Depth Estimation",
abstract = "This paper introduces PatchRefiner, an advanced framework for metric single image depth estimation aimed at high-resolution real-domain inputs. While depth estimation is crucial for applications such as autonomous driving, 3D generative modeling, and 3D reconstruction, achieving accurate high-resolution depth in real-world scenarios is challenging due to the constraints of existing architectures and the scarcity of detailed real-world depth data. PatchRefiner adopts a tile-based methodology, reconceptualizing high-resolution depth estimation as a refinement process, which results in notable performance enhancements. Utilizing a pseudo-labeling strategy that leverages synthetic data, PatchRefiner incorporates a Detail and Scale Disentangling (DSD) loss to enhance detail capture while maintaining scale accuracy, thus facilitating the effective transfer of knowledge from synthetic to real-world data. Our extensive evaluations demonstrate PatchRefiner{\textquoteright}s superior performance, significantly outperforming existing benchmarks on the Unreal4KStereo dataset by 18.1\% in terms of the root mean squared error (RMSE) and showing marked improvements in detail accuracy and consistent scale estimation on diverse real-world datasets like CityScape, ScanNet++, and ETH3D.",
keywords = "High-Resolution Metric Depth Estimation, Synthetic Data",
author = "Li, Zhenyu and Bhat, Shariq Farooq and Wonka, Peter",
note = "Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.; 18th European Conference on Computer Vision, ECCV 2024 ; Conference date: 29-09-2024 Through 04-10-2024",
year = "2025",
doi = "10.1007/978-3-031-72855-6_15",
language = "English (US)",
isbn = "9783031728549",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Science and Business Media Deutschland GmbH",
pages = "250--267",
editor = "Ale{\v s} Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and G{\"u}l Varol",
booktitle = "Computer Vision -- {ECCV} 2024 - 18th European Conference, Proceedings",
address = "Germany",
}