% Workshop paper: "Addressing Instance Ambiguity in Web Harvesting",
% pages 6--12 of the WebDB 2015 proceedings (conference date 31 May 2015).
%
% Maintenance notes:
%   - The citation key is the exporter-generated identifier; keep it stable so
%     existing \cite commands continue to resolve.
%   - The full proceedings title lives in booktitle. It used to be split
%     between booktitle and series, which made styles print the workshop name
%     twice.
%   - internal-note is a non-standard field that styles do not print. It keeps
%     the exporter's copyright/conference remark verbatim. NOTE(review): the
%     copyright year there reads 2010 although the paper is from 2015 --
%     verify against the publisher page before relying on it.
%   - Given names wrapped in braces (Xiang Liang, Fabian M.) are kept exactly
%     as exported so they are treated as a single unit.
@inproceedings{78921ace53fc4c77996c3c546f1b3ec0,
  author        = {Li, Zhixu and Zhang, {Xiang Liang} and Huang, Hai and Xie, Qing and Zhu, Jia and Zhou, Xiaofang},
  title         = {Addressing Instance Ambiguity in {Web} Harvesting},
  booktitle     = {18th International Workshop on the Web and Databases, {WebDB} 2015: Freshness, Correctness, Quality of Information and Knowledge on the Web - Proceedings},
  editor        = {Stoyanovich, Julia and Suchanek, {Fabian M.}},
  pages         = {6--12},
  publisher     = {Association for Computing Machinery, Inc},
  year          = {2015},
  month         = may,
  day           = {31},
  doi           = {10.1145/2767109.2767114},
  language      = {English (US)},
  abstract      = {Web Harvesting enables the enrichment of incomplete data sets by retrieving required information from the Web. However, the ambiguity of instances may greatly decrease the quality of the harvested data, given that any instance in the local data set may become ambiguous when attempting to identify it on the Web. Although plenty of disambiguation methods have been proposed to deal with the ambiguity problems in various settings, none of them are able to handle the instance ambiguity problem in Web Harvesting. In this paper, we propose to do instance disambiguation in Web Harvesting with a novel disambiguation method inspired by the idea of collaborative identity recognition. In particular, we expect to find some common properties in forms of latent shared attribute values among instances in the list, such that these shared attribute values can differentiate instances within the list against those ambiguous ones on the Web. Our extensive experimental evaluation illustrates the utility of collaborative disambiguation for a popular Web Harvesting application, and shows that it substantially improves the accuracy of the harvested data.},
  internal-note = {Publisher Copyright: Copyright {\textcopyright}2010 by the Association for Computing Machinery.; 18th International Workshop on the Web and Databases, WebDB 2015 ; Conference date: 31-05-2015},
}