% Working paper listed under the arXiv.org e-Print archive series; the note ties it to the
% ECML/PKDD 2010 Discovery Challenge Workshop.
% NOTE(review): year/month/day (2014-06-12) presumably give the e-print posting date, not the
% 2010 workshop date -- confirm before citing as a 2014 publication.
@techreport{69c98d954f794f96bf4ac5ec18dfb1c6,
  author      = {Lex, Elisabeth and Khan, Inayat and Bischof, Horst and Granitzer, Michael},
  title       = {Assessing the Quality of {Web} Content},
  type        = {Working Paper},
  institution = {Cornell University Library},
  publisher   = {Cornell University Library},
  series      = {arXiv.org e-Print archive},
  year        = {2014},
  month       = jun,
  day         = {12},
  note        = {4 pages, ECML/PKDD 2010 Discovery Challenge Workshop},
  language    = {English},
  keywords    = {information quality, classification, machine learning, big data},
  abstract    = {This paper describes our approach towards the ECML/PKDD Discovery Challenge 2010. The challenge consists of three tasks: (1) a Web genre and facet classification task for English hosts, (2) an English quality task, and (3) a multilingual quality task (German and French). In our approach, we create an ensemble of three classifiers to predict unseen Web hosts whereas each classifier is trained on a different feature set. Our final NDCG on the whole test set is 0.575 for Task 1, 0.852 for Task 2, and 0.81 (French) and 0.77 (German) for Task 3, which ranks second place in the ECML/PKDD Discovery Challenge 2010.},
}