Clean up the HTML before parsing it.

This commit is contained in:
Michael Vogel 2014-12-08 14:26:44 +01:00
parent c398974918
commit 620ee6be0b

View file

@@ -263,6 +263,11 @@ function scrape_feed($url) {
} }
try { try {
// Cleanup invalid HTML
$doc = new DOMDocument();
@$doc->loadHTML($s);
$s = $doc->saveHTML();
$dom = HTML5_Parser::parse($s); $dom = HTML5_Parser::parse($s);
} catch (DOMException $e) { } catch (DOMException $e) {
logger('scrape_feed: parse error: ' . $e); logger('scrape_feed: parse error: ' . $e);