From 620ee6be0bdb4e706abfa2172d3dfb3e2d035e25 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 8 Dec 2014 14:26:44 +0100 Subject: [PATCH] Cleanup the HTML before parsing it. --- include/Scrape.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/Scrape.php b/include/Scrape.php index 99784af336..ad9e030a39 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -263,6 +263,11 @@ function scrape_feed($url) { } try { + // Cleanup invalid HTML + $doc = new DOMDocument(); + @$doc->loadHTML($s); + $s = $doc->saveHTML(); + $dom = HTML5_Parser::parse($s); } catch (DOMException $e) { logger('scrape_feed: parse error: ' . $e);