diff --git a/include/Scrape.php b/include/Scrape.php index ad9e030a3..99784af33 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -263,11 +263,6 @@ function scrape_feed($url) { } try { - // Cleanup invalid HTML - $doc = new DOMDocument(); - @$doc->loadHTML($s); - $s = $doc->saveHTML(); - $dom = HTML5_Parser::parse($s); } catch (DOMException $e) { logger('scrape_feed: parse error: ' . $e); diff --git a/library/HTML5/Parser.php b/library/HTML5/Parser.php index 5f9ca560e..c7faf875a 100644 --- a/library/HTML5/Parser.php +++ b/library/HTML5/Parser.php @@ -17,6 +17,12 @@ class HTML5_Parser * @return Parsed HTML as DOMDocument */ static public function parse($text, $builder = null) { + + // Cleanup invalid HTML + $doc = new DOMDocument(); + @$doc->loadHTML($text); + $text = $doc->saveHTML(); + $tokenizer = new HTML5_Tokenizer($text, $builder); $tokenizer->parse(); return $tokenizer->save();