From 04106ff6f52b03d0d5b009f07a7b6ab11107f0c0 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 8 Dec 2014 22:37:49 +0100 Subject: [PATCH] Moving the clean up code to a central place. --- include/Scrape.php | 5 ----- library/HTML5/Parser.php | 6 ++++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/Scrape.php b/include/Scrape.php index ad9e030a39..99784af336 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -263,11 +263,6 @@ function scrape_feed($url) { } try { - // Cleanup invalid HTML - $doc = new DOMDocument(); - @$doc->loadHTML($s); - $s = $doc->saveHTML(); - $dom = HTML5_Parser::parse($s); } catch (DOMException $e) { logger('scrape_feed: parse error: ' . $e); diff --git a/library/HTML5/Parser.php b/library/HTML5/Parser.php index 5f9ca560e5..c7faf875ad 100644 --- a/library/HTML5/Parser.php +++ b/library/HTML5/Parser.php @@ -17,6 +17,12 @@ class HTML5_Parser * @return Parsed HTML as DOMDocument */ static public function parse($text, $builder = null) { + + // Cleanup invalid HTML + $doc = new DOMDocument(); + @$doc->loadHTML($text); + $text = $doc->saveHTML(); + $tokenizer = new HTML5_Tokenizer($text, $builder); $tokenizer->parse(); return $tokenizer->save();