diff --git a/include/gprobe.php b/include/gprobe.php index 0cf32e95fe..36650eb9ae 100644 --- a/include/gprobe.php +++ b/include/gprobe.php @@ -10,7 +10,7 @@ function gprobe_run(&$argv, &$argc){ if(is_null($a)) { $a = new App; } - + if(is_null($db)) { @include(".htconfig.php"); require_once("include/dba.php"); @@ -37,6 +37,8 @@ function gprobe_run(&$argv, &$argc){ dbesc(normalise_link($url)) ); + logger("gprobe start for ".normalise_link($url), LOGGER_DEBUG); + if(! count($r)) { $arr = probe_url($url); @@ -55,7 +57,8 @@ function gprobe_run(&$argv, &$argc){ } if(count($r)) poco_load(0,0,$r[0]['id'], str_replace('/profile/','/poco/',$r[0]['url'])); - + + logger("gprobe end for ".normalise_link($url), LOGGER_DEBUG); return; } diff --git a/library/HTML5/Parser.php b/library/HTML5/Parser.php index 5f9ca560e5..c7faf875ad 100644 --- a/library/HTML5/Parser.php +++ b/library/HTML5/Parser.php @@ -17,6 +17,12 @@ class HTML5_Parser * @return Parsed HTML as DOMDocument */ static public function parse($text, $builder = null) { + + // Cleanup invalid HTML + $doc = new DOMDocument(); + @$doc->loadHTML($text); + $text = $doc->saveHTML(); + $tokenizer = new HTML5_Tokenizer($text, $builder); $tokenizer->parse(); return $tokenizer->save();