Purify HTML before trying to parse wild URLs. This way, at least it should parse.
This commit is contained in:
parent
92831c9416
commit
24d41e2c6e
1 changed file with 13 additions and 4 deletions
|
@ -1,6 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once('library/HTML5/Parser.php');
|
require_once('library/HTML5/Parser.php');
|
||||||
|
require_once('library/HTMLPurifier.auto.php');
|
||||||
|
|
||||||
function parse_url_content(&$a) {
|
function parse_url_content(&$a) {
|
||||||
|
|
||||||
|
@ -31,16 +32,25 @@ function parse_url_content(&$a) {
|
||||||
killme();
|
killme();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger('parse_url: data: ' . $s, LOGGER_DATA);
|
||||||
|
|
||||||
if(! $s) {
|
if(! $s) {
|
||||||
echo sprintf($template,$url,$url,'');
|
echo sprintf($template,$url,$url,'');
|
||||||
killme();
|
killme();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Cache.DefinitionImpl', null);
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
|
$s = $purifier->purify($s);
|
||||||
|
|
||||||
$dom = @HTML5_Parser::parse($s);
|
$dom = @HTML5_Parser::parse($s);
|
||||||
|
|
||||||
if(! $dom)
|
if(! $dom) {
|
||||||
return $ret;
|
echo sprintf($template,$url,$url,'');
|
||||||
|
killme();
|
||||||
|
}
|
||||||
|
|
||||||
$items = $dom->getElementsByTagName('title');
|
$items = $dom->getElementsByTagName('title');
|
||||||
|
|
||||||
|
@ -51,7 +61,6 @@ function parse_url_content(&$a) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
$divs = $dom->getElementsByTagName('div');
|
$divs = $dom->getElementsByTagName('div');
|
||||||
if($divs) {
|
if($divs) {
|
||||||
foreach($divs as $div) {
|
foreach($divs as $div) {
|
||||||
|
@ -94,6 +103,6 @@ function parse_url_content(&$a) {
|
||||||
$text = '<br />' . $text;
|
$text = '<br />' . $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
echo sprintf($template,$url,$title,$text);
|
echo sprintf($template,$url,($title) ? $title : $url,$text);
|
||||||
killme();
|
killme();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue