From 01164c8c2f802f0b4c2f0f7c4a83ed826768ae1a Mon Sep 17 00:00:00 2001 From: Friendika Date: Thu, 20 Oct 2011 16:48:07 -0700 Subject: [PATCH] fix various html parse errors --- include/Scrape.php | 27 ++++++++++++++++++++------- include/event.php | 7 ++++++- include/network.php | 7 ++++++- library/HTML5/TreeBuilder.php | 2 ++ mod/parse_url.php | 8 +++++--- view/diaspora_vcard.tpl | 6 +++--- 6 files changed, 42 insertions(+), 15 deletions(-) diff --git a/include/Scrape.php b/include/Scrape.php index 642b8e624c..f5b15c82fb 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -30,8 +30,11 @@ function scrape_dfrn($url) { } } - - $dom = HTML5_Parser::parse($s); + try { + $dom = HTML5_Parser::parse($s); + } catch (DOMException $e) { + logger('scrape_dfrn: parse error: ' . $e); + } if(! $dom) return $ret; @@ -132,9 +135,11 @@ function scrape_meta($url) { } } - - - $dom = HTML5_Parser::parse($s); + try { + $dom = HTML5_Parser::parse($s); + } catch (DOMException $e) { + logger('scrape_meta: parse error: ' . $e); + } if(! $dom) return $ret; @@ -177,7 +182,11 @@ function scrape_vcard($url) { } } - $dom = HTML5_Parser::parse($s); + try { + $dom = HTML5_Parser::parse($s); + } catch (DOMException $e) { + logger('scrape_vcard: parse error: ' . $e); + } if(! $dom) return $ret; @@ -243,7 +252,11 @@ function scrape_feed($url) { } } - $dom = HTML5_Parser::parse($s); + try { + $dom = HTML5_Parser::parse($s); + } catch (DOMException $e) { + logger('scrape_feed: parse error: ' . $e); + } if(! $dom) return $ret; diff --git a/include/event.php b/include/event.php index 746422a137..c7669b381c 100644 --- a/include/event.php +++ b/include/event.php @@ -53,7 +53,12 @@ function parse_event($h) { $ret = array(); - $dom = HTML5_Parser::parse($h); + + try { + $dom = HTML5_Parser::parse($h); + } catch (DOMException $e) { + logger('parse_event: parse error: ' . $e); + } if(! $dom) return $ret; diff --git a/include/network.php b/include/network.php index ec99d1e0dc..c6f5fd909f 100644 --- a/include/network.php +++ b/include/network.php @@ -429,7 +429,12 @@ function lrdd($uri) { // don't try and parse raw xml as html if(! strstr($html,'getElementsByTagName('link'); diff --git a/library/HTML5/TreeBuilder.php b/library/HTML5/TreeBuilder.php index 03e2ee77fd..a64816f567 100644 --- a/library/HTML5/TreeBuilder.php +++ b/library/HTML5/TreeBuilder.php @@ -3041,6 +3041,8 @@ class HTML5_TreeBuilder { if (!empty($token['attr'])) { foreach($token['attr'] as $attr) { + // mike@macgirvin.com 2011-10-21, stray double quotes cause everything to abort + $attr['name'] = str_replace('"','',$attr['name']); if(!$el->hasAttribute($attr['name'])) { $el->setAttribute($attr['name'], $attr['value']); } diff --git a/mod/parse_url.php b/mod/parse_url.php index a238ecb2f9..7a520ba92f 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -88,9 +88,11 @@ function parse_url_content(&$a) { $purifier = new HTMLPurifier($config); $s = $purifier->purify($s); -// logger('parse_url: purified: ' . $s, LOGGER_DATA); - - $dom = @HTML5_Parser::parse($s); + try { + $dom = HTML5_Parser::parse($s); + } catch (DOMException $e) { + logger('scrape_dfrn: parse error: ' . $e); + } if(! $dom) { echo sprintf($template,$url,$url,'') . $str_tags; diff --git a/view/diaspora_vcard.tpl b/view/diaspora_vcard.tpl index 684b1d18de..e71fc33ef2 100644 --- a/view/diaspora_vcard.tpl +++ b/view/diaspora_vcard.tpl @@ -33,19 +33,19 @@
Photo
- +
Photo
- +
Photo
- +