From 793967a1d3c23fcf1f3b00a2832f51e6f473f4bd Mon Sep 17 00:00:00 2001 From: Friendika Date: Mon, 4 Apr 2011 19:36:18 -0700 Subject: [PATCH] better handling of troublesome feeds. --- boot.php | 42 +++++++++++++++++++++++++++++++----------- include/Scrape.php | 2 +- include/items.php | 23 +++++++++++++++-------- include/poller.php | 2 +- mod/dfrn_confirm.php | 2 +- mod/dfrn_poll.php | 4 ++-- mod/follow.php | 14 +++++++++++--- 7 files changed, 62 insertions(+), 27 deletions(-) diff --git a/boot.php b/boot.php index 3b86d0dbe..f5c0e6f92 100644 --- a/boot.php +++ b/boot.php @@ -1478,7 +1478,9 @@ function lrdd($uri) { return array(); logger('lrdd: host_meta: ' . $xml, LOGGER_DATA); - $h = simplexml_load_string($xml); + + $h = parse_xml_string($xml); + $arr = convert_xml_element_to_array($h); if(isset($arr['xrd']['property'])) { @@ -1550,16 +1552,19 @@ function lrdd($uri) { $headers = $a->get_curl_headers(); logger('lrdd: headers=' . $headers, LOGGER_DEBUG); - require_once('library/HTML5/Parser.php'); - $dom = @HTML5_Parser::parse($html); + // don't try and parse raw xml as html + if(! strstr($html,'getElementsByTagName('link'); - foreach($items as $item) { - $x = $item->getAttribute('rel'); - if($x == "lrdd") { - $pagelink = $item->getAttribute('href'); - break; + if($dom) { + $items = $dom->getElementsByTagName('link'); + foreach($items as $item) { + $x = $item->getAttribute('rel'); + if($x == "lrdd") { + $pagelink = $item->getAttribute('href'); + break; + } } } } @@ -1638,7 +1643,7 @@ function fetch_xrd_links($url) { return array(); logger('fetch_xrd_links: ' . $xml, LOGGER_DATA); - $h = simplexml_load_string($xml); + $h = parse_xml_string($xml); $arr = convert_xml_element_to_array($h); $links = array(); @@ -2759,3 +2764,18 @@ function lang_selector() { $o .= ''; return $o; }} + + +if(! function_exists('parse_xml_string')) { +function parse_xml_string($s) { + if(! strstr($s,'' . "\r\n"; - $r = @simplexml_load_string($item['object']); + $r = parse_xml_string($item['object']); if($r->type) $o .= '' . xmlify($r->type) . '' . "\r\n"; if($r->id) @@ -206,7 +206,7 @@ function construct_activity_target($item) { if($item['target']) { $o = '' . "\r\n"; - $r = @simplexml_load_string($item['target']); + $r = parse_xml_string($item['target']); if($r->type) $o .= '' . xmlify($r->type) . '' . "\r\n"; if($r->id) @@ -241,8 +241,14 @@ function get_atom_elements($feed,$item) { $res = array(); $author = $item->get_author(); - $res['author-name'] = unxmlify($author->get_name()); - $res['author-link'] = unxmlify($author->get_link()); + if($author) { + $res['author-name'] = unxmlify($author->get_name()); + $res['author-link'] = unxmlify($author->get_link()); + } + else { + $res['author-name'] = unxmlify($feed->get_title()); + $res['author-link'] = unxmlify($feed->get_permalink()); + } $res['uri'] = unxmlify($item->get_id()); $res['title'] = unxmlify($item->get_title()); $res['body'] = unxmlify($item->get_content()); @@ -343,7 +349,6 @@ function get_atom_elements($feed,$item) { // the wild, by sanitising it and converting supported tags to bbcode before we rip out any remaining // html. - if((strpos($res['body'],'<') !== false) || (strpos($res['body'],'>') !== false)) { $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', @@ -783,7 +788,7 @@ function dfrn_deliver($owner,$contact,$atom, $dissolve = false) { return 3; } - $res = simplexml_load_string($xml); + $res = parse_xml_string($xml); if((intval($res->status) != 0) || (! strlen($res->challenge)) || (! strlen($res->dfrn_id))) return (($res->status) ? $res->status : 3); @@ -878,7 +883,7 @@ function dfrn_deliver($owner,$contact,$atom, $dissolve = false) { return 3; } - $res = simplexml_load_string($xml); + $res = parse_xml_string($xml); return $res->status; @@ -916,6 +921,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee if($feed->error()) logger('consume_feed: Error parsing XML: ' . $feed->error()); + $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo @@ -1230,6 +1236,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee // Head post of a conversation. Have we seen it? If not, import it. $item_id = $item->get_id(); + $datarray = get_atom_elements($feed,$item); $r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", @@ -1275,7 +1282,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee if(! is_array($contact)) return; - if($contact['network'] === 'stat') { + if($contact['network'] === 'stat' || stristr($permalink,'twitter.com')) { if(strlen($datarray['title'])) unset($datarray['title']); $datarray['last-child'] = 1; diff --git a/include/poller.php b/include/poller.php index 3b80c1c04..9362c28b3 100644 --- a/include/poller.php +++ b/include/poller.php @@ -203,7 +203,7 @@ function poller_run($argv, $argc){ } - $res = simplexml_load_string($xml); + $res = parse_xml_string($xml); if(intval($res->status) == 1) { logger("poller: $url replied status 1 - marking for death "); diff --git a/mod/dfrn_confirm.php b/mod/dfrn_confirm.php index 1bf1ba954..2db745d25 100644 --- a/mod/dfrn_confirm.php +++ b/mod/dfrn_confirm.php @@ -240,7 +240,7 @@ function dfrn_confirm_post(&$a,$handsfree = null) { notice( t('Unexpected response from remote site: ') . EOL . $leading_junk . EOL ); } - $xml = simplexml_load_string($res); + $xml = parse_xml_string($res); $status = (int) $xml->status; $message = unxmlify($xml->message); // human readable text of what may have gone wrong. switch($status) { diff --git a/mod/dfrn_poll.php b/mod/dfrn_poll.php index 5149dc3b2..2ccfadd03 100644 --- a/mod/dfrn_poll.php +++ b/mod/dfrn_poll.php @@ -69,7 +69,7 @@ function dfrn_poll_init(&$a) { if(strlen($s)) { - $xml = simplexml_load_string($s); + $xml = parse_xml_string($s); if((int) $xml->status == 1) { $_SESSION['authenticated'] = 1; @@ -468,7 +468,7 @@ function dfrn_poll_content(&$a) { if(strlen($s) && strstr($s,'get_permalink(); + if(isset($lnk) && strlen($lnk)) + $profile = $lnk; + if(! (x($vcard,'fn'))) + $vcard['fn'] = notags($feed->get_title()); + if(! (x($vcard,'fn'))) + $vcard['fn'] = notags($feed->get_description()); $network = 'feed'; $priority = 2; }