better handling of troublesome feeds.
This commit is contained in:
parent
178362e50b
commit
793967a1d3
7 changed files with 62 additions and 27 deletions
24
boot.php
24
boot.php
|
@ -1478,7 +1478,9 @@ function lrdd($uri) {
|
||||||
return array();
|
return array();
|
||||||
|
|
||||||
logger('lrdd: host_meta: ' . $xml, LOGGER_DATA);
|
logger('lrdd: host_meta: ' . $xml, LOGGER_DATA);
|
||||||
$h = simplexml_load_string($xml);
|
|
||||||
|
$h = parse_xml_string($xml);
|
||||||
|
|
||||||
$arr = convert_xml_element_to_array($h);
|
$arr = convert_xml_element_to_array($h);
|
||||||
|
|
||||||
if(isset($arr['xrd']['property'])) {
|
if(isset($arr['xrd']['property'])) {
|
||||||
|
@ -1550,6 +1552,8 @@ function lrdd($uri) {
|
||||||
$headers = $a->get_curl_headers();
|
$headers = $a->get_curl_headers();
|
||||||
logger('lrdd: headers=' . $headers, LOGGER_DEBUG);
|
logger('lrdd: headers=' . $headers, LOGGER_DEBUG);
|
||||||
|
|
||||||
|
// don't try and parse raw xml as html
|
||||||
|
if(! strstr($html,'<?xml')) {
|
||||||
require_once('library/HTML5/Parser.php');
|
require_once('library/HTML5/Parser.php');
|
||||||
$dom = @HTML5_Parser::parse($html);
|
$dom = @HTML5_Parser::parse($html);
|
||||||
|
|
||||||
|
@ -1563,6 +1567,7 @@ function lrdd($uri) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(isset($pagelink))
|
if(isset($pagelink))
|
||||||
return(fetch_xrd_links($pagelink));
|
return(fetch_xrd_links($pagelink));
|
||||||
|
@ -1638,7 +1643,7 @@ function fetch_xrd_links($url) {
|
||||||
return array();
|
return array();
|
||||||
|
|
||||||
logger('fetch_xrd_links: ' . $xml, LOGGER_DATA);
|
logger('fetch_xrd_links: ' . $xml, LOGGER_DATA);
|
||||||
$h = simplexml_load_string($xml);
|
$h = parse_xml_string($xml);
|
||||||
$arr = convert_xml_element_to_array($h);
|
$arr = convert_xml_element_to_array($h);
|
||||||
|
|
||||||
$links = array();
|
$links = array();
|
||||||
|
@ -2759,3 +2764,18 @@ function lang_selector() {
|
||||||
$o .= '</select></form></div>';
|
$o .= '</select></form></div>';
|
||||||
return $o;
|
return $o;
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
|
||||||
|
if(! function_exists('parse_xml_string')) {
/**
 * Parse an XML document out of a string that may carry leading junk.
 *
 * Everything before the first '<?xml' declaration is discarded before the
 * remainder is handed to simplexml_load_string(). Parse problems reported by
 * libxml are written to the log (LOGGER_DATA level) rather than emitted as
 * PHP warnings.
 *
 * Note: libxml internal error handling is switched on here and is left
 * enabled when the function returns.
 *
 * @param string $s Raw text expected to contain an XML document.
 * @return SimpleXMLElement|false The parsed document; false when $s holds no
 *                                '<?xml' declaration or the XML cannot be parsed.
 */
function parse_xml_string($s) {

	// Locate the XML declaration once; no declaration means nothing to parse.
	$start = strpos($s,'<?xml');
	if($start === false)
		return false;

	// Drop any leading junk in front of the declaration.
	$s2 = substr($s,$start);

	// Have libxml buffer its errors so they can be logged below.
	libxml_use_internal_errors(true);

	$x = @simplexml_load_string($s2);

	foreach(libxml_get_errors() as $err) {
		// $err is a LibXMLError object and cannot be concatenated directly;
		// log its individual fields instead.
		logger('libxml: parse: ' . $err->code . ' at ' . $err->line . ':' . $err->column . ' : ' . trim($err->message), LOGGER_DATA);
	}

	// Empty the buffer so later parses do not re-report these errors.
	libxml_clear_errors();

	return $x;
}}
|
||||||
|
|
|
@ -216,7 +216,7 @@ function scrape_feed($url) {
|
||||||
}
|
}
|
||||||
if(stristr($line,'application/rss+xml') || stristr($s,'<rss')) {
|
if(stristr($line,'application/rss+xml') || stristr($s,'<rss')) {
|
||||||
$ret['feed_rss'] = $url;
|
$ret['feed_rss'] = $url;
|
||||||
return ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -180,7 +180,7 @@ function construct_activity_object($item) {
|
||||||
|
|
||||||
if($item['object']) {
|
if($item['object']) {
|
||||||
$o = '<as:object>' . "\r\n";
|
$o = '<as:object>' . "\r\n";
|
||||||
$r = @simplexml_load_string($item['object']);
|
$r = parse_xml_string($item['object']);
|
||||||
if($r->type)
|
if($r->type)
|
||||||
$o .= '<as:object-type>' . xmlify($r->type) . '</as:object-type>' . "\r\n";
|
$o .= '<as:object-type>' . xmlify($r->type) . '</as:object-type>' . "\r\n";
|
||||||
if($r->id)
|
if($r->id)
|
||||||
|
@ -206,7 +206,7 @@ function construct_activity_target($item) {
|
||||||
|
|
||||||
if($item['target']) {
|
if($item['target']) {
|
||||||
$o = '<as:target>' . "\r\n";
|
$o = '<as:target>' . "\r\n";
|
||||||
$r = @simplexml_load_string($item['target']);
|
$r = parse_xml_string($item['target']);
|
||||||
if($r->type)
|
if($r->type)
|
||||||
$o .= '<as:object-type>' . xmlify($r->type) . '</as:object-type>' . "\r\n";
|
$o .= '<as:object-type>' . xmlify($r->type) . '</as:object-type>' . "\r\n";
|
||||||
if($r->id)
|
if($r->id)
|
||||||
|
@ -241,8 +241,14 @@ function get_atom_elements($feed,$item) {
|
||||||
$res = array();
|
$res = array();
|
||||||
|
|
||||||
$author = $item->get_author();
|
$author = $item->get_author();
|
||||||
|
if($author) {
|
||||||
$res['author-name'] = unxmlify($author->get_name());
|
$res['author-name'] = unxmlify($author->get_name());
|
||||||
$res['author-link'] = unxmlify($author->get_link());
|
$res['author-link'] = unxmlify($author->get_link());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$res['author-name'] = unxmlify($feed->get_title());
|
||||||
|
$res['author-link'] = unxmlify($feed->get_permalink());
|
||||||
|
}
|
||||||
$res['uri'] = unxmlify($item->get_id());
|
$res['uri'] = unxmlify($item->get_id());
|
||||||
$res['title'] = unxmlify($item->get_title());
|
$res['title'] = unxmlify($item->get_title());
|
||||||
$res['body'] = unxmlify($item->get_content());
|
$res['body'] = unxmlify($item->get_content());
|
||||||
|
@ -343,7 +349,6 @@ function get_atom_elements($feed,$item) {
|
||||||
// the wild, by sanitising it and converting supported tags to bbcode before we rip out any remaining
|
// the wild, by sanitising it and converting supported tags to bbcode before we rip out any remaining
|
||||||
// html.
|
// html.
|
||||||
|
|
||||||
|
|
||||||
if((strpos($res['body'],'<') !== false) || (strpos($res['body'],'>') !== false)) {
|
if((strpos($res['body'],'<') !== false) || (strpos($res['body'],'>') !== false)) {
|
||||||
|
|
||||||
$res['body'] = preg_replace('#<object[^>]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s',
|
$res['body'] = preg_replace('#<object[^>]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s',
|
||||||
|
@ -783,7 +788,7 @@ function dfrn_deliver($owner,$contact,$atom, $dissolve = false) {
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
$res = simplexml_load_string($xml);
|
$res = parse_xml_string($xml);
|
||||||
|
|
||||||
if((intval($res->status) != 0) || (! strlen($res->challenge)) || (! strlen($res->dfrn_id)))
|
if((intval($res->status) != 0) || (! strlen($res->challenge)) || (! strlen($res->dfrn_id)))
|
||||||
return (($res->status) ? $res->status : 3);
|
return (($res->status) ? $res->status : 3);
|
||||||
|
@ -878,7 +883,7 @@ function dfrn_deliver($owner,$contact,$atom, $dissolve = false) {
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
$res = simplexml_load_string($xml);
|
$res = parse_xml_string($xml);
|
||||||
|
|
||||||
return $res->status;
|
return $res->status;
|
||||||
|
|
||||||
|
@ -916,6 +921,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee
|
||||||
if($feed->error())
|
if($feed->error())
|
||||||
logger('consume_feed: Error parsing XML: ' . $feed->error());
|
logger('consume_feed: Error parsing XML: ' . $feed->error());
|
||||||
|
|
||||||
|
$permalink = $feed->get_permalink();
|
||||||
|
|
||||||
// Check at the feed level for updated contact name and/or photo
|
// Check at the feed level for updated contact name and/or photo
|
||||||
|
|
||||||
|
@ -1230,6 +1236,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee
|
||||||
// Head post of a conversation. Have we seen it? If not, import it.
|
// Head post of a conversation. Have we seen it? If not, import it.
|
||||||
|
|
||||||
$item_id = $item->get_id();
|
$item_id = $item->get_id();
|
||||||
|
|
||||||
$datarray = get_atom_elements($feed,$item);
|
$datarray = get_atom_elements($feed,$item);
|
||||||
|
|
||||||
$r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1",
|
$r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1",
|
||||||
|
@ -1275,7 +1282,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee
|
||||||
if(! is_array($contact))
|
if(! is_array($contact))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if($contact['network'] === 'stat') {
|
if($contact['network'] === 'stat' || stristr($permalink,'twitter.com')) {
|
||||||
if(strlen($datarray['title']))
|
if(strlen($datarray['title']))
|
||||||
unset($datarray['title']);
|
unset($datarray['title']);
|
||||||
$datarray['last-child'] = 1;
|
$datarray['last-child'] = 1;
|
||||||
|
|
|
@ -203,7 +203,7 @@ function poller_run($argv, $argc){
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
$res = simplexml_load_string($xml);
|
$res = parse_xml_string($xml);
|
||||||
|
|
||||||
if(intval($res->status) == 1) {
|
if(intval($res->status) == 1) {
|
||||||
logger("poller: $url replied status 1 - marking for death ");
|
logger("poller: $url replied status 1 - marking for death ");
|
||||||
|
|
|
@ -240,7 +240,7 @@ function dfrn_confirm_post(&$a,$handsfree = null) {
|
||||||
notice( t('Unexpected response from remote site: ') . EOL . $leading_junk . EOL );
|
notice( t('Unexpected response from remote site: ') . EOL . $leading_junk . EOL );
|
||||||
}
|
}
|
||||||
|
|
||||||
$xml = simplexml_load_string($res);
|
$xml = parse_xml_string($res);
|
||||||
$status = (int) $xml->status;
|
$status = (int) $xml->status;
|
||||||
$message = unxmlify($xml->message); // human readable text of what may have gone wrong.
|
$message = unxmlify($xml->message); // human readable text of what may have gone wrong.
|
||||||
switch($status) {
|
switch($status) {
|
||||||
|
|
|
@ -69,7 +69,7 @@ function dfrn_poll_init(&$a) {
|
||||||
|
|
||||||
if(strlen($s)) {
|
if(strlen($s)) {
|
||||||
|
|
||||||
$xml = simplexml_load_string($s);
|
$xml = parse_xml_string($s);
|
||||||
|
|
||||||
if((int) $xml->status == 1) {
|
if((int) $xml->status == 1) {
|
||||||
$_SESSION['authenticated'] = 1;
|
$_SESSION['authenticated'] = 1;
|
||||||
|
@ -468,7 +468,7 @@ function dfrn_poll_content(&$a) {
|
||||||
|
|
||||||
if(strlen($s) && strstr($s,'<?xml')) {
|
if(strlen($s) && strstr($s,'<?xml')) {
|
||||||
|
|
||||||
$xml = simplexml_load_string($s);
|
$xml = parse_xml_string($s);
|
||||||
|
|
||||||
logger('dfrn_poll: profile: parsed xml: ' . print_r($xml,true), LOGGER_DATA);
|
logger('dfrn_poll: profile: parsed xml: ' . print_r($xml,true), LOGGER_DATA);
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,8 @@ function follow_post(&$a) {
|
||||||
$email_conversant = false;
|
$email_conversant = false;
|
||||||
|
|
||||||
if($url) {
|
if($url) {
|
||||||
$links = @lrdd($url);
|
$links = lrdd($url);
|
||||||
|
|
||||||
if(count($links)) {
|
if(count($links)) {
|
||||||
foreach($links as $link) {
|
foreach($links as $link) {
|
||||||
if($link['@attributes']['rel'] === NAMESPACE_DFRN)
|
if($link['@attributes']['rel'] === NAMESPACE_DFRN)
|
||||||
|
@ -107,7 +108,7 @@ function follow_post(&$a) {
|
||||||
if((! isset($vcard)) && (! $poll)) {
|
if((! isset($vcard)) && (! $poll)) {
|
||||||
|
|
||||||
$ret = scrape_feed($url);
|
$ret = scrape_feed($url);
|
||||||
|
logger('mod_follow: scrape_feed returns: ' . print_r($ret,true), LOGGER_DATA);
|
||||||
if(count($ret) && ($ret['feed_atom'] || $ret['feed_rss'])) {
|
if(count($ret) && ($ret['feed_atom'] || $ret['feed_rss'])) {
|
||||||
$poll = ((x($ret,'feed_atom')) ? unamp($ret['feed_atom']) : unamp($ret['feed_rss']));
|
$poll = ((x($ret,'feed_atom')) ? unamp($ret['feed_atom']) : unamp($ret['feed_rss']));
|
||||||
$vcard = array();
|
$vcard = array();
|
||||||
|
@ -156,7 +157,14 @@ function follow_post(&$a) {
|
||||||
}
|
}
|
||||||
if((! $vcard['photo']) && strlen($email))
|
if((! $vcard['photo']) && strlen($email))
|
||||||
$vcard['photo'] = gravatar_img($email);
|
$vcard['photo'] = gravatar_img($email);
|
||||||
|
if($poll === $profile)
|
||||||
|
$lnk = $feed->get_permalink();
|
||||||
|
if(isset($lnk) && strlen($lnk))
|
||||||
|
$profile = $lnk;
|
||||||
|
if(! (x($vcard,'fn')))
|
||||||
|
$vcard['fn'] = notags($feed->get_title());
|
||||||
|
if(! (x($vcard,'fn')))
|
||||||
|
$vcard['fn'] = notags($feed->get_description());
|
||||||
$network = 'feed';
|
$network = 'feed';
|
||||||
$priority = 2;
|
$priority = 2;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue