|
|
@ -1,144 +1,6 @@ |
|
|
|
<?php |
|
|
|
|
|
|
|
require_once('library/HTML5/Parser.php'); |
|
|
|
require_once('include/crypto.php'); |
|
|
|
require_once('include/feed.php'); |
|
|
|
require_once('include/Probe.php'); |
|
|
|
|
|
|
|
if (! function_exists('scrape_dfrn')) {
/**
 * Collect DFRN profile information for a profile URL.
 *
 * The function first rewrites "/hcard/" to "/noscrape/" in the URL and tries
 * to fetch a JSON document from there. If that document decodes to an array
 * with a non-empty "nick", it is returned as-is. Otherwise the page at $url
 * is fetched and parsed as HTML: <link> elements contribute "feed_atom",
 * every "dfrn-*" rel and the "nick" (taken from an lrdd link), and elements
 * inside a "vcard" contribute guid, nickname, fn, searchable, key, url and
 * photo. Whatever the noscrape document provided is merged over the scraped
 * values.
 *
 * @param string $url        Profile URL to scrape
 * @param bool   $dont_probe When false, probe_url() is called and its "addr"
 *                           entry (if any) is copied into the result
 * @return array Collected profile data; may be empty when nothing could be
 *               fetched or parsed
 */
function scrape_dfrn($url, $dont_probe = false) {

	$a = get_app();

	$ret = array();

	logger('scrape_dfrn: url=' . $url);

	// Try to fetch the data from noscrape. This is faster than parsing the HTML
	$noscrape = str_replace("/hcard/", "/noscrape/", $url);
	$noscrapejson = fetch_url($noscrape);
	$noscrapedata = array();

	if ($noscrapejson) {
		$noscrapedata = json_decode($noscrapejson, true);

		if (is_array($noscrapedata)) {
			// isset() avoids an undefined-index notice when the document has no "nick"
			if (isset($noscrapedata["nick"]) && ($noscrapedata["nick"] != "")) {
				return($noscrapedata);
			}
			unset($noscrapedata["nick"]);
		} else {
			$noscrapedata = array();
		}
	}

	$s = fetch_url($url);

	if (!$s) {
		return $ret;
	}

	if (!$dont_probe) {
		$probe = probe_url($url);

		if (isset($probe["addr"])) {
			$ret["addr"] = $probe["addr"];
		}
	}

	// NOTE(review): the headers are read after probe_url() has run; if probe_url()
	// performs its own requests these may not be the headers of fetch_url($url) - confirm.
	$headers = $a->get_curl_headers();
	logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG);

	$lines = explode("\n", $headers);
	foreach ($lines as $line) {
		// don't try and run feeds through the html5 parser
		if (stristr($line, 'content-type:') && ((stristr($line, 'application/atom+xml')) || (stristr($line, 'application/rss+xml')))) {
			// Previously "return ret;" - a bare constant, not the result array.
			return $ret;
		}
	}

	// Pre-initialise so the check below is well defined when the parser throws.
	$dom = null;
	try {
		$dom = HTML5_Parser::parse($s);
	} catch (DOMException $e) {
		logger('scrape_dfrn: parse error: ' . $e);
	}

	if (! $dom) {
		return $ret;
	}

	// get DFRN link elements
	$items = $dom->getElementsByTagName('link');

	foreach ($items as $item) {
		$x = $item->getAttribute('rel');

		if (($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml')) {
			$ret['feed_atom'] = $item->getAttribute('href');
		}

		if (substr($x, 0, 5) == "dfrn-") {
			$ret[$x] = $item->getAttribute('href');
		}

		if ($x === 'lrdd') {
			$decoded = urldecode($item->getAttribute('href'));
			if (preg_match('/acct:([^@]*)@/', $decoded, $matches)) {
				$ret['nick'] = $matches[1];
			}
		}
	}

	// Pull out hCard profile elements
	$largest_photo = 0;

	$items = $dom->getElementsByTagName('*');
	foreach ($items as $item) {
		if (! attribute_contains($item->getAttribute('class'), 'vcard')) {
			continue;
		}

		$level2 = $item->getElementsByTagName('*');
		foreach ($level2 as $x) {
			$class = $x->getAttribute('class');

			if (attribute_contains($class, 'uid')) {
				$ret['guid'] = $x->textContent;
			}
			if (attribute_contains($class, 'nickname')) {
				$ret['nickname'] = $x->textContent;
			}
			if (attribute_contains($class, 'fn')) {
				$ret['fn'] = $x->textContent;
			}
			if (attribute_contains($class, 'searchable')) {
				$ret['searchable'] = $x->textContent;
			}
			if (attribute_contains($class, 'key')) {
				$ret['key'] = $x->textContent;
			}
			if (attribute_contains($class, 'url')) {
				$ret['url'] = $x->textContent;
			}

			if (attribute_contains($class, 'photo') || attribute_contains($class, 'avatar')) {
				$size = intval($x->getAttribute('width'));

				// dfrn prefers 175, so if we find this, we set largest_size so it can't be topped.
				if (($size > $largest_photo) || ($size == 175) || (! $largest_photo)) {
					$ret['photo'] = $x->getAttribute('src');
					$largest_photo = (($size == 175) ? 9999 : $size);
				}
			}
		}
	}

	// Values from the noscrape document take precedence over scraped ones.
	return array_merge($ret, $noscrapedata);
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (! function_exists('validate_dfrn')) {
/**
 * Count how many of the required DFRN entries are missing from $a.
 *
 * Each of "key", "dfrn-request", "dfrn-confirm", "dfrn-notify" and
 * "dfrn-poll" is tested with x(); every entry that fails the test adds
 * one to the result.
 *
 * @param array $a Data to check (e.g. the array built by scrape_dfrn())
 * @return int Number of failed checks; 0 means all five entries are present
 */
function validate_dfrn($a) {
	$required = array('key', 'dfrn-request', 'dfrn-confirm', 'dfrn-notify', 'dfrn-poll');

	$missing = 0;
	foreach ($required as $field) {
		if (! x($a, $field)) {
			$missing += 1;
		}
	}

	return $missing;
}}
|
|
|
|
|
|
|
/** |
|
|
|
* |
|
|
|
* Probe a network address to discover what kind of protocols we need to communicate with it. |
|
|
@ -157,7 +19,6 @@ function validate_dfrn($a) { |
|
|
|
* |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Mode values for the $mode parameter of probe_url().
 * PROBE_NORMAL is that parameter's default.
 */
define('PROBE_NORMAL',   0);
define('PROBE_DIASPORA', 1);
|
|
|
|
|
|
@ -172,57 +33,3 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { |
|
|
|
|
|
|
|
return $data; |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * @brief Find the matching part between two url
 *
 * Both URLs are passed through normalise_link() and split with parse_url().
 * Scheme, host and port have to agree; after that the paths are compared
 * segment by segment and the common leading segments are kept. The result
 * is passed through normalise_link() once more before it is returned.
 *
 * @param string $url1
 * @param string $url2
 * @return string The matching part, or "" when either URL is empty, lacks a
 *                scheme or host, or scheme/host/port differ
 */
function matching_url($url1, $url2) {

	if (($url1 == "") || ($url2 == "")) {
		return "";
	}

	$url1 = normalise_link($url1);
	$url2 = normalise_link($url2);

	$parts1 = parse_url($url1);
	$parts2 = parse_url($url2);

	// isset() also covers parse_url() returning false for a malformed URL.
	if (!isset($parts1["scheme"], $parts1["host"]) || !isset($parts2["scheme"], $parts2["host"])) {
		return "";
	}

	if ($parts1["scheme"] !== $parts2["scheme"]) {
		return "";
	}

	if ($parts1["host"] !== $parts2["host"]) {
		return "";
	}

	// "port" is only present when the URL names one explicitly.
	$port1 = isset($parts1["port"]) ? $parts1["port"] : null;
	$port2 = isset($parts2["port"]) ? $parts2["port"] : null;

	if ($port1 !== $port2) {
		return "";
	}

	$match = $parts1["scheme"]."://".$parts1["host"];

	if ($port1) {
		$match .= ":".$port1;
	}

	// "path" is absent for URLs like "http://host".
	$pathparts1 = explode("/", isset($parts1["path"]) ? $parts1["path"] : "");
	$pathparts2 = explode("/", isset($parts2["path"]) ? $parts2["path"] : "");

	// Only walk the segments that exist in both paths; running past the end
	// would compare null with null and append bogus "/" segments.
	$limit = min(count($pathparts1), count($pathparts2));

	$path = "";
	for ($i = 0; $i < $limit; $i++) {
		// Strict comparison: segments such as "1" and "01" must not match.
		if ($pathparts1[$i] !== $pathparts2[$i]) {
			break;
		}
		$path .= $pathparts1[$i]."/";
	}

	$match .= $path;

	return normalise_link($match);
}