Reduced network requests when probing

This commit is contained in:
Michael 2020-06-14 13:37:28 +00:00
parent 9b85d0b16e
commit b948887221
2 changed files with 201 additions and 158 deletions

View file

@ -47,6 +47,8 @@ use Friendica\Util\XML;
*/ */
class Probe class Probe
{ {
const WEBFINGER = '/.well-known/webfinger?resource={uri}';
private static $baseurl; private static $baseurl;
private static $istimeout; private static $istimeout;
@ -207,7 +209,7 @@ class Probe
return []; return [];
} }
$lrdd = ['application/jrd+json' => $host_url . '/.well-known/webfinger?resource={uri}']; $lrdd = [];
foreach ($links["xrd"]["link"] as $value => $link) { foreach ($links["xrd"]["link"] as $value => $link) {
if (!empty($link["@attributes"])) { if (!empty($link["@attributes"])) {
@ -285,65 +287,11 @@ class Probe
*/ */
public static function lrdd(string $uri) public static function lrdd(string $uri)
{ {
$lrdd = self::hostMeta($uri); $data = self::getWebfingerArray($uri);
$webfinger = null; if (empty($data)) {
if (is_bool($lrdd)) {
return []; return [];
} }
$webfinger = $data['webfinger'];
if (!$lrdd) {
$parts = @parse_url($uri);
if (!$parts || empty($parts["host"]) || empty($parts["path"])) {
return [];
}
$host = $parts['scheme'] . '://' . $parts["host"];
if (!empty($parts["port"])) {
$host .= ':'.$parts["port"];
}
$path_parts = explode("/", trim($parts["path"], "/"));
$nick = array_pop($path_parts);
do {
$lrdd = self::hostMeta($host);
$host .= "/".array_shift($path_parts);
} while (!$lrdd && (sizeof($path_parts) > 0));
}
if (!$lrdd) {
Logger::log("No lrdd data found for ".$uri, Logger::DEBUG);
return [];
}
foreach ($lrdd as $type => $template) {
if ($webfinger) {
continue;
}
$path = str_replace('{uri}', urlencode($uri), $template);
$webfinger = self::webfinger($path, $type);
if (!$webfinger && (strstr($uri, "@"))) {
$path = str_replace('{uri}', urlencode("acct:".$uri), $template);
$webfinger = self::webfinger($path, $type);
}
// Special treatment for Mastodon
// Problem is that Mastodon uses an URL format like http://domain.tld/@nick
// But the webfinger for this format fails.
if (!$webfinger && !empty($nick)) {
// Mastodon uses a "@" as prefix for usernames in their url format
$nick = ltrim($nick, '@');
$addr = $nick."@".$host;
$path = str_replace('{uri}', urlencode("acct:".$addr), $template);
$webfinger = self::webfinger($path, $type);
}
}
if (empty($webfinger["links"])) { if (empty($webfinger["links"])) {
Logger::log("No webfinger links found for ".$uri, Logger::DEBUG); Logger::log("No webfinger links found for ".$uri, Logger::DEBUG);
@ -381,8 +329,9 @@ class Probe
*/ */
public static function uri($uri, $network = '', $uid = -1, $cache = true) public static function uri($uri, $network = '', $uid = -1, $cache = true)
{ {
$cachekey = 'Probe::uri:' . $network . ':' . $uri;
if ($cache) { if ($cache) {
$result = DI::cache()->get('Probe::uri:' . $network . ':' . $uri); $result = DI::cache()->get($cachekey);
if (!is_null($result)) { if (!is_null($result)) {
return $result; return $result;
} }
@ -396,13 +345,16 @@ class Probe
if ($network != Protocol::ACTIVITYPUB) { if ($network != Protocol::ACTIVITYPUB) {
$data = self::detect($uri, $network, $uid); $data = self::detect($uri, $network, $uid);
if (!is_array($data)) {
$data = [];
}
} else { } else {
$data = null; $data = [];
} }
// When the previous detection process had got a time out // When the previous detection process had got a time out
// we could falsely detect a Friendica profile as AP profile. // we could falsely detect a Friendica profile as AP profile.
if (!self::$istimeout) { if (!self::$istimeout && (empty($network) || $network == Protocol::ACTIVITYPUB)) {
$ap_profile = ActivityPub::probeProfile($uri, !$cache); $ap_profile = ActivityPub::probeProfile($uri, !$cache);
if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) { if (empty($data) || (!empty($ap_profile) && empty($network) && (($data['network'] ?? '') != Protocol::DFRN))) {
@ -411,8 +363,6 @@ class Probe
$ap_profile['batch'] = ''; $ap_profile['batch'] = '';
$data = array_merge($ap_profile, $data); $data = array_merge($ap_profile, $data);
} }
} else {
Logger::notice('Time out detected. AP will not be probed.', ['uri' => $uri]);
} }
if (!isset($data['url'])) { if (!isset($data['url'])) {
@ -441,10 +391,6 @@ class Probe
} }
} }
if (empty($data['baseurl']) && !empty(self::$baseurl)) {
$data['baseurl'] = self::$baseurl;
}
if (!empty($data['baseurl']) && empty($data['gsid'])) { if (!empty($data['baseurl']) && empty($data['gsid'])) {
$data['gsid'] = GServer::getID($data['baseurl']); $data['gsid'] = GServer::getID($data['baseurl']);
} }
@ -466,7 +412,7 @@ class Probe
// Only store into the cache if the value seems to be valid // Only store into the cache if the value seems to be valid
if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) { if (!in_array($data['network'], [Protocol::PHANTOM, Protocol::MAIL])) {
DI::cache()->set('Probe::uri:' . $network . ':' . $uri, $data, Duration::DAY); DI::cache()->set($cachekey, $data, Duration::DAY);
} }
return $data; return $data;
@ -558,6 +504,160 @@ class Probe
return $result; return $result;
} }
/**
 * Get webfinger data from a given URI
 *
 * Two kinds of input are handled:
 *  - a URL with scheme and host: the webfinger endpoint of the host is asked
 *    first, then the host-meta data; when both fail the leading path parts are
 *    appended to the host one by one and the lookup is repeated.
 *  - an address containing "@" (an "acct:" prefix is removed): the webfinger
 *    endpoint is asked via https, then via http, and finally host-meta is used.
 *
 * @param string $uri Profile URL or address ("nick@host")
 * @return array Empty array when nothing was found. Otherwise the result of
 *               getWebfinger() ('webfinger', 'detected') extended by 'baseurl'
 *               and - only when the guessed address was the one that got
 *               detected - 'nick' and 'addr'.
 */
private static function getWebfingerArray(string $uri)
{
	$parts = parse_url($uri);

	// Defined up front so that every branch leaves them in a known state
	$lrdd      = [];
	$webfinger = [];
	$nick      = '';
	$addr      = '';

	if (!empty($parts['scheme']) && !empty($parts['host'])) {
		$host = $parts['host'];
		if (!empty($parts['port'])) {
			$host .= ':' . $parts['port'];
		}

		$baseurl = $parts['scheme'] . '://' . $host;

		// explode() never returns an empty array: an empty path results in [''].
		$path_parts = explode('/', trim($parts['path'] ?? '', '/'));
		$nick = ltrim(end($path_parts), '@');

		// Without a path there is no nick to guess ("@host" is not an address).
		// When the last part of the URI is numeric then it is most likely an ID and not a nick name
		if (($nick === '') || is_numeric($nick)) {
			$nick = '';
		} else {
			$addr = $nick . '@' . $host;
		}

		$webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
		if (empty($webfinger)) {
			$lrdd = self::hostMeta($host);
		}

		if (empty($webfinger) && empty($lrdd)) {
			// The system could be installed in a sub directory, so the path parts are added step by step
			while (empty($lrdd) && empty($webfinger) && (sizeof($path_parts) > 1)) {
				$host .= '/' . array_shift($path_parts);
				$baseurl = $parts['scheme'] . '://' . $host;

				if (!empty($nick)) {
					$addr = $nick . '@' . $host;
				}

				$webfinger = self::getWebfinger($parts['scheme'] . '://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
				if (empty($webfinger)) {
					$lrdd = self::hostMeta($host);
				}
			}

			if (empty($lrdd) && empty($webfinger)) {
				return [];
			}
		}
	} elseif (strstr($uri, '@')) {
		// Remove "acct:" from the URI
		$uri = str_replace('acct:', '', $uri);

		$host = substr($uri, strpos($uri, '@') + 1);
		$nick = substr($uri, 0, strpos($uri, '@'));
		$addr = $uri;

		$webfinger = self::getWebfinger('https://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
		if (self::$istimeout) {
			return [];
		}

		if (!empty($webfinger)) {
			// Keep the scheme that answered. It must not be replaced by "http" afterwards.
			$baseurl = 'https://' . $host;
		} else {
			$webfinger = self::getWebfinger('http://' . $host . self::WEBFINGER, 'application/jrd+json', $uri, $addr);
			if (self::$istimeout) {
				return [];
			}

			if (!empty($webfinger)) {
				$baseurl = 'http://' . $host;
			} else {
				// Last resort: fetch the templates from host-meta
				$lrdd = self::hostMeta($host);
				if (self::$istimeout) {
					return [];
				}
				$baseurl = self::$baseurl;
			}
		}
	} else {
		Logger::info('URI was not detectable', ['uri' => $uri]);
		return [];
	}

	// Try the host-meta templates (type => template) until one of them delivers a result
	if (empty($webfinger) && is_array($lrdd)) {
		foreach ($lrdd as $type => $template) {
			$webfinger = self::getWebfinger($template, $type, $uri, $addr);
			if (!empty($webfinger)) {
				break;
			}
		}
	}

	if (empty($webfinger)) {
		return [];
	}

	// Nick and address are only trustworthy when the request for the address was the successful one
	if ($webfinger['detected'] === $addr) {
		$webfinger['nick'] = $nick;
		$webfinger['addr'] = $addr;
	}

	$webfinger['baseurl'] = $baseurl;

	return $webfinger;
}
/**
 * Perform the network requests for webfinger data
 *
 * The placeholder "{uri}" of the template is replaced by the url encoded
 * resource. The address (prefixed with "acct:") is requested first, the plain
 * URI second. The first non empty answer wins. A detected time out aborts the
 * lookup with an empty result.
 *
 * @param string $template Webfinger URL containing the "{uri}" placeholder
 * @param string $type     Type that is handed over to the webfinger request
 * @param string $uri      URI that is probed
 * @param string $addr     Address ("nick@host"), can be empty
 * @return array Empty array on failure or time out, otherwise the keys
 *               'webfinger' (the answer) and 'detected' (the value that matched)
 */
private static function getWebfinger(string $template, string $type, string $uri, string $addr)
{
	// Ordered list of [value reported as "detected", resource that is requested]
	$candidates = [];

	// First try the address because this is the primary purpose of webfinger
	if (!empty($addr)) {
		$candidates[] = [$addr, "acct:" . $addr];
	}

	// Then try the URI
	if ($uri != $addr) {
		$candidates[] = [$uri, $uri];
	}

	foreach ($candidates as $candidate) {
		$detected = $candidate[0];
		$resource = $candidate[1];

		$result = self::webfinger(str_replace('{uri}', urlencode($resource), $template), $type);

		if (self::$istimeout) {
			return [];
		}

		if (!empty($result)) {
			return ['webfinger' => $result, 'detected' => $detected];
		}
	}

	return [];
}
/** /**
* Fetch information (protocol endpoints and user information) about a given uri * Fetch information (protocol endpoints and user information) about a given uri
* *
@ -572,8 +672,6 @@ class Probe
*/ */
private static function detect($uri, $network, $uid) private static function detect($uri, $network, $uid)
{ {
$parts = parse_url($uri);
$hookData = [ $hookData = [
'uri' => $uri, 'uri' => $uri,
'network' => $network, 'network' => $network,
@ -584,40 +682,19 @@ class Probe
Hook::callAll('probe_detect', $hookData); Hook::callAll('probe_detect', $hookData);
if ($hookData['result']) { if ($hookData['result']) {
return $hookData['result']; if (!is_array($hookData['result'])) {
return [];
} else {
return $hookData['result'];
}
} }
if (!empty($parts["scheme"]) && !empty($parts["host"])) { $parts = parse_url($uri);
$host = $parts["host"];
if (!empty($parts["port"])) {
$host .= ':'.$parts["port"];
}
if ($host == 'twitter.com') { if (!empty($parts['scheme']) && !empty($parts['host'])) {
if ($parts['host'] == 'twitter.com') {
return self::twitter($uri); return self::twitter($uri);
} }
$lrdd = self::hostMeta($host);
if (is_bool($lrdd)) {
return [];
}
$path_parts = explode("/", trim($parts['path'] ?? '', "/"));
while (!$lrdd && (sizeof($path_parts) > 1)) {
$host .= "/".array_shift($path_parts);
$lrdd = self::hostMeta($host);
}
if (!$lrdd) {
Logger::log('No XRD data was found for '.$uri, Logger::DEBUG);
return self::feed($uri);
}
$nick = array_pop($path_parts);
// Mastodon uses a "@" as prefix for usernames in their url format
$nick = ltrim($nick, '@');
$addr = $nick."@".$host;
} elseif (strstr($uri, '@')) { } elseif (strstr($uri, '@')) {
// If the URI starts with "mailto:" then jump directly to the mail detection // If the URI starts with "mailto:" then jump directly to the mail detection
if (strpos($uri, 'mailto:') !== false) { if (strpos($uri, 'mailto:') !== false) {
@ -628,73 +705,35 @@ class Probe
if ($network == Protocol::MAIL) { if ($network == Protocol::MAIL) {
return self::mail($uri, $uid); return self::mail($uri, $uid);
} }
// Remove "acct:" from the URI
$uri = str_replace('acct:', '', $uri);
$host = substr($uri, strpos($uri, '@') + 1);
$nick = substr($uri, 0, strpos($uri, '@'));
if (strpos($uri, '@twitter.com')) { if (strpos($uri, '@twitter.com')) {
return self::twitter($uri); return self::twitter($uri);
} }
$lrdd = self::hostMeta($host);
if (is_bool($lrdd)) {
return [];
}
if (!$lrdd) {
Logger::log('No XRD data was found for '.$uri, Logger::DEBUG);
return self::mail($uri, $uid);
}
$addr = $uri;
} else { } else {
Logger::log("Uri ".$uri." was not detectable", Logger::DEBUG); Logger::info('URI was not detectable', ['uri' => $uri]);
return []; return [];
} }
$webfinger = false; Logger::info('Probing start', ['uri' => $uri]);
/// @todo Do we need the prefix "acct:" or "acct://"? $data = self::getWebfingerArray($uri);
if (empty($data)) {
foreach ($lrdd as $type => $template) { if (!empty($parts['scheme'])) {
if ($webfinger) { return self::feed($uri);
continue; } elseif (!empty($uid)) {
} return self::mail($uri, $uid);
} else {
// Try the URI first return [];
if ($uri != $addr) {
$path = str_replace('{uri}', urlencode($uri), $template);
$webfinger = self::webfinger($path, $type);
}
// Then try the address
if (!$webfinger) {
$path = str_replace('{uri}', urlencode("acct:" . $addr), $template);
$webfinger = self::webfinger($path, $type);
}
// Finally try without the "acct"
if (!$webfinger) {
$path = str_replace('{uri}', urlencode($addr), $template);
$webfinger = self::webfinger($path, $type);
}
// We cannot be sure that the detected address was correct, so we don't use the values
if ($webfinger && ($uri != $addr)) {
$nick = "";
$addr = "";
} }
} }
if (!$webfinger) { $webfinger = $data['webfinger'];
return self::feed($uri); $nick = $data['nick'] ?? '';
} $addr = $data['addr'] ?? '';
$baseurl = $data['baseurl'] ?? '';
$result = []; $result = [];
Logger::info("Probing", ['uri' => $uri]);
if (in_array($network, ["", Protocol::DFRN])) { if (in_array($network, ["", Protocol::DFRN])) {
$result = self::dfrn($webfinger); $result = self::dfrn($webfinger);
} }
@ -705,7 +744,7 @@ class Probe
$result = self::ostatus($webfinger); $result = self::ostatus($webfinger);
} }
if (in_array($network, ['', Protocol::ZOT])) { if (in_array($network, ['', Protocol::ZOT])) {
$result = self::zot($webfinger, $result); $result = self::zot($webfinger, $result, $baseurl);
} }
if ((!$result && ($network == "")) || ($network == Protocol::PUMPIO)) { if ((!$result && ($network == "")) || ($network == Protocol::PUMPIO)) {
$result = self::pumpio($webfinger, $addr); $result = self::pumpio($webfinger, $addr);
@ -730,11 +769,15 @@ class Probe
$result["network"] = Protocol::PHANTOM; $result["network"] = Protocol::PHANTOM;
} }
if (empty($result['baseurl']) && !empty($baseurl)) {
$result['baseurl'] = $baseurl;
}
if (empty($result["url"])) { if (empty($result["url"])) {
$result["url"] = $uri; $result["url"] = $uri;
} }
Logger::log($uri." is ".$result["network"], Logger::DEBUG); Logger::info('Probing done', ['uri' => $uri, 'network' => $result["network"]]);
return $result; return $result;
} }
@ -748,7 +791,7 @@ class Probe
* @return array Zot data * @return array Zot data
* @throws HTTPException\InternalServerErrorException * @throws HTTPException\InternalServerErrorException
*/ */
private static function zot($webfinger, $data) private static function zot($webfinger, $data, $baseurl)
{ {
if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) { if (!empty($webfinger["aliases"]) && is_array($webfinger["aliases"])) {
foreach ($webfinger["aliases"] as $alias) { foreach ($webfinger["aliases"] as $alias) {
@ -769,12 +812,12 @@ class Probe
} }
} }
if (empty($zot_url) && !empty($data['addr']) && !empty(self::$baseurl)) { if (empty($zot_url) && !empty($data['addr']) && !empty($baseurl)) {
$condition = ['nurl' => Strings::normaliseLink(self::$baseurl), 'platform' => ['hubzilla']]; $condition = ['nurl' => Strings::normaliseLink($baseurl), 'platform' => ['hubzilla']];
if (!DBA::exists('gserver', $condition)) { if (!DBA::exists('gserver', $condition)) {
return $data; return $data;
} }
$zot_url = self::$baseurl . '/.well-known/zot-info?address=' . $data['addr']; $zot_url = $baseurl . '/.well-known/zot-info?address=' . $data['addr'];
} }
if (empty($zot_url)) { if (empty($zot_url)) {

View file

@ -145,7 +145,7 @@ class ActivityPub
{ {
$apcontact = APContact::getByURL($url, $update); $apcontact = APContact::getByURL($url, $update);
if (empty($apcontact)) { if (empty($apcontact)) {
return false; return [];
} }
$profile = ['network' => Protocol::ACTIVITYPUB]; $profile = ['network' => Protocol::ACTIVITYPUB];