From 99f94edc9cc0b7d124f68eadba99721545e7e555 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 3 Jul 2016 22:27:16 +0200 Subject: [PATCH 01/19] New class to probe urls --- include/Probe.php | 658 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 658 insertions(+) create mode 100644 include/Probe.php diff --git a/include/Probe.php b/include/Probe.php new file mode 100644 index 000000000..9a2303bfd --- /dev/null +++ b/include/Probe.php @@ -0,0 +1,658 @@ + Link to LRDD endpoint + * 'lrdd-xml' => Link to LRDD endpoint in XML format + * 'lrdd-json' => Link to LRDD endpoint in JSON format + */ + private function xrd($host) { + + $ssl_url = "https://".$host."/.well-known/host-meta"; + $url = "http://".$host."/.well-known/host-meta"; + + $xrd_timeout = Config::get('system','xrd_timeout', 20); + $redirects = 0; + + $xml = fetch_url($ssl_url, false, $redirects, $xrd_timeout, "application/xrd+xml"); + $xrd = parse_xml_string($xml, false); + + if (!is_object($xrd)) { + $xml = fetch_url($url, false, $redirects, $xrd_timeout, "application/xrd+xml"); + $xrd = parse_xml_string($xml, false); + } + if (!is_object($xrd)) + return false; + + $links = xml::element_to_array($xrd); + if (!isset($links["xrd"]["link"])) + return false; + + $xrd_data = array(); + foreach ($links["xrd"]["link"] AS $value => $link) { + if (isset($link["@attributes"])) + $attributes = $link["@attributes"]; + elseif ($value == "@attributes") + $attributes = $link; + else + continue; + + if (($attributes["rel"] == "lrdd") AND + ($attributes["type"] == "application/xrd+xml")) + $xrd_data["lrdd-xml"] = $attributes["template"]; + elseif (($attributes["rel"] == "lrdd") AND + ($attributes["type"] == "application/json")) + $xrd_data["lrdd-json"] = $attributes["template"]; + elseif ($attributes["rel"] == "lrdd") + $xrd_data["lrdd"] = $attributes["template"]; + } + return $xrd_data; + } + + public static function uri($uri) { + $data = self::detect($uri); + + //if (!data) + // return false; + + if (!isset($data["url"])) + $data["url"] = $uri; + + if ($data["photo"] != "") + $data["baseurl"] = matching_url(normalise_link($data["baseurl"]), normalise_link($data["photo"])); + else + $data["photo"] = App::get_baseurl().'/images/person-175.jpg'; + + if (!isset($data["name"])) + $data["name"] = $data["url"]; + + if (!isset($data["nick"])) + $data["nick"] = strtolower($data["name"]); + + if (!isset($data["network"])) + $data["network"] = NETWORK_PHANTOM; + + $data = self::rearrange_data($data); + + return $data; + } + + private function detect($uri) { + if (strstr($uri, '@')) { + // If the URI starts with "mailto:" then jum directly to the mail detection + if (strpos($url,'mailto:') !== false) { + $uri = str_replace('mailto:', '', $url); + return self::mail($uri); + } + + // Remove "acct:" from the URI + $uri = str_replace('acct:', '', $uri); + + $host = substr($uri,strpos($uri, '@') + 1); + $nick = substr($uri,0, strpos($uri, '@')); + + $lrdd = self::xrd($host); + if (!$lrdd) + return self::mail($uri); + + $addr = $uri; + } else { + $parts = parse_url($uri); + if (!isset($parts["scheme"]) OR + !isset($parts["host"]) OR + !isset($parts["path"])) + return false; + + // todo: Ports? + $host = $parts["host"]; + $lrdd = self::xrd($host); + + $path_parts = explode("/", trim($parts["path"], "/")); + + while (!$lrdd AND (sizeof($path_parts) > 1)) { + $host .= "/".array_shift($path_parts); + $lrdd = self::xrd($host); + } + if (!$lrdd) + return self::feed($uri); + + $nick = array_pop($path_parts); + $addr = $nick."@".$host; + } + + $webfinger = false; + + /// @todo Do we need the prefix "acct:" or "acct://"? + + foreach ($lrdd AS $key => $link) { + if ($webfinger) + continue; + + if (!in_array($key, array("lrdd", "lrdd-xml", "lrdd-json"))) + continue; + + $path = str_replace('{uri}', urlencode($addr), $link); + + $webfinger = self::webfinger($path); + } + if (!$webfinger) + return self::feed($uri); + + $result = false; + + if (!$result) + $result = self::dfrn($webfinger); + if (!$result) + $result = self::diaspora($webfinger); + if (!$result) + $result = self::ostatus($webfinger); + if (!$result) + $result = self::pumpio($webfinger); + if (!$result) + $result = self::feed($uri); + else { + // We overwrite the detected nick with our try if the previois routines hadn't detected it. + // Additionally it is overwritten when the nickname doesn't make sense (contains spaces). + if (!isset($result["nick"]) OR ($result["nick"] == "") OR (strstr($result["nick"], " "))) + $result["nick"] = $nick; + + if (!isset($result["addr"]) OR ($result["addr"] == "")) + $result["addr"] = $addr; + } + + if (!isset($result["baseurl"]) OR ($result["baseurl"] == "")) { + $pos = strpos($result["url"], $host); + if ($pos) + $result["baseurl"] = substr($result["url"], 0, $pos).$host; + } + + return $result; + } + + private function webfinger($url) { + + $xrd_timeout = Config::get('system','xrd_timeout', 20); + $redirects = 0; + + $data = fetch_url($url, false, $redirects, $xrd_timeout, "application/xrd+xml"); + $xrd = parse_xml_string($data, false); + + if (!is_object($xrd)) { + // If it is not XML, maybe it is JSON + $webfinger = json_decode($data, true); + + if (!isset($webfinger["links"])) + return false; + + return $webfinger; + } + + $xrd_arr = xml::element_to_array($xrd); + if (!isset($xrd_arr["xrd"]["link"])) + return false; + + $webfinger = array(); + + if (isset($xrd_arr["xrd"]["subject"])) + $webfinger["subject"] = $xrd_arr["xrd"]["subject"]; + + if (isset($xrd_arr["xrd"]["alias"])) + $webfinger["aliases"] = $xrd_arr["xrd"]["alias"]; + + $webfinger["links"] = array(); + + foreach ($xrd_arr["xrd"]["link"] AS $value => $data) { + if (isset($data["@attributes"])) + $attributes = $data["@attributes"]; + elseif ($value == "@attributes") + $attributes = $data; + else + continue; + + $webfinger["links"][] = $attributes; + } + return $webfinger; + } + + private function dfrn($webfinger) { + + $hcard = ""; + $data = array(); + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == NAMESPACE_DFRN) AND ($link["href"] != "")) + $data["network"] = NETWORK_DFRN; + elseif (($link["rel"] == NAMESPACE_FEED) AND ($link["href"] != "")) + $data["poll"] = $link["href"]; + elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == "http://microformats.org/profile/hcard") AND ($link["href"] != "")) + $hcard = $link["href"]; + elseif (($link["rel"] == NAMESPACE_POCO) AND ($link["href"] != "")) + $data["poco"] = $link["href"]; + elseif (($link["rel"] == "http://webfinger.net/rel/avatar") AND ($link["href"] != "")) + $data["photo"] = $link["href"]; + + elseif (($link["rel"] == "http://joindiaspora.com/seed_location") AND ($link["href"] != "")) + $data["baseurl"] = trim($link["href"], '/'); + elseif (($link["rel"] == "http://joindiaspora.com/guid") AND ($link["href"] != "")) + $data["guid"] = $link["href"]; + elseif (($link["rel"] == "diaspora-public-key") AND ($link["href"] != "")) { + $data["pubkey"] = base64_decode($link["href"]); + + if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + $data["pubkey"] = rsatopem($data["pubkey"]); + } + } + + if (!isset($data["network"]) OR ($hcard == "")) + return false; + + $data = self::poll_hcard($hcard, $data, true); + + return $data; + } + + private function poll_hcard($hcard, $data, $dfrn = false) { + + $doc = new DOMDocument(); + if (!@$doc->loadHTMLFile($hcard)) + return false; + + $xpath = new DomXPath($doc); + + $vcards = $xpath->query("//div[contains(concat(' ', @class, ' '), ' vcard ')]"); + if (!is_object($vcards)) + return false; + + if ($vcards->length == 0) + return false; + + $vcard = $vcards->item(0); + + // We have to discard the guid from the hcard in favour of the guid from lrdd + // Reason: Hubzilla doesn't use the value "uid" in the hcard like Diaspora does. + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' uid ')]", $vcard); // */ + if (($search->length > 0) AND ($data["guid"] == "")) + $data["guid"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' nickname ')]", $vcard); // */ + if ($search->length > 0) + $data["nick"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' fn ')]", $vcard); // */ + if ($search->length > 0) + $data["name"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' searchable ')]", $vcard); // */ + if ($search->length > 0) + $data["searchable"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' key ')]", $vcard); // */ + if ($search->length > 0) { + $data["pubkey"] = $search->item(0)->nodeValue; + if (strstr($data["pubkey"], 'RSA ')) + $data["pubkey"] = rsatopem($data["pubkey"]); + } + + $search = $xpath->query("//*[@id='pod_location']", $vcard); // */ + if ($search->length > 0) + $data["baseurl"] = trim($search->item(0)->nodeValue, "/"); + + $avatar = array(); + $photos = $xpath->query("//*[contains(concat(' ', @class, ' '), ' photo ') or contains(concat(' ', @class, ' '), ' avatar ')]", $vcard); // */ + foreach ($photos AS $photo) { + $attr = array(); + foreach ($photo->attributes as $attribute) + $attr[$attribute->name] = trim($attribute->value); + + if (isset($attr["src"]) AND isset($attr["width"])) + $avatar[$attr["width"]] = $attr["src"]; + } + + if (sizeof($avatar)) { + ksort($avatar); + $data["photo"] = array_pop($avatar); + } + + if ($dfrn) { + // Poll DFRN specific data + $search = $xpath->query("//link[contains(concat(' ', @rel), ' dfrn-')]"); + if ($search->length > 0) { + foreach ($search AS $link) { + //$data["request"] = $search->item(0)->nodeValue; + $attr = array(); + foreach ($link->attributes as $attribute) + $attr[$attribute->name] = trim($attribute->value); + + $data[substr($attr["rel"], 5)] = $attr["href"]; + } + } + + // Older Friendica versions had used the "uid" field differently than newer versions + if ($data["nick"] == $data["guid"]) + unset($data["guid"]); + } + + + return $data; + } + + private function diaspora($webfinger) { + + $hcard = ""; + $data = array(); + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == "http://microformats.org/profile/hcard") AND ($link["href"] != "")) + $hcard = $link["href"]; + elseif (($link["rel"] == "http://joindiaspora.com/seed_location") AND ($link["href"] != "")) + $data["baseurl"] = trim($link["href"], '/'); + elseif (($link["rel"] == "http://joindiaspora.com/guid") AND ($link["href"] != "")) + $data["guid"] = $link["href"]; + elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == NAMESPACE_FEED) AND ($link["href"] != "")) + $data["poll"] = $link["href"]; + elseif (($link["rel"] == NAMESPACE_POCO) AND ($link["href"] != "")) + $data["poco"] = $link["href"]; + elseif (($link["rel"] == "salmon") AND ($link["href"] != "")) + $data["notify"] = $link["href"]; + elseif (($link["rel"] == "diaspora-public-key") AND ($link["href"] != "")) { + $data["pubkey"] = base64_decode($link["href"]); + + if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + $data["pubkey"] = rsatopem($data["pubkey"]); + } + } + + if (!isset($data["url"]) OR ($hcard == "")) + return false; + + if (isset($webfinger["aliases"])) + foreach ($webfinger["aliases"] AS $alias) + if (normalise_link($alias) != normalise_link($data["url"]) AND !strstr($alias, "@")) + $data["alias"] = $alias; + + // Fetch further information from the hcard + $data = self::poll_hcard($hcard, $data); + + if (!$data) + return false; + + if (isset($data["url"]) AND isset($data["guid"]) AND isset($data["baseurl"]) AND + isset($data["pubkey"]) AND ($hcard != "")) { + $data["network"] = NETWORK_DIASPORA; + + // We have to overwrite the detected value for "notify" since Hubzilla doesn't send it + $data["notify"] = $data["baseurl"]."/receive/users/".$data["guid"]; + $data["batch"] = $data["baseurl"]."/receive/public"; + } else + return false; + + return $data; + } + + private function ostatus($webfinger) { + + $pubkey = ""; + $data = array(); + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == "salmon") AND ($link["href"] != "")) + $data["notify"] = $link["href"]; + elseif (($link["rel"] == NAMESPACE_FEED) AND ($link["href"] != "")) + $data["poll"] = $link["href"]; + elseif (($link["rel"] == "magic-public-key") AND ($link["href"] != "")) { + $pubkey = $link["href"]; + + if (substr($pubkey, 0, 5) === 'data:') { + if (strstr($pubkey, ',')) + $pubkey = substr($pubkey, strpos($pubkey, ',') + 1); + else + $pubkey = substr($pubkey, 5); + } else + $pubkey = fetch_url($pubkey); + + $key = explode(".", $pubkey); + + if (sizeof($key) >= 3) { + $m = base64url_decode($key[1]); + $e = base64url_decode($key[2]); + $data["pubkey"] = metopem($m,$e); + } + + } + } + + if (isset($data["notify"]) AND isset($data["pubkey"]) AND + isset($data["poll"]) AND isset($data["url"])) { + $data["network"] = NETWORK_OSTATUS; + } else + return false; + + // Fetch all additional data from the feed + $feed = fetch_url($data["poll"]); + $feed_data = feed_import($feed,$dummy1,$dummy2, $dummy3, true); + if (!$feed_data) + return false; + + if ($feed_data["header"]["author-name"] != "") + $data["name"] = $feed_data["header"]["author-name"]; + + if ($feed_data["header"]["author-nick"] != "") + $data["nick"] = $feed_data["header"]["author-nick"]; + + if ($feed_data["header"]["author-avatar"] != "") + $data["photo"] = $feed_data["header"]["author-avatar"]; + + if ($feed_data["header"]["author-id"] != "") + $data["alias"] = $feed_data["header"]["author-id"]; + + // OStatus has serious issues when the the url doesn't fit (ssl vs. non ssl) + // So we take the value that we just fetched, although the other one worked as well + if ($feed_data["header"]["author-link"] != "") + $data["url"] = $feed_data["header"]["author-link"]; + + /// @todo Fetch location and "about" from the feed as well + return $data; + } + + private function pumpio_profile_data($profile) { + + $doc = new DOMDocument(); + if (!@$doc->loadHTMLFile($profile)) + return false; + + $xpath = new DomXPath($doc); + + $data = array(); + + // This is ugly - but pump.io doesn't seem to know a better way for it + $data["name"] = trim($xpath->query("//h1[@class='media-header']")->item(0)->nodeValue); + $pos = strpos($data["name"], chr(10)); + if ($pos) + $data["name"] = trim(substr($data["name"], 0, $pos)); + + $avatar = $xpath->query("//img[@class='img-rounded media-object']")->item(0); + if ($avatar) + foreach ($avatar->attributes as $attribute) + if ($attribute->name == "src") + $data["photo"] = trim($attribute->value); + + $data["location"] = $xpath->query("//p[@class='location']")->item(0)->nodeValue; + $data["about"] = $xpath->query("//p[@class='summary']")->item(0)->nodeValue; + + return $data; + } + + private function pumpio($webfinger) { + $data = array(); + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == "activity-inbox") AND ($link["href"] != "")) + $data["activity-inbox"] = $link["href"]; + elseif (($link["rel"] == "activity-outbox") AND ($link["href"] != "")) + $data["activity-outbox"] = $link["href"]; + elseif (($link["rel"] == "dialback") AND ($link["href"] != "")) + $data["dialback"] = $link["href"]; + } + if (isset($data["activity-inbox"]) AND isset($data["activity-outbox"]) AND + isset($data["dialback"]) AND isset($data["url"])) { + + // by now we use these fields only for the network type detection + // So we unset all data that isn't used at the moment + unset($data["activity-inbox"]); + unset($data["activity-outbox"]); + unset($data["dialback"]); + + $data["network"] = NETWORK_PUMPIO; + } else + return false; + + $profile_data = self::pumpio_profile_data($data["url"]); + + if (!$profile_data) + return false; + + $data = array_merge($data, $profile_data); + + return $data; + } + + private function feed($url) { + $feed = fetch_url($url); + $feed_data = feed_import($feed, $dummy1, $dummy2, $dummy3, true); + + if (!$feed_data) + return false; + + if ($feed_data["header"]["author-name"] != "") + $data["name"] = $feed_data["header"]["author-name"]; + + if ($feed_data["header"]["author-nick"] != "") + $data["nick"] = $feed_data["header"]["author-nick"]; + + if ($feed_data["header"]["author-avatar"] != "") + $data["photo"] = $feed_data["header"]["author-avatar"]; + + if ($feed_data["header"]["author-id"] != "") + $data["alias"] = $feed_data["header"]["author-id"]; + + $data["url"] = $url; + $data["poll"] = $url; + + if ($feed_data["header"]["author-link"] != "") + $data["baseurl"] = $feed_data["header"]["author-link"]; + else + $data["baseurl"] = $data["url"]; + + $data["network"] = NETWORK_FEED; + + return $data; + } + + private function mail($uri) { + + if (!validate_email($uri)) + return false; + + $uid = local_user(); + $uid = 1; + + $x = q("SELECT `prvkey` FROM `user` WHERE `uid` = %d LIMIT 1", intval($uid)); + + $r = q("SELECT * FROM `mailacct` WHERE `uid` = %d AND `server` != '' LIMIT 1", intval($uid)); + + if(count($x) && count($r)) { + $mailbox = construct_mailbox_name($r[0]); + $password = ''; + openssl_private_decrypt(hex2bin($r[0]['pass']), $password,$x[0]['prvkey']); + $mbox = email_connect($mailbox,$r[0]['user'], $password); + if(!mbox) + return false; + } + + $msgs = email_poll($mbox, $uri); + logger('searching '.$uri.', '.count($msgs).' messages found.', LOGGER_DEBUG); + + if (!count($msgs)) + return false; + + $data = array(); + + $data["addr"] = $uri; + $data["network"] = NETWORK_MAIL; + $data["name"] = substr($uri, 0, strpos($uri,'@')); + $data["nick"] = $data["name"]; + $data["photo"] = avatar_img($uri); + + $phost = substr($uri, strpos($uri,'@') + 1); + $data["url"] = 'http://'.$phost."/".$data["nick"]; + $data["notify"] = 'smtp '.random_string(); + $data["poll"] = 'email '.random_string(); + + $x = email_msg_meta($mbox, $msgs[0]); + if(stristr($x[0]->from, $uri)) + $adr = imap_rfc822_parse_adrlist($x[0]->from, ''); + elseif(stristr($x[0]->to, $uri)) + $adr = imap_rfc822_parse_adrlist($x[0]->to, ''); + if(isset($adr)) { + foreach($adr as $feadr) { + if((strcasecmp($feadr->mailbox, $data["name"]) == 0) + &&(strcasecmp($feadr->host, $phost) == 0) + && (strlen($feadr->personal))) { + + $personal = imap_mime_header_decode($feadr->personal); + $data["name"] = ""; + foreach($personal as $perspart) + if ($perspart->charset != "default") + $data["name"] .= iconv($perspart->charset, 'UTF-8//IGNORE', $perspart->text); + else + $data["name"] .= $perspart->text; + + $data["name"] = notags($data["name"]); + } + } + } + imap_close($mbox); + + return $data; + } +} +?> From b124b9428873c31c31eda4b0001d68342559637e Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 3 Jul 2016 23:11:21 +0200 Subject: [PATCH 02/19] Support for noscrape when probing. --- include/Probe.php | 85 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 9a2303bfd..230ba950d 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -12,9 +12,10 @@ require_once("include/feed.php"); class Probe { private function rearrange_data($data) { - $fields = array("name", "nick", "guid", "url", "addr", "batch", - "notify", "poll", "request", "confirm", "poco", - "photo", "priority", "network", "alias", "pubkey", "baseurl"); + $fields = array("name", "nick", "guid", "url", "addr", "alias", + "photo", "community", "keywords", "location", "about", + "batch", "notify", "poll", "request", "confirm", "poco", + "priority", "network", "pubkey", "baseurl"); $newdata = array(); foreach ($fields AS $field) @@ -80,8 +81,8 @@ class Probe { return $xrd_data; } - public static function uri($uri) { - $data = self::detect($uri); + public static function uri($uri, $network = "") { + $data = self::detect($uri, $network); //if (!data) // return false; @@ -108,7 +109,7 @@ class Probe { return $data; } - private function detect($uri) { + private function detect($uri, $network) { if (strstr($uri, '@')) { // If the URI starts with "mailto:" then jum directly to the mail detection if (strpos($url,'mailto:') !== false) { @@ -116,6 +117,9 @@ class Probe { return self::mail($uri); } + if ($network == NETWORK_MAIL) + return self::mail($uri); + // Remove "acct:" from the URI $uri = str_replace('acct:', '', $uri); @@ -171,15 +175,15 @@ class Probe { $result = false; - if (!$result) + if (in_array($network, array("", NETWORK_DFRN))) $result = self::dfrn($webfinger); - if (!$result) + if ((!$result AND ($network == "")) OR ($network == NETWORK_DIASPORA)) $result = self::diaspora($webfinger); - if (!$result) + if ((!$result AND ($network == "")) OR ($network == NETWORK_OSTATUS)) $result = self::ostatus($webfinger); - if (!$result) + if ((!$result AND ($network == "")) OR ($network == NETWORK_PUMPIO)) $result = self::pumpio($webfinger); - if (!$result) + if ((!$result AND ($network == "")) OR ($network == NETWORK_FEED)) $result = self::feed($uri); else { // We overwrite the detected nick with our try if the previois routines hadn't detected it. @@ -245,6 +249,57 @@ class Probe { return $webfinger; } + private function poll_noscrape($noscrape, $data) { + $content = fetch_url($noscrape); + + $json = json_decode($content, true); + + if (isset($json["fn"])) + $data["name"] = $json["fn"]; + + if (isset($json["addr"])) + $data["addr"] = $json["addr"]; + + if (isset($json["nick"])) + $data["nick"] = $json["nick"]; + + if (isset($json["comm"])) + $data["community"] = $json["comm"]; + + if (isset($json["tags"])) { + $keywords = implode(" ", $json["tags"]); + if ($keywords != "") + $data["keywords"] = $keywords; + } + + $location = formatted_location($json); + if ($location) + $data["location"] = $location; + + if (isset($json["about"])) + $data["about"] = $json["about"]; + + if (isset($json["key"])) + $data["pubkey"] = $json["key"]; + + if (isset($json["photo"])) + $data["photo"] = $json["photo"]; + + if (isset($json["dfrn-request"])) + $data["request"] = $json["dfrn-request"]; + + if (isset($json["dfrn-confirm"])) + $data["confirm"] = $json["dfrn-confirm"]; + + if (isset($json["dfrn-notify"])) + $data["notify"] = $json["dfrn-notify"]; + + if (isset($json["dfrn-poll"])) + $data["poll"] = $json["dfrn-poll"]; + + return $data; + } + private function dfrn($webfinger) { $hcard = ""; @@ -279,6 +334,14 @@ class Probe { if (!isset($data["network"]) OR ($hcard == "")) return false; + // Fetch data via noscrape - this is faster + $noscrape = str_replace("/hcard/", "/noscrape/", $hcard); + $data = self::poll_noscrape($noscrape, $data); + + if (isset($data["notify"]) AND isset($data["confirm"]) AND isset($data["request"]) AND + isset($data["poll"]) AND isset($data["name"]) AND isset($data["photo"])) + return $data; + $data = self::poll_hcard($hcard, $data, true); return $data; From f5593b19812a854b45a32c93a86f6df45a0cd23d Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 4 Jul 2016 00:14:08 +0200 Subject: [PATCH 03/19] Removed old code --- include/Probe.php | 38 ++- include/Scrape.php | 749 +-------------------------------------------- 2 files changed, 44 insertions(+), 743 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 230ba950d..56eccc8c8 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -8,6 +8,7 @@ use \Friendica\Core\Config; use \Friendica\Core\PConfig; require_once("include/feed.php"); +require_once('include/email.php'); class Probe { @@ -82,10 +83,14 @@ class Probe { } public static function uri($uri, $network = "") { - $data = self::detect($uri, $network); - //if (!data) - // return false; + $result = Cache::get("probe_url:".$network.":".$uri); + if (!is_null($result)) { + $result = unserialize($result); + return $result; + } + + $data = self::detect($uri, $network); if (!isset($data["url"])) $data["url"] = $uri; @@ -106,6 +111,29 @@ class Probe { $data = self::rearrange_data($data); + // Only store into the cache if the value seems to be valid + if ($data['network'] != NETWORK_PHANTOM) { + Cache::set("probe_url:".$network.":".$uri,serialize($data), CACHE_DAY); + + /// @todo temporary fix - we need a real contact update function that updates only changing fields + /// The biggest problem is the avatar picture that could have a reduced image size. + /// It should only be updated if the existing picture isn't existing anymore. + if (($data['network'] != NETWORK_FEED) AND ($mode == PROBE_NORMAL) AND + $data["name"] AND $data["nick"] AND $data["url"] AND $data["addr"] AND $data["poll"]) + q("UPDATE `contact` SET `name` = '%s', `nick` = '%s', `url` = '%s', `addr` = '%s', + `notify` = '%s', `poll` = '%s', `alias` = '%s', `success_update` = '%s' + WHERE `nurl` = '%s' AND NOT `self` AND `uid` = 0", + dbesc($data["name"]), + dbesc($data["nick"]), + dbesc($data["url"]), + dbesc($data["addr"]), + dbesc($data["notify"]), + dbesc($data["poll"]), + dbesc($data["alias"]), + dbesc(datetime_convert()), + dbesc(normalise_link($data['url'])) + ); + } return $data; } @@ -251,8 +279,12 @@ class Probe { private function poll_noscrape($noscrape, $data) { $content = fetch_url($noscrape); + if (!$content) + return false; $json = json_decode($content, true); + if (!is_array($json)) + return false; if (isset($json["fn"])) $data["name"] = $json["fn"]; diff --git a/include/Scrape.php b/include/Scrape.php index e2dc7d659..5c176fa36 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -3,6 +3,7 @@ require_once('library/HTML5/Parser.php'); require_once('include/crypto.php'); require_once('include/feed.php'); +require_once('include/Probe.php'); if(! function_exists('scrape_dfrn')) { function scrape_dfrn($url, $dont_probe = false) { @@ -138,205 +139,6 @@ function validate_dfrn($a) { return $errors; }} -if(! function_exists('scrape_meta')) { -function scrape_meta($url) { - - $a = get_app(); - - $ret = array(); - - logger('scrape_meta: url=' . $url); - - $s = fetch_url($url); - - if(! $s) - return $ret; - - $headers = $a->get_curl_headers(); - logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG); - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return ret; - } - } - - try { - $dom = HTML5_Parser::parse($s); - } catch (DOMException $e) { - logger('scrape_meta: parse error: ' . $e); - } - - if(! $dom) - return $ret; - - $items = $dom->getElementsByTagName('meta'); - - // get DFRN link elements - - foreach($items as $item) { - $x = $item->getAttribute('name'); - if(substr($x,0,5) == "dfrn-") - $ret[$x] = $item->getAttribute('content'); - } - - return $ret; -}} - - -if(! function_exists('scrape_vcard')) { -function scrape_vcard($url) { - - $a = get_app(); - - $ret = array(); - - logger('scrape_vcard: url=' . $url); - - $s = fetch_url($url); - - if(! $s) - return $ret; - - $headers = $a->get_curl_headers(); - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return ret; - } - } - - try { - $dom = HTML5_Parser::parse($s); - } catch (DOMException $e) { - logger('scrape_vcard: parse error: ' . $e); - } - - if(! $dom) - return $ret; - - // Pull out hCard profile elements - - $largest_photo = 0; - - $items = $dom->getElementsByTagName('*'); - foreach($items as $item) { - if(attribute_contains($item->getAttribute('class'), 'vcard')) { - $level2 = $item->getElementsByTagName('*'); - foreach($level2 as $x) { - if(attribute_contains($x->getAttribute('class'),'fn')) - $ret['fn'] = $x->textContent; - if((attribute_contains($x->getAttribute('class'),'photo')) - || (attribute_contains($x->getAttribute('class'),'avatar'))) { - $size = intval($x->getAttribute('width')); - if(($size > $largest_photo) || (! $largest_photo)) { - $ret['photo'] = $x->getAttribute('src'); - $largest_photo = $size; - } - } - if((attribute_contains($x->getAttribute('class'),'nickname')) - || (attribute_contains($x->getAttribute('class'),'uid'))) { - $ret['nick'] = $x->textContent; - } - } - } - } - - return $ret; -}} - - -if(! function_exists('scrape_feed')) { -function scrape_feed($url) { - - $a = get_app(); - - $ret = array(); - $cookiejar = tempnam(get_temppath(), 'cookiejar-scrape-feed-'); - $s = fetch_url($url, false, $redirects, 0, Null, $cookiejar); - unlink($cookiejar); - - $headers = $a->get_curl_headers(); - $code = $a->get_curl_code(); - - logger('scrape_feed: returns: ' . $code . ' headers=' . $headers, LOGGER_DEBUG); - - if(! $s) { - logger('scrape_feed: no data returned for ' . $url); - return $ret; - } - - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - if(stristr($line,'content-type:')) { - if(stristr($line,'application/atom+xml') || stristr($s,'')) { - $ret['feed_rss'] = $url; - return $ret; - } - } - - $basename = implode('/', array_slice(explode('/',$url),0,3)) . '/'; - - $doc = new DOMDocument(); - @$doc->loadHTML($s); - $xpath = new DomXPath($doc); - - $base = $xpath->query("//base"); - foreach ($base as $node) { - $attr = array(); - - if ($node->attributes->length) - foreach ($node->attributes as $attribute) - $attr[$attribute->name] = $attribute->value; - - if ($attr["href"] != "") - $basename = $attr["href"] ; - } - - $list = $xpath->query("//link"); - foreach ($list as $node) { - $attr = array(); - - if ($node->attributes->length) - foreach ($node->attributes as $attribute) - $attr[$attribute->name] = $attribute->value; - - if (($attr["rel"] == "alternate") AND ($attr["type"] == "application/atom+xml")) - $ret["feed_atom"] = $attr["href"]; - - if (($attr["rel"] == "alternate") AND ($attr["type"] == "application/rss+xml")) - $ret["feed_rss"] = $attr["href"]; - } - - // Drupal and perhaps others only provide relative URLs. Turn them into absolute. - - if(x($ret,'feed_atom') && (! strstr($ret['feed_atom'],'://'))) - $ret['feed_atom'] = $basename . $ret['feed_atom']; - if(x($ret,'feed_rss') && (! strstr($ret['feed_rss'],'://'))) - $ret['feed_rss'] = $basename . $ret['feed_rss']; - - return $ret; -}} - - /** * * Probe a network address to discover what kind of protocols we need to communicate with it. @@ -356,552 +158,19 @@ function scrape_feed($url) { */ -define ( 'PROBE_NORMAL', 0); -define ( 'PROBE_DIASPORA', 1); +define('PROBE_NORMAL', 0); +define('PROBE_DIASPORA', 1); function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { - require_once('include/email.php'); - $result = array(); + if ($mode == PROBE_DIASPORA) + $network = NETWORK_DIASPORA; + else + $network = ""; - if (!$url) - return $result; + $data = Probe::uri($url, $network); - $result = Cache::get("probe_url:".$mode.":".$url); - if (!is_null($result)) { - $result = unserialize($result); - return $result; - } - - $original_url = $url; - $network = null; - $diaspora = false; - $diaspora_base = ''; - $diaspora_guid = ''; - $diaspora_key = ''; - $has_lrdd = false; - $email_conversant = false; - $connectornetworks = false; - $appnet = false; - - if (strpos($url,'twitter.com')) { - $connectornetworks = true; - $network = NETWORK_TWITTER; - } - - $lastfm = ((strpos($url,'last.fm/user') !== false) ? true : false); - - $at_addr = ((strpos($url,'@') !== false) ? true : false); - - if((!$appnet) && (!$lastfm) && !$connectornetworks) { - - if(strpos($url,'mailto:') !== false && $at_addr) { - $url = str_replace('mailto:','',$url); - $links = array(); - } - else - $links = lrdd($url); - - if ((count($links) == 0) AND strstr($url, "/index.php")) { - $url = str_replace("/index.php", "", $url); - $links = lrdd($url); - } - - if (count($links)) { - $has_lrdd = true; - - logger('probe_url: found lrdd links: ' . print_r($links,true), LOGGER_DATA); - foreach($links as $link) { - if($link['@attributes']['rel'] === NAMESPACE_ZOT) - $zot = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === NAMESPACE_DFRN) - $dfrn = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'salmon') - $notify = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === NAMESPACE_FEED) - $poll = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') - $hcard = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://webfinger.net/rel/profile-page') - $profile = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://portablecontacts.net/spec/1.0') - $poco = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://joindiaspora.com/seed_location') { - $diaspora_base = unamp($link['@attributes']['href']); - $diaspora = true; - } - if($link['@attributes']['rel'] === 'http://joindiaspora.com/guid') { - $diaspora_guid = unamp($link['@attributes']['href']); - $diaspora = true; - } - if($link['@attributes']['rel'] === 'diaspora-public-key') { - $diaspora_key = base64_decode(unamp($link['@attributes']['href'])); - if(strstr($diaspora_key,'RSA ')) - $pubkey = rsatopem($diaspora_key); - else - $pubkey = $diaspora_key; - $diaspora = true; - } - if(($link['@attributes']['rel'] === 'http://ostatus.org/schema/1.0/subscribe') AND ($mode == PROBE_NORMAL)) { - $diaspora = false; - } - } - - // Status.Net can have more than one profile URL. We need to match the profile URL - // to a contact on incoming messages to prevent spam, and we won't know which one - // to match. So in case of two, one of them is stored as an alias. Only store URL's - // and not webfinger user@host aliases. If they've got more than two non-email style - // aliases, let's hope we're lucky and get one that matches the feed author-uri because - // otherwise we're screwed. - - $backup_alias = ""; - - foreach($links as $link) { - if($link['@attributes']['rel'] === 'alias') { - if(strpos($link['@attributes']['href'],'@') === false) { - if(isset($profile)) { - $alias_url = $link['@attributes']['href']; - - if(($alias_url !== $profile) AND ($backup_alias == "") AND - ($alias_url !== str_replace("/index.php", "", $profile))) - $backup_alias = $alias_url; - - if(($alias_url !== $profile) AND !strstr($alias_url, "index.php") AND - ($alias_url !== str_replace("/index.php", "", $profile))) - $alias = $alias_url; - } - else - $profile = unamp($link['@attributes']['href']); - } - } - } - - if ($alias == "") - $alias = $backup_alias; - - // If the profile is different from the url then the url is abviously an alias - if (($alias == "") AND ($profile != "") AND !$at_addr AND (normalise_link($profile) != normalise_link($url))) - $alias = $url; - } - elseif($mode == PROBE_NORMAL) { - - // Check email - - $orig_url = $url; - if((strpos($orig_url,'@')) && validate_email($orig_url)) { - $x = q("SELECT `prvkey` FROM `user` WHERE `uid` = %d LIMIT 1", - intval(local_user()) - ); - $r = q("SELECT * FROM `mailacct` WHERE `uid` = %d AND `server` != '' LIMIT 1", - intval(local_user()) - ); - if(count($x) && count($r)) { - $mailbox = construct_mailbox_name($r[0]); - $password = ''; - openssl_private_decrypt(hex2bin($r[0]['pass']),$password,$x[0]['prvkey']); - $mbox = email_connect($mailbox,$r[0]['user'],$password); - if(! $mbox) - logger('probe_url: email_connect failed.'); - unset($password); - } - if($mbox) { - $msgs = email_poll($mbox,$orig_url); - logger('probe_url: searching ' . $orig_url . ', ' . count($msgs) . ' messages found.', LOGGER_DEBUG); - if(count($msgs)) { - $addr = $orig_url; - $network = NETWORK_MAIL; - $name = substr($url,0,strpos($url,'@')); - $phost = substr($url,strpos($url,'@')+1); - $profile = 'http://' . $phost; - // fix nick character range - $vcard = array('fn' => $name, 'nick' => $name, 'photo' => avatar_img($url)); - $notify = 'smtp ' . random_string(); - $poll = 'email ' . random_string(); - $priority = 0; - $x = email_msg_meta($mbox,$msgs[0]); - if(stristr($x[0]->from,$orig_url)) - $adr = imap_rfc822_parse_adrlist($x[0]->from,''); - elseif(stristr($x[0]->to,$orig_url)) - $adr = imap_rfc822_parse_adrlist($x[0]->to,''); - if(isset($adr)) { - foreach($adr as $feadr) { - if((strcasecmp($feadr->mailbox,$name) == 0) - &&(strcasecmp($feadr->host,$phost) == 0) - && (strlen($feadr->personal))) { - - $personal = imap_mime_header_decode($feadr->personal); - $vcard['fn'] = ""; - foreach($personal as $perspart) - if ($perspart->charset != "default") - $vcard['fn'] .= iconv($perspart->charset, 'UTF-8//IGNORE', $perspart->text); - else - $vcard['fn'] .= $perspart->text; - - $vcard['fn'] = notags($vcard['fn']); - } - } - } - } - imap_close($mbox); - } - } - } - } - - if($mode == PROBE_NORMAL) { - - if(strlen($zot)) { - $s = fetch_url($zot); - if($s) { - $j = json_decode($s); - if($j) { - $network = NETWORK_ZOT; - $vcard = array( - 'fn' => $j->fullname, - 'nick' => $j->nickname, - 'photo' => $j->photo - ); - $profile = $j->url; - $notify = $j->post; - $pubkey = $j->pubkey; - $poll = 'N/A'; - } - } - } - - - if(strlen($dfrn)) { - $ret = scrape_dfrn(($hcard) ? $hcard : $dfrn, true); - if(is_array($ret) && x($ret,'dfrn-request')) { - $network = NETWORK_DFRN; - $request = $ret['dfrn-request']; - $confirm = $ret['dfrn-confirm']; - $notify = $ret['dfrn-notify']; - $poll = $ret['dfrn-poll']; - - $vcard = array(); - $vcard['fn'] = $ret['fn']; - $vcard['nick'] = $ret['nick']; - $vcard['photo'] = $ret['photo']; - } - } - } - - // Scrape the public key from the hcard. - // Diaspora will remove it from the webfinger somewhere in the future. - if (($hcard != "") AND ($pubkey == "")) { - $ret = scrape_dfrn(($hcard) ? $hcard : $dfrn, true); - if (isset($ret["guid"])) - $diaspora_guid = $ret["guid"]; - if (isset($ret["url"])) - $diaspora_base = $ret["url"]; - - if (isset($ret["key"])) { - $hcard_key = $ret["key"]; - if(strstr($hcard_key,'RSA ')) - $pubkey = rsatopem($hcard_key); - else - $pubkey = $hcard_key; - } - } - - if(($network == "") && $diaspora_base && $diaspora_guid) { - $diaspora_notify = $diaspora_base.'receive/users/'.$diaspora_guid; - $diaspora = true; - - if($mode == PROBE_DIASPORA || !$notify || ($notify == $diaspora_notify)) { - $notify = $diaspora_notify; - $batch = $diaspora_base . 'receive/public' ; - } - if(strpos($url,'@')) - $addr = str_replace('acct:', '', $url); - } - - if($network !== NETWORK_ZOT && $network !== NETWORK_DFRN && $network !== NETWORK_MAIL) { - if($diaspora) - $network = NETWORK_DIASPORA; - elseif($has_lrdd AND ($notify)) - $network = NETWORK_OSTATUS; - - if(strpos($url,'@')) - $addr = str_replace('acct:', '', $url); - - $priority = 0; - - if($hcard && ! $vcard) { - $vcard = scrape_vcard($hcard); - - // Google doesn't use absolute url in profile photos - - if((x($vcard,'photo')) && substr($vcard['photo'],0,1) == '/') { - $h = @parse_url($hcard); - if($h) - $vcard['photo'] = $h['scheme'] . '://' . $h['host'] . $vcard['photo']; - } - - logger('probe_url: scrape_vcard: ' . print_r($vcard,true), LOGGER_DATA); - } - - if($diaspora && $addr) { - // Diaspora returns the name as the nick. As the nick will never be updated, - // let's use the Diaspora nickname (the first part of the handle) as the nick instead - $addr_parts = explode('@', $addr); - $vcard['nick'] = $addr_parts[0]; - } - - if($lastfm) { - $profile = $url; - $poll = str_replace(array('www.','last.fm/'),array('','ws.audioscrobbler.com/1.0/'),$url) . '/recenttracks.rss'; - $vcard['nick'] = basename($url); - $vcard['fn'] = $vcard['nick'] . t(' on Last.fm'); - $network = NETWORK_FEED; - } - - if(! x($vcard,'fn')) - if(x($vcard,'nick')) - $vcard['fn'] = $vcard['nick']; - - $check_feed = false; - - if(stristr($url,'tumblr.com') && (! stristr($url,'/rss'))) { - $poll = $url . '/rss'; - $check_feed = true; - // Will leave it to others to figure out how to grab the avatar, which is on the $url page in the open graph meta links - } - - if($appnet || ! $poll) - $check_feed = true; - if((! isset($vcard)) || (! x($vcard,'fn')) || (! $profile)) - $check_feed = true; - if(($at_addr) && (! count($links))) - $check_feed = false; - - if ($connectornetworks) - $check_feed = false; - - if($check_feed) { - - $feedret = scrape_feed(($poll) ? $poll : $url); - - logger('probe_url: scrape_feed ' . (($poll)? $poll : $url) . ' returns: ' . print_r($feedret,true), LOGGER_DATA); - if(count($feedret) && ($feedret['feed_atom'] || $feedret['feed_rss'])) { - $poll = ((x($feedret,'feed_atom')) ? unamp($feedret['feed_atom']) : unamp($feedret['feed_rss'])); - if(! x($vcard)) - $vcard = array(); - } - - if(x($feedret,'photo') && (! x($vcard,'photo'))) - $vcard['photo'] = $feedret['photo']; - - $cookiejar = tempnam(get_temppath(), 'cookiejar-scrape-feed-'); - $xml = fetch_url($poll, false, $redirects, 0, Null, $cookiejar); - unlink($cookiejar); - - logger('probe_url: fetch feed: ' . $poll . ' returns: ' . $xml, LOGGER_DATA); - - if ($xml == "") { - logger("scrape_feed: XML is empty for feed ".$poll); - $network = NETWORK_PHANTOM; - } else { - $data = feed_import($xml,$dummy1,$dummy2, $dummy3, true); - - if (!is_array($data)) { - logger("scrape_feed: This doesn't seem to be a feed: ".$poll); - $network = NETWORK_PHANTOM; - } else { - if (($vcard["photo"] == "") AND ($data["header"]["author-avatar"] != "")) - $vcard["photo"] = $data["header"]["author-avatar"]; - - if (($vcard["fn"] == "") AND ($data["header"]["author-name"] != "")) - $vcard["fn"] = $data["header"]["author-name"]; - - if (($vcard["nick"] == "") AND ($data["header"]["author-nick"] != "")) - $vcard["nick"] = $data["header"]["author-nick"]; - - if ($network == NETWORK_OSTATUS) { - if ($data["header"]["author-id"] != "") - $alias = $data["header"]["author-id"]; - - if ($data["header"]["author-link"] != "") - $profile = $data["header"]["author-link"]; - - } elseif(!$profile AND ($data["header"]["author-link"] != "") AND !in_array($network, array("", NETWORK_FEED))) - $profile = $data["header"]["author-link"]; - } - } - - // Workaround for misconfigured Friendica servers - if (($network == "") AND (strstr($url, "/profile/"))) { - $noscrape = str_replace("/profile/", "/noscrape/", $url); - $noscrapejson = fetch_url($noscrape); - if ($noscrapejson) { - - $network = NETWORK_DFRN; - - $poco = str_replace("/profile/", "/poco/", $url); - - $noscrapedata = json_decode($noscrapejson, true); - - if (isset($noscrapedata["addr"])) - $addr = $noscrapedata["addr"]; - - if (isset($noscrapedata["fn"])) - $vcard["fn"] = $noscrapedata["fn"]; - - if (isset($noscrapedata["key"])) - $pubkey = $noscrapedata["key"]; - - if (isset($noscrapedata["photo"])) - $vcard["photo"] = $noscrapedata["photo"]; - - if (isset($noscrapedata["dfrn-request"])) - $request = $noscrapedata["dfrn-request"]; - - if (isset($noscrapedata["dfrn-confirm"])) - $confirm = $noscrapedata["dfrn-confirm"]; - - if (isset($noscrapedata["dfrn-notify"])) - $notify = $noscrapedata["dfrn-notify"]; - - if (isset($noscrapedata["dfrn-poll"])) - $poll = $noscrapedata["dfrn-poll"]; - - } - } - - if(! $network) - $network = NETWORK_FEED; - - if(! x($vcard,'nick')) { - $vcard['nick'] = strtolower(notags(unxmlify($vcard['fn']))); - if(strpos($vcard['nick'],' ')) - $vcard['nick'] = trim(substr($vcard['nick'],0,strpos($vcard['nick'],' '))); - } - if(! $priority) - $priority = 2; - } - } - - if(! x($vcard,'photo')) { - $a = get_app(); - $vcard['photo'] = App::get_baseurl() . '/images/person-175.jpg' ; - } - - if(! $profile) - $profile = $url; - - // No human could be associated with this link, use the URL as the contact name - - if(($network === NETWORK_FEED) && ($poll) && (! x($vcard,'fn'))) - $vcard['fn'] = $url; - - if ($diaspora_base != "") - $baseurl = $diaspora_base; - - if (($baseurl == "") AND ($notify != "") AND ($poll != "")) { - $baseurl = matching_url(normalise_link($notify), normalise_link($poll)); - - $baseurl2 = matching_url($baseurl, normalise_link($profile)); - if ($baseurl2 != "") - $baseurl = $baseurl2; - } - - if (($baseurl == "") AND ($notify != "")) - $baseurl = matching_url(normalise_link($profile), normalise_link($notify)); - - if (($baseurl == "") AND ($poll != "")) - $baseurl = matching_url(normalise_link($profile), normalise_link($poll)); - - if (substr($baseurl, -10) == "/index.php") - $baseurl = str_replace("/index.php", "", $baseurl); - - if ($network == "") - $network = NETWORK_PHANTOM; - - $baseurl = rtrim($baseurl, "/"); - - if(strpos($url,'@') AND ($addr == "") AND ($network == NETWORK_DFRN)) - $addr = str_replace('acct:', '', $url); - - $vcard['fn'] = notags($vcard['fn']); - $vcard['nick'] = str_replace(' ','',notags($vcard['nick'])); - - $result['name'] = $vcard['fn']; - $result['nick'] = $vcard['nick']; - $result['guid'] = $diaspora_guid; - $result['url'] = $profile; - $result['addr'] = $addr; - $result['batch'] = $batch; - $result['notify'] = $notify; - $result['poll'] = $poll; - $result['request'] = $request; - $result['confirm'] = $confirm; - $result['poco'] = $poco; - $result['photo'] = $vcard['photo']; - $result['priority'] = $priority; - $result['network'] = $network; - $result['alias'] = $alias; - $result['pubkey'] = $pubkey; - $result['baseurl'] = $baseurl; - - logger('probe_url: ' . print_r($result,true), LOGGER_DEBUG); - - if ($level == 1) { - // Trying if it maybe a diaspora account - if (($result['network'] == NETWORK_FEED) OR ($result['addr'] == "")) { - require_once('include/bbcode.php'); - $address = GetProfileUsername($url, "", true); - $result2 = probe_url($address, $mode, ++$level); - if (!in_array($result2['network'], array("", NETWORK_PHANTOM, NETWORK_FEED))) - $result = $result2; - } - - // Maybe it's some non standard GNU Social installation (Single user, subfolder or no uri rewrite) - if (($result['network'] == NETWORK_FEED) AND ($result['baseurl'] != "") AND ($result['nick'] != "")) { - $addr = $result['nick'].'@'.str_replace("http://", "", $result['baseurl']); - $result2 = probe_url($addr, $mode, ++$level); - if (!in_array($result2['network'], array("", NETWORK_PHANTOM, NETWORK_FEED))) - $result = $result2; - } - - // Quickfix for Hubzilla systems with enabled OStatus plugin - if (($result['network'] == NETWORK_DIASPORA) AND ($result["batch"] == "")) { - $result2 = probe_url($url, PROBE_DIASPORA, ++$level); - if ($result2['network'] == NETWORK_DIASPORA) { - $addr = $result["addr"]; - $result = $result2; - - if (($result["addr"] == "") AND ($addr != "")) - $result["addr"] = $addr; - } - } - } - - // Only store into the cache if the value seems to be valid - if ($result['network'] != NETWORK_PHANTOM) { - Cache::set("probe_url:".$mode.":".$original_url,serialize($result), CACHE_DAY); - - /// @todo temporary fix - we need a real contact update function that updates only changing fields - /// The biggest problem is the avatar picture that could have a reduced image size. - /// It should only be updated if the existing picture isn't existing anymore. - if (($result['network'] != NETWORK_FEED) AND ($mode == PROBE_NORMAL) AND - $result["name"] AND $result["nick"] AND $result["url"] AND $result["addr"] AND $result["poll"]) - q("UPDATE `contact` SET `name` = '%s', `nick` = '%s', `url` = '%s', `addr` = '%s', - `notify` = '%s', `poll` = '%s', `alias` = '%s', `success_update` = '%s' - WHERE `nurl` = '%s' AND NOT `self` AND `uid` = 0", - dbesc($result["name"]), - dbesc($result["nick"]), - dbesc($result["url"]), - dbesc($result["addr"]), - dbesc($result["notify"]), - dbesc($result["poll"]), - dbesc($result["alias"]), - dbesc(datetime_convert()), - dbesc(normalise_link($result['url'])) - ); - } - - return $result; + return $data; } /** From e1c0b4fe03756a51bd1112ff563cd2f1338f8417 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 4 Jul 2016 00:49:38 +0200 Subject: [PATCH 04/19] Warnings removed --- include/Probe.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 56eccc8c8..51349c4f0 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -203,6 +203,8 @@ class Probe { $result = false; + logger("Probing ".$uri, LOGGER_DEBUG); + if (in_array($network, array("", NETWORK_DFRN))) $result = self::dfrn($webfinger); if ((!$result AND ($network == "")) OR ($network == NETWORK_DIASPORA)) @@ -223,6 +225,8 @@ class Probe { $result["addr"] = $addr; } + logger($uri." is ".$result["network"], LOGGER_DEBUG); + if (!isset($result["baseurl"]) OR ($result["baseurl"] == "")) { $pos = strpos($result["url"], $host); if ($pos) @@ -358,7 +362,8 @@ class Probe { elseif (($link["rel"] == "diaspora-public-key") AND ($link["href"] != "")) { $data["pubkey"] = base64_decode($link["href"]); - if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + //if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + if (strstr($data["pubkey"], 'RSA ')) $data["pubkey"] = rsatopem($data["pubkey"]); } } @@ -487,7 +492,8 @@ class Probe { elseif (($link["rel"] == "diaspora-public-key") AND ($link["href"] != "")) { $data["pubkey"] = base64_decode($link["href"]); - if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + //if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + if (strstr($data["pubkey"], 'RSA ')) $data["pubkey"] = rsatopem($data["pubkey"]); } } @@ -495,7 +501,7 @@ class Probe { if (!isset($data["url"]) OR ($hcard == "")) return false; - if (isset($webfinger["aliases"])) + if (is_array($webfinger["aliases"])) foreach ($webfinger["aliases"] AS $alias) if (normalise_link($alias) != normalise_link($data["url"]) AND !strstr($alias, "@")) $data["alias"] = $alias; From 4e5d335492c7091f2aa9f47c889b29e8b60c1299 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 4 Jul 2016 08:05:30 +0200 Subject: [PATCH 05/19] Removed the legacy function calls, removed unused functions --- include/Probe.php | 57 +++++++++++-- include/Scrape.php | 193 ------------------------------------------- include/network.php | 56 ++++++++++++- include/socgraph.php | 1 + mod/dfrn_request.php | 9 +- 5 files changed, 113 insertions(+), 203 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 51349c4f0..7619a127f 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -9,6 +9,7 @@ use \Friendica\Core\PConfig; require_once("include/feed.php"); require_once('include/email.php'); +require_once('include/network.php'); class Probe { @@ -82,12 +83,14 @@ class Probe { return $xrd_data; } - public static function uri($uri, $network = "") { + public static function uri($uri, $network = "", $cache = true) { - $result = Cache::get("probe_url:".$network.":".$uri); - if (!is_null($result)) { - $result = unserialize($result); - return $result; + if ($cache) { + $result = Cache::get("probe_url:".$network.":".$uri); + if (!is_null($result)) { + $result = unserialize($result); + return $result; + } } $data = self::detect($uri, $network); @@ -336,6 +339,50 @@ class Probe { return $data; } + public static function valid_dfrn($data) { + $errors = 0; + if(!isset($data['key'])) + $errors ++; + if(!isset($data['dfrn-request'])) + $errors ++; + if(!isset($data['dfrn-confirm'])) + $errors ++; + if(!isset($data['dfrn-notify'])) + $errors ++; + if(!isset($data['dfrn-poll'])) + $errors ++; + return $errors; + } + + public static function profile($profile) { + + $data = array(); + + // Fetch data via noscrape - this is faster + $noscrape = str_replace(array("/hcard/", "/profile/"), "/noscrape/", $profile); + $data = self::poll_noscrape($noscrape, $data); + + if (!isset($data["notify"]) OR !isset($data["confirm"]) OR + !isset($data["request"]) OR !isset($data["poll"]) OR + !isset($data["poco"]) OR !isset($data["name"]) OR + !isset($data["photo"])) + $data = self::poll_hcard($profile, $data, true); + + $prof_data = array(); + $prof_data["addr"] = $data["addr"]; + $prof_data["nick"] = $data["nick"]; + $prof_data["dfrn-request"] = $data["request"]; + $prof_data["dfrn-confirm"] = $data["confirm"]; + $prof_data["dfrn-notify"] = $data["notify"]; + $prof_data["dfrn-poll"] = $data["poll"]; + $prof_data["dfrn-poco"] = $data["poco"]; + $prof_data["photo"] = $data["photo"]; + $prof_data["fn"] = $data["name"]; + $prof_data["key"] = $data["pubkey"]; + + return $prof_data; + } + private function dfrn($webfinger) { $hcard = ""; diff --git a/include/Scrape.php b/include/Scrape.php index 5c176fa36..bb9af60d7 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -1,144 +1,6 @@ get_curl_headers(); - logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG); - - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return ret; - } - } - - try { - $dom = HTML5_Parser::parse($s); - } catch (DOMException $e) { - logger('scrape_dfrn: parse error: ' . $e); - } - - if(! $dom) - return $ret; - - $items = $dom->getElementsByTagName('link'); - - // get DFRN link elements - - foreach($items as $item) { - $x = $item->getAttribute('rel'); - if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml')) - $ret['feed_atom'] = $item->getAttribute('href'); - if(substr($x,0,5) == "dfrn-") { - $ret[$x] = $item->getAttribute('href'); - } - if($x === 'lrdd') { - $decoded = urldecode($item->getAttribute('href')); - if(preg_match('/acct:([^@]*)@/',$decoded,$matches)) - $ret['nick'] = $matches[1]; - } - } - - // Pull out hCard profile elements - - $largest_photo = 0; - - $items = $dom->getElementsByTagName('*'); - foreach($items as $item) { - if(attribute_contains($item->getAttribute('class'), 'vcard')) { - $level2 = $item->getElementsByTagName('*'); - foreach($level2 as $x) { - if(attribute_contains($x->getAttribute('class'),'uid')) - $ret['guid'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'nickname')) - $ret['nickname'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'fn')) - $ret['fn'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'searchable')) - $ret['searchable'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'key')) - $ret['key'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'url')) - $ret['url'] = $x->textContent; - if((attribute_contains($x->getAttribute('class'),'photo')) - || (attribute_contains($x->getAttribute('class'),'avatar'))) { - $size = intval($x->getAttribute('width')); - // dfrn prefers 175, so if we find this, we set largest_size so it can't be topped. - if(($size > $largest_photo) || ($size == 175) || (! $largest_photo)) { - $ret['photo'] = $x->getAttribute('src'); - $largest_photo = (($size == 175) ? 9999 : $size); - } - } - } - } - } - return array_merge($ret, $noscrapedata); -}} - - - - - - -if(! function_exists('validate_dfrn')) { -function validate_dfrn($a) { - $errors = 0; - if(! x($a,'key')) - $errors ++; - if(! x($a,'dfrn-request')) - $errors ++; - if(! x($a,'dfrn-confirm')) - $errors ++; - if(! x($a,'dfrn-notify')) - $errors ++; - if(! x($a,'dfrn-poll')) - $errors ++; - return $errors; -}} - /** * * Probe a network address to discover what kind of protocols we need to communicate with it. @@ -157,7 +19,6 @@ function validate_dfrn($a) { * */ - define('PROBE_NORMAL', 0); define('PROBE_DIASPORA', 1); @@ -172,57 +33,3 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { return $data; } - -/** - * @brief Find the matching part between two url - * - * @param string $url1 - * @param string $url2 - * @return string The matching part - */ -function matching_url($url1, $url2) { - - if (($url1 == "") OR ($url2 == "")) - return ""; - - $url1 = normalise_link($url1); - $url2 = normalise_link($url2); - - $parts1 = parse_url($url1); - $parts2 = parse_url($url2); - - if (!isset($parts1["host"]) OR !isset($parts2["host"])) - return ""; - - if ($parts1["scheme"] != $parts2["scheme"]) - return ""; - - if ($parts1["host"] != $parts2["host"]) - return ""; - - if ($parts1["port"] != $parts2["port"]) - return ""; - - $match = $parts1["scheme"]."://".$parts1["host"]; - - if ($parts1["port"]) - $match .= ":".$parts1["port"]; - - $pathparts1 = explode("/", $parts1["path"]); - $pathparts2 = explode("/", $parts2["path"]); - - $i = 0; - $path = ""; - do { - $path1 = $pathparts1[$i]; - $path2 = $pathparts2[$i]; - - if ($path1 == $path2) - $path .= $path1."/"; - - } while (($path1 == $path2) AND ($i++ <= count($pathparts1))); - - $match .= $path; - - return normalise_link($match); -} diff --git a/include/network.php b/include/network.php index d27a090fe..64b6d07c6 100644 --- a/include/network.php +++ b/include/network.php @@ -1123,7 +1123,7 @@ function short_link($url) { * This function encodes an array to json format * and adds an application/json HTTP header to the output. * After finishing the process is getting killed. - * + * * @param array $x The input content */ function json_return_and_die($x) { @@ -1131,3 +1131,57 @@ function json_return_and_die($x) { echo json_encode($x); killme(); } + +/** + * @brief Find the matching part between two url + * + * @param string $url1 + * @param string $url2 + * @return string The matching part + */ +function matching_url($url1, $url2) { + + if (($url1 == "") OR ($url2 == "")) + return ""; + + $url1 = normalise_link($url1); + $url2 = normalise_link($url2); + + $parts1 = parse_url($url1); + $parts2 = parse_url($url2); + + if (!isset($parts1["host"]) OR !isset($parts2["host"])) + return ""; + + if ($parts1["scheme"] != $parts2["scheme"]) + return ""; + + if ($parts1["host"] != $parts2["host"]) + return ""; + + if ($parts1["port"] != $parts2["port"]) + return ""; + + $match = $parts1["scheme"]."://".$parts1["host"]; + + if ($parts1["port"]) + $match .= ":".$parts1["port"]; + + $pathparts1 = explode("/", $parts1["path"]); + $pathparts2 = explode("/", $parts2["path"]); + + $i = 0; + $path = ""; + do { + $path1 = $pathparts1[$i]; + $path2 = $pathparts2[$i]; + + if ($path1 == $path2) + $path .= $path1."/"; + + } while (($path1 == $path2) AND ($i++ <= count($pathparts1))); + + $match .= $path; + + return normalise_link($match); +} diff --git a/include/socgraph.php b/include/socgraph.php index 3718f8230..cb2fd97b3 100644 --- a/include/socgraph.php +++ b/include/socgraph.php @@ -9,6 +9,7 @@ require_once('include/datetime.php'); require_once("include/Scrape.php"); +require_once("include/network.php"); require_once("include/html2bbcode.php"); require_once("include/Contact.php"); require_once("include/Photo.php"); diff --git a/mod/dfrn_request.php b/mod/dfrn_request.php index 2a9f68eab..20c47bebf 100644 --- a/mod/dfrn_request.php +++ b/mod/dfrn_request.php @@ -11,6 +11,7 @@ require_once('include/enotify.php'); require_once('include/Scrape.php'); +require_once('include/Probe.php'); require_once('include/group.php'); if(! function_exists('dfrn_request_init')) { @@ -116,7 +117,7 @@ function dfrn_request_post(&$a) { * Scrape the other site's profile page to pick up the dfrn links, key, fn, and photo */ - $parms = scrape_dfrn($dfrn_url); + $parms = Probe::profile($dfrn_url); if(! count($parms)) { notice( t('Profile location is not valid or does not contain profile information.') . EOL ); @@ -127,7 +128,7 @@ function dfrn_request_post(&$a) { notice( t('Warning: profile location has no identifiable owner name.') . EOL ); if(! x($parms,'photo')) notice( t('Warning: profile location has no profile photo.') . EOL ); - $invalid = validate_dfrn($parms); + $invalid = Probe::valid_dfrn($parms); if($invalid) { notice( sprintf( tt("%d required parameter was not found at the given location", "%d required parameters were not found at the given location", @@ -511,7 +512,7 @@ function dfrn_request_post(&$a) { require_once('include/Scrape.php'); - $parms = scrape_dfrn(($hcard) ? $hcard : $url); + $parms = Probe::profile(($hcard) ? $hcard : $url); if(! count($parms)) { notice( t('Profile location is not valid or does not contain profile information.') . EOL ); @@ -522,7 +523,7 @@ function dfrn_request_post(&$a) { notice( t('Warning: profile location has no identifiable owner name.') . EOL ); if(! x($parms,'photo')) notice( t('Warning: profile location has no profile photo.') . EOL ); - $invalid = validate_dfrn($parms); + $invalid = Probe::valid_dfrn($parms); if($invalid) { notice( sprintf( tt("%d required parameter was not found at the given location", "%d required parameters were not found at the given location", From 585c893456e9f42900da5029658cb6db3feb1272 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 4 Jul 2016 22:34:35 +0200 Subject: [PATCH 06/19] You can now prove with an uid (important for mails) --- include/Probe.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 7619a127f..23a89428b 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -83,7 +83,7 @@ class Probe { return $xrd_data; } - public static function uri($uri, $network = "", $cache = true) { + public static function uri($uri, $network = "", $uid = 0, $cache = true) { if ($cache) { $result = Cache::get("probe_url:".$network.":".$uri); @@ -93,7 +93,10 @@ class Probe { } } - $data = self::detect($uri, $network); + if ($uid == 0) + $uid = local_user(); + + $data = self::detect($uri, $network, $uid); if (!isset($data["url"])) $data["url"] = $uri; @@ -115,7 +118,7 @@ class Probe { $data = self::rearrange_data($data); // Only store into the cache if the value seems to be valid - if ($data['network'] != NETWORK_PHANTOM) { + if (!in_array($data['network'], array(NETWORK_PHANTOM, NETWORK_MAIL))) { Cache::set("probe_url:".$network.":".$uri,serialize($data), CACHE_DAY); /// @todo temporary fix - we need a real contact update function that updates only changing fields @@ -140,16 +143,16 @@ class Probe { return $data; } - private function detect($uri, $network) { + private function detect($uri, $network, $uid) { if (strstr($uri, '@')) { // If the URI starts with "mailto:" then jum directly to the mail detection if (strpos($url,'mailto:') !== false) { $uri = str_replace('mailto:', '', $url); - return self::mail($uri); + return self::mail($uri, $uid); } if ($network == NETWORK_MAIL) - return self::mail($uri); + return self::mail($uri, $uid); // Remove "acct:" from the URI $uri = str_replace('acct:', '', $uri); @@ -159,7 +162,7 @@ class Probe { $lrdd = self::xrd($host); if (!$lrdd) - return self::mail($uri); + return self::mail($uri, $uid); $addr = $uri; } else { @@ -735,14 +738,11 @@ class Probe { return $data; } - private function mail($uri) { + private function mail($uri, $uid) { if (!validate_email($uri)) return false; - $uid = local_user(); - $uid = 1; - $x = q("SELECT `prvkey` FROM `user` WHERE `uid` = %d LIMIT 1", intval($uid)); $r = q("SELECT * FROM `mailacct` WHERE `uid` = %d AND `server` != '' LIMIT 1", intval($uid)); From c625b6aba83fa3ad13d57ae46f8821dd3e747496 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 7 Jul 2016 22:37:16 +0200 Subject: [PATCH 07/19] If a page contains a feed url then follow it. --- include/Probe.php | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 23a89428b..4dcbc7841 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -706,12 +706,51 @@ class Probe { return $data; } - private function feed($url) { + private function get_feed_link($url) { + $doc = new DOMDocument(); + + if (!@$doc->loadHTMLFile($url)) + return false; + + $xpath = new DomXPath($doc); + + //$feeds = $xpath->query("/html/head/link[@type='application/rss+xml']"); + $feeds = $xpath->query("/html/head/link[@type='application/rss+xml' and @rel='alternate']"); + if (!is_object($feeds)) + return false; + + if ($feeds->length == 0) + return false; + + $feed_url = ""; + + foreach ($feeds AS $feed) { + $attr = array(); + foreach ($feed->attributes as $attribute) + $attr[$attribute->name] = trim($attribute->value); + + if ($feed_url == "") + $feed_url = $attr["href"]; + } + + return $feed_url; + } + + private function feed($url, $probe = true) { $feed = fetch_url($url); $feed_data = feed_import($feed, $dummy1, $dummy2, $dummy3, true); - if (!$feed_data) - return false; + if (!$feed_data) { + if (!$probe) + return false; + + $feed_url = self::get_feed_link($url); + + if (!$feed_url) + return false; + + return self::feed($feed_url, false); + } if ($feed_data["header"]["author-name"] != "") $data["name"] = $feed_data["header"]["author-name"]; From a65547c5787850ad1f3b30b6fbb5ac0480e0360b Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 7 Jul 2016 23:04:30 +0200 Subject: [PATCH 08/19] Added documentation --- include/Probe.php | 122 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/include/Probe.php b/include/Probe.php index 4dcbc7841..ff2341e26 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -13,6 +13,13 @@ require_once('include/network.php'); class Probe { + /** + * @brief Rearrange the array so that it always has the same order + * + * @param array $data Unordered data + * + * @return array Ordered data + */ private function rearrange_data($data) { $fields = array("name", "nick", "guid", "url", "addr", "alias", "photo", "community", "keywords", "location", "about", @@ -83,6 +90,16 @@ class Probe { return $xrd_data; } + /** + * @brief Fetch information about a given uri + * + * @param string $uri Address that should be probed + * @param string $network Test for this specific network + * @param integer $uid User ID for the probe (only used for mails) + * @param boolean $cache Use cached values? + * + * @return array uri data + */ public static function uri($uri, $network = "", $uid = 0, $cache = true) { if ($cache) { @@ -143,6 +160,15 @@ class Probe { return $data; } + /** + * @brief Detect information about a given uri + * + * @param string $uri Address that should be probed + * @param string $network Test for this specific network + * @param integer $uid User ID for the probe (only used for mails) + * + * @return array uri data + */ private function detect($uri, $network, $uid) { if (strstr($uri, '@')) { // If the URI starts with "mailto:" then jum directly to the mail detection @@ -242,6 +268,13 @@ class Probe { return $result; } + /** + * @brief Do a webfinger request + * + * @param string $url Address that should be probed + * + * @return array webfinger data + */ private function webfinger($url) { $xrd_timeout = Config::get('system','xrd_timeout', 20); @@ -287,6 +320,14 @@ class Probe { return $webfinger; } + /** + * @brief Poll the noscrape page (Friendica specific) + * + * @param string $noscrape Link to the noscrape page + * @param array $data The already fetched data + * + * @return array noscrape data + */ private function poll_noscrape($noscrape, $data) { $content = fetch_url($noscrape); if (!$content) @@ -342,6 +383,13 @@ class Probe { return $data; } + /** + * @brief Check for valid DFRN data + * + * @param array $data DFRN data + * + * @return int Number of errors + */ public static function valid_dfrn($data) { $errors = 0; if(!isset($data['key'])) @@ -357,6 +405,13 @@ class Probe { return $errors; } + /** + * @brief Fetch data from a DFRN profile page + * + * @param string $profile Link to the profile page + * + * @return array profile data + */ public static function profile($profile) { $data = array(); @@ -386,6 +441,13 @@ class Probe { return $prof_data; } + /** + * @brief Check for DFRN contact + * + * @param array $webfinger Webfinger data + * + * @return array DFRN data + */ private function dfrn($webfinger) { $hcard = ""; @@ -434,6 +496,15 @@ class Probe { return $data; } + /** + * @brief Poll the hcard page (Diaspora and Friendica specific) + * + * @param string $hcard Link to the hcard page + * @param array $data The already fetched data + * @param boolean $dfrn Poll DFRN specific data + * + * @return array hcard data + */ private function poll_hcard($hcard, $data, $dfrn = false) { $doc = new DOMDocument(); @@ -519,6 +590,13 @@ class Probe { return $data; } + /** + * @brief Check for Diaspora contact + * + * @param array $webfinger Webfinger data + * + * @return array Diaspora data + */ private function diaspora($webfinger) { $hcard = ""; @@ -575,6 +653,13 @@ class Probe { return $data; } + /** + * @brief Check for OStatus contact + * + * @param array $webfinger Webfinger data + * + * @return array OStatus data + */ private function ostatus($webfinger) { $pubkey = ""; @@ -642,6 +727,13 @@ class Probe { return $data; } + /** + * @brief Fetch data from a pump.io profile page + * + * @param string $profile Link to the profile page + * + * @return array profile data + */ private function pumpio_profile_data($profile) { $doc = new DOMDocument(); @@ -670,6 +762,13 @@ class Probe { return $data; } + /** + * @brief Check for pump.io contact + * + * @param array $webfinger Webfinger data + * + * @return array pump.io data + */ private function pumpio($webfinger) { $data = array(); foreach ($webfinger["links"] AS $link) { @@ -706,6 +805,13 @@ class Probe { return $data; } + /** + * @brief Check page for feed link + * + * @param string $url Page link + * + * @return string feed link + */ private function get_feed_link($url) { $doc = new DOMDocument(); @@ -736,6 +842,14 @@ class Probe { return $feed_url; } + /** + * @brief Check for feed contact + * + * @param string $url Profile link + * @param boolean $probe Do a probe if the page contains a feed link + * + * @return array feed data + */ private function feed($url, $probe = true) { $feed = fetch_url($url); $feed_data = feed_import($feed, $dummy1, $dummy2, $dummy3, true); @@ -777,6 +891,14 @@ class Probe { return $data; } + /** + * @brief Check for mail contact + * + * @param string $uri Profile link + * @param integer $uid User ID + * + * @return array mail data + */ private function mail($uri, $uid) { if (!validate_email($uri)) From edfee710feee5dd41255b6b3aa104d4a7798f3a4 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 7 Jul 2016 23:35:44 +0200 Subject: [PATCH 09/19] To-Do added --- include/Probe.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/Probe.php b/include/Probe.php index ff2341e26..8b6039a6f 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -2,6 +2,15 @@ /** * @brief This class contain functions for probing URL * + * To-Do: + * + * - OStatus links with index.php + * - Non absolute URL in feeds? + * - Check for tumblr feed? + * - more logging? + * - Workaround for misconfigured Friendica servers? + * - Check if Hubzilla servers with OStatus are detected correctly + * */ use \Friendica\Core\Config; From b954714464c3954f46afa5abcc38075897f7483d Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 8 Jul 2016 20:37:10 +0200 Subject: [PATCH 10/19] If the nick is missing then take the first part of the name --- include/Probe.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 8b6039a6f..9dc2a370b 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -5,11 +5,9 @@ * To-Do: * * - OStatus links with index.php + * - OStatus alias link * - Non absolute URL in feeds? - * - Check for tumblr feed? - * - more logging? * - Workaround for misconfigured Friendica servers? - * - Check if Hubzilla servers with OStatus are detected correctly * */ @@ -135,9 +133,13 @@ class Probe { if (!isset($data["name"])) $data["name"] = $data["url"]; - if (!isset($data["nick"])) + if (!isset($data["nick"])) { $data["nick"] = strtolower($data["name"]); + if (strpos($data['nick'], ' ')) + $data['nick'] = trim(substr($data['nick'], 0, strpos($data['nick'], ' '))); + } + if (!isset($data["network"])) $data["network"] = NETWORK_PHANTOM; From 0ccb171449b8f045f073bca29b2fa4e701a45016 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 8 Jul 2016 21:15:20 +0200 Subject: [PATCH 11/19] Probing now has a Twitter detection. --- include/Probe.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/Probe.php b/include/Probe.php index 9dc2a370b..0ce388dd7 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -197,6 +197,9 @@ class Probe { $host = substr($uri,strpos($uri, '@') + 1); $nick = substr($uri,0, strpos($uri, '@')); + if (strpos($uri, '@twitter.com')) + return array("network" => NETWORK_TWITTER); + $lrdd = self::xrd($host); if (!$lrdd) return self::mail($uri, $uid); @@ -211,6 +214,10 @@ class Probe { // todo: Ports? $host = $parts["host"]; + + if ($host == 'twitter.com') + return array("network" => NETWORK_TWITTER); + $lrdd = self::xrd($host); $path_parts = explode("/", trim($parts["path"], "/")); From f86dac652f987fd8b06fe826a7233689d6858702 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 8 Jul 2016 22:31:11 +0200 Subject: [PATCH 12/19] OStatus now has "location" and "about", probing for urls added --- include/Probe.php | 42 ++++++++++++++++++++++++++++++++---------- include/feed.php | 25 ++++++++++++++++--------- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 0ce388dd7..8a606ecae 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -232,7 +232,6 @@ class Probe { $nick = array_pop($path_parts); $addr = $nick."@".$host; } - $webfinger = false; /// @todo Do we need the prefix "acct:" or "acct://"? @@ -244,9 +243,22 @@ class Probe { if (!in_array($key, array("lrdd", "lrdd-xml", "lrdd-json"))) continue; + // Try webfinger with the address (user@domain.tld) $path = str_replace('{uri}', urlencode($addr), $link); - $webfinger = self::webfinger($path); + + // If webfinger wasn't successful then try it with the URL - possibly in the format https://... + if (!$webfinger AND ($uri != $addr)) { + $path = str_replace('{uri}', urlencode($uri), $link); + $webfinger = self::webfinger($path); + + // Since the detection with the address wasn't successful, we delete it. + if ($webfinger) { + $nick = ""; + $addr = ""; + } + } + } if (!$webfinger) return self::feed($uri); @@ -268,10 +280,10 @@ class Probe { else { // We overwrite the detected nick with our try if the previois routines hadn't detected it. // Additionally it is overwritten when the nickname doesn't make sense (contains spaces). - if (!isset($result["nick"]) OR ($result["nick"] == "") OR (strstr($result["nick"], " "))) + if ((!isset($result["nick"]) OR ($result["nick"] == "") OR (strstr($result["nick"], " "))) AND ($nick != "")) $result["nick"] = $nick; - if (!isset($result["addr"]) OR ($result["addr"] == "")) + if ((!isset($result["addr"]) OR ($result["addr"] == "")) AND ($addr != "")) $result["addr"] = $addr; } @@ -680,8 +692,13 @@ class Probe { */ private function ostatus($webfinger) { - $pubkey = ""; $data = array(); + if (is_array($webfinger["aliases"])) + foreach($webfinger["aliases"] AS $alias) + if (strstr($alias, "@")) + $data["addr"] = str_replace('acct:', '', $alias); + + $pubkey = ""; foreach ($webfinger["links"] AS $link) { if (($link["rel"] == "http://webfinger.net/rel/profile-page") AND ($link["type"] == "text/html") AND ($link["href"] != "")) @@ -736,6 +753,12 @@ class Probe { if ($feed_data["header"]["author-id"] != "") $data["alias"] = $feed_data["header"]["author-id"]; + if ($feed_data["header"]["author-location"] != "") + $data["location"] = $feed_data["header"]["author-location"]; + + if ($feed_data["header"]["author-about"] != "") + $data["about"] = $feed_data["header"]["author-about"]; + // OStatus has serious issues when the the url doesn't fit (ssl vs. non ssl) // So we take the value that we just fetched, although the other one worked as well if ($feed_data["header"]["author-link"] != "") @@ -788,25 +811,24 @@ class Probe { * @return array pump.io data */ private function pumpio($webfinger) { + $data = array(); foreach ($webfinger["links"] AS $link) { if (($link["rel"] == "http://webfinger.net/rel/profile-page") AND ($link["type"] == "text/html") AND ($link["href"] != "")) $data["url"] = $link["href"]; elseif (($link["rel"] == "activity-inbox") AND ($link["href"] != "")) - $data["activity-inbox"] = $link["href"]; + $data["notify"] = $link["href"]; elseif (($link["rel"] == "activity-outbox") AND ($link["href"] != "")) - $data["activity-outbox"] = $link["href"]; + $data["poll"] = $link["href"]; elseif (($link["rel"] == "dialback") AND ($link["href"] != "")) $data["dialback"] = $link["href"]; } - if (isset($data["activity-inbox"]) AND isset($data["activity-outbox"]) AND + if (isset($data["poll"]) AND isset($data["notify"]) AND isset($data["dialback"]) AND isset($data["url"])) { // by now we use these fields only for the network type detection // So we unset all data that isn't used at the moment - unset($data["activity-inbox"]); - unset($data["activity-outbox"]); unset($data["dialback"]); $data["network"] = NETWORK_PUMPIO; diff --git a/include/feed.php b/include/feed.php index 992d10e22..5adb2294d 100644 --- a/include/feed.php +++ b/include/feed.php @@ -59,8 +59,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { if ($attributes->name == "href") $author["author-link"] = $attributes->textContent; - $author["author-id"] = $xpath->evaluate('/atom:feed/atom:author/atom:uri/text()')->item(0)->nodeValue; - if ($author["author-link"] == "") $author["author-link"] = $author["author-id"]; @@ -89,9 +87,22 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { if ($value != "") $author["author-name"] = $value; - $value = $xpath->evaluate('atom:author/poco:preferredUsername/text()')->item(0)->nodeValue; - if ($value != "") - $author["author-nick"] = $value; + if ($simulate) { + $author["author-id"] = $xpath->evaluate('/atom:feed/atom:author/atom:uri/text()')->item(0)->nodeValue; + + $value = $xpath->evaluate('atom:author/poco:preferredUsername/text()')->item(0)->nodeValue; + if ($value != "") + $author["author-nick"] = $value; + + $value = $xpath->evaluate('atom:author/poco:address/poco:formatted/text()', $context)->item(0)->nodeValue; + if ($value != "") + $author["author-location"] = $value; + + $value = $xpath->evaluate('atom:author/poco:note/text()')->item(0)->nodeValue; + if ($value != "") + $author["author-about"] = $value; + + } $author["edited"] = $author["created"] = $xpath->query('/atom:feed/atom:updated/text()')->item(0)->nodeValue; @@ -131,10 +142,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { $author["owner-link"] = $contact["url"]; $author["owner-name"] = $contact["name"]; $author["owner-avatar"] = $contact["thumb"]; - - // This is no field in the item table. So we have to unset it. - unset($author["author-nick"]); - unset($author["author-id"]); } $header = array(); From 058c93a64c110e56a275ba993de71ebe07eef82b Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 8 Jul 2016 22:55:39 +0200 Subject: [PATCH 13/19] Changed documentation --- include/Probe.php | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 8a606ecae..d9778c2ee 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -1,13 +1,7 @@ Date: Sat, 9 Jul 2016 20:09:09 +0200 Subject: [PATCH 14/19] Remove obsolote functions --- include/Probe.php | 104 ++++++++++++++ include/network.php | 320 +------------------------------------------ include/salmon.php | 5 +- mod/dfrn_confirm.php | 3 +- mod/dfrn_request.php | 2 +- mod/photos.php | 3 +- mod/profiles.php | 3 +- mod/webfinger.php | 10 +- 8 files changed, 117 insertions(+), 333 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index d9778c2ee..c31b98a9a 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -75,6 +75,7 @@ class Probe { return false; $xrd_data = array(); + foreach ($links["xrd"]["link"] AS $value => $link) { if (isset($link["@attributes"])) $attributes = $link["@attributes"]; @@ -95,6 +96,109 @@ class Probe { return $xrd_data; } + /** + * @brief Perform Webfinger lookup and return DFRN data + * + * Given an email style address, perform webfinger lookup and + * return the resulting DFRN profile URL, or if no DFRN profile URL + * is located, returns an OStatus subscription template (prefixed + * with the string 'stat:' to identify it as on OStatus template). + * If this isn't an email style address just return $webbie. + * Return an empty string if email-style addresses but webfinger fails, + * or if the resultant personal XRD doesn't contain a supported + * subscription/friend-request attribute. + * + * amended 7/9/2011 to return an hcard which could save potentially loading + * a lengthy content page to scrape dfrn attributes + * + * @param string $webbie Address that should be probed + * @param string $hcard Link to the hcard - is returned by reference + * + * @return string profile link + */ + + public static function webfinger_dfrn($webbie, &$hcard) { + if (!strstr($webbie, '@')) + return $webbie; + + $profile_link = ''; + + $links = self::webfinger($webbie); + logger('webfinger_dfrn: '.$webbie.':'.print_r($links,true), LOGGER_DATA); + if (count($links)) { + foreach ($links as $link) { + if ($link['@attributes']['rel'] === NAMESPACE_DFRN) + $profile_link = $link['@attributes']['href']; + if ($link['@attributes']['rel'] === NAMESPACE_OSTATUSSUB) + $profile_link = 'stat:'.$link['@attributes']['template']; + if ($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') + $hcard = $link['@attributes']['href']; + } + } + return $profile_link; + } + + /** + * @brief Check an URI for LRDD data + * + * this is a replacement for the "lrdd" function in include/network.php. + * It isn't used in this class and has some redundancies in the code. + * When time comes we can check the existing calls for "lrdd" if we can rework them. + * + * @param string $uri Address that should be probed + * + * @return array uri data + */ + public static function lrdd($uri) { + + $lrdd = self::xrd($uri); + + if (!$lrdd) { + $parts = @parse_url($uri); + if (!$parts) + return array(); + + $host = $parts["host"]; + + $path_parts = explode("/", trim($parts["path"], "/")); + + do { + $lrdd = self::xrd($host); + $host .= "/".array_shift($path_parts); + } while (!$lrdd AND (sizeof($path_parts) > 0)); + } + + if (!$lrdd) + return array(); + + foreach ($lrdd AS $key => $link) { + if ($webfinger) + continue; + + if (!in_array($key, array("lrdd", "lrdd-xml", "lrdd-json"))) + continue; + + $path = str_replace('{uri}', urlencode($uri), $link); + $webfinger = self::webfinger($path); + } + + if (!is_array($webfinger["links"])) + return false; + + $data = array(); + + foreach ($webfinger["links"] AS $link) + $data[] = array("@attributes" => $link); + + if (is_array($webfinger["aliases"])) + foreach ($webfinger["aliases"] AS $alias) + $data[] = array("@attributes" => + array("rel" => "alias", + "href" => $alias)); + + return $data; + } + /** * @brief Fetch information about a given uri * diff --git a/include/network.php b/include/network.php index 64b6d07c6..f20445d27 100644 --- a/include/network.php +++ b/include/network.php @@ -5,7 +5,7 @@ */ require_once("include/xml.php"); - +require_once('include/Probe.php'); /** * @brief Curl wrapper @@ -371,324 +371,6 @@ function http_status_exit($val, $description = array()) { } -// Given an email style address, perform webfinger lookup and -// return the resulting DFRN profile URL, or if no DFRN profile URL -// is located, returns an OStatus subscription template (prefixed -// with the string 'stat:' to identify it as on OStatus template). -// If this isn't an email style address just return $webbie. -// Return an empty string if email-style addresses but webfinger fails, -// or if the resultant personal XRD doesn't contain a supported -// subscription/friend-request attribute. - -// amended 7/9/2011 to return an hcard which could save potentially loading -// a lengthy content page to scrape dfrn attributes - -function webfinger_dfrn($webbie,&$hcard) { - if(! strstr($webbie,'@')) { - return $webbie; - } - $profile_link = ''; - - $links = webfinger($webbie); - logger('webfinger_dfrn: ' . $webbie . ':' . print_r($links,true), LOGGER_DATA); - if(count($links)) { - foreach($links as $link) { - if($link['@attributes']['rel'] === NAMESPACE_DFRN) - $profile_link = $link['@attributes']['href']; - if($link['@attributes']['rel'] === NAMESPACE_OSTATUSSUB) - $profile_link = 'stat:' . $link['@attributes']['template']; - if($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') - $hcard = $link['@attributes']['href']; - } - } - return $profile_link; -} - -/** - * @brief Perform webfinger lookup on an email style address - * - * @param string $webbi An email style address - * @param boolean $debug - * - * @return array of link attributes from the personal XRD file - * empty array on error/failure - */ -function webfinger($webbie, $debug = false) { - $host = ''; - if(strstr($webbie,'@')) { - $host = substr($webbie,strpos($webbie,'@') + 1); - } - if(strlen($host)) { - $tpl = fetch_lrdd_template($host); - logger('webfinger: lrdd template: ' . $tpl); - if(strlen($tpl)) { - $pxrd = str_replace('{uri}', urlencode('acct:' . $webbie), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - if(! count($links)) { - // try with double slashes - $pxrd = str_replace('{uri}', urlencode('acct://' . $webbie), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - } - return $links; - } - } - return array(); -} - -function lrdd($uri, $debug = false) { - - $a = get_app(); - - // default priority is host priority, host-meta first - - $priority = 'host'; - - // All we have is an email address. Resource-priority is irrelevant - // because our URI isn't directly resolvable. - - if(strstr($uri,'@')) { - return(webfinger($uri)); - } - - // get the host meta file - - $host = @parse_url($uri); - - if($host) { - $url = ((x($host,'scheme')) ? $host['scheme'] : 'http') . '://'; - $url .= $host['host'] . '/.well-known/host-meta' ; - } - else - return array(); - - logger('lrdd: constructed url: ' . $url); - - $xml = fetch_url($url); - - $headers = $a->get_curl_headers(); - - if (! $xml) - return array(); - - logger('lrdd: host_meta: ' . $xml, LOGGER_DATA); - - if(! stristr($xml,'].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - return(fetch_xrd_links($matches[1])); - break; - } - } - } - } - - - // priority 'resource' - - - $html = fetch_url($uri); - $headers = $a->get_curl_headers(); - logger('lrdd: headers=' . $headers, LOGGER_DEBUG); - - // don't try and parse raw xml as html - if(! strstr($html,'getElementsByTagName('link'); - foreach($items as $item) { - $x = $item->getAttribute('rel'); - if($x == "lrdd") { - $pagelink = $item->getAttribute('href'); - break; - } - } - } - } - - if(isset($pagelink)) - return(fetch_xrd_links($pagelink)); - - // next look in HTTP headers - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - /// @TODO Alter the following regex to support multiple relations (space separated) - if((stristr($line,'link:')) && preg_match('/<([^>].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - $pagelink = $matches[1]; - break; - } - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return array(); - if(stristr($html,''),array('href="','"/>'),$xml); - - $h = parse_xml_string($xml); - if(! $h) - return array(); - - $arr = xml::element_to_array($h); - - $links = array(); - - if(isset($arr['xrd']['link'])) { - $link = $arr['xrd']['link']; - if(! isset($link[0])) - $links = array($link); - else - $links = $link; - } - if(isset($arr['xrd']['alias'])) { - $alias = $arr['xrd']['alias']; - if(! isset($alias[0])) - $aliases = array($alias); - else - $aliases = $alias; - if(is_array($aliases) && count($aliases)) { - foreach($aliases as $alias) { - $links[]['@attributes'] = array('rel' => 'alias' , 'href' => $alias); - } - } - } - - logger('fetch_xrd_links: ' . print_r($links,true), LOGGER_DATA); - - return $links; - -} - /** * @brief Check URL to se if ts's real * diff --git a/include/salmon.php b/include/salmon.php index 988c542e2..8341b902a 100644 --- a/include/salmon.php +++ b/include/salmon.php @@ -1,15 +1,14 @@ '; $o .= 'Lookup address: '; - $o .= ''; + $o .= ''; $o .= '

'; if(x($_GET,'addr')) { $addr = trim($_GET['addr']); - if(strpos($addr,'@' !== false)) - $res = webfinger($addr); - else - $res = lrdd($addr); + $res = Probe::lrdd($addr); $o .= '
';
 		$o .= str_replace("\n",'
',print_r($res,true)); $o .= '
'; From b9da831138667700d465f15ad50df650ab4da427 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 10 Jul 2016 09:27:40 +0200 Subject: [PATCH 15/19] Fallback for empty names --- include/Probe.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index c31b98a9a..df2246f20 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -232,10 +232,15 @@ class Probe { else $data["photo"] = App::get_baseurl().'/images/person-175.jpg'; - if (!isset($data["name"])) - $data["name"] = $data["url"]; + if (!isset($data["name"]) OR ($data["name"] == "")) { + if (isset($data["nick"])) + $data["name"] = $data["nick"]; - if (!isset($data["nick"])) { + if ($data["name"] == "") + $data["name"] = $data["url"]; + } + + if (!isset($data["nick"]) OR ($data["nick"] == "")) { $data["nick"] = strtolower($data["name"]); if (strpos($data['nick'], ' ')) From 8a11a60932115725d65791d99a817aefbde174b7 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 10 Jul 2016 09:29:43 +0200 Subject: [PATCH 16/19] Small text correction --- include/Probe.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Probe.php b/include/Probe.php index df2246f20..f8c35acbe 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -289,7 +289,7 @@ class Probe { */ private function detect($uri, $network, $uid) { if (strstr($uri, '@')) { - // If the URI starts with "mailto:" then jum directly to the mail detection + // If the URI starts with "mailto:" then jump directly to the mail detection if (strpos($url,'mailto:') !== false) { $uri = str_replace('mailto:', '', $url); return self::mail($uri, $uid); From 19d820a9c4b9c9b3e2234347b5fc68e48c8c9a75 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 10 Jul 2016 14:08:27 +0200 Subject: [PATCH 17/19] Added fcontact doc --- doc/database/db_fcontact.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/database/db_fcontact.md b/doc/database/db_fcontact.md index e1cecbf87..280160522 100644 --- a/doc/database/db_fcontact.md +++ b/doc/database/db_fcontact.md @@ -4,6 +4,7 @@ Table fcontact | Field | Description | Type | Null | Key | Default | Extra | | -------- | ------------- | ---------------- | ---- | --- | ------------------- | --------------- | | id | sequential ID | int(10) unsigned | NO | PRI | NULL | auto_increment | +| guid | unique id | varchar(64) | NO | | | | | url | | varchar(255) | NO | | | | | name | | varchar(255) | NO | | | | | photo | | varchar(255) | NO | | | | From aad3fb2d32ac7999de0b5da0f761806d20104fd9 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sun, 10 Jul 2016 18:44:48 +0200 Subject: [PATCH 18/19] Some more documentation --- include/Probe.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index f8c35acbe..8d4103aff 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -200,7 +200,7 @@ class Probe { } /** - * @brief Fetch information about a given uri + * @brief Fetch information (protocol endpoints and user information) about a given uri * * @param string $uri Address that should be probed * @param string $network Test for this specific network @@ -279,7 +279,9 @@ class Probe { } /** - * @brief Detect information about a given uri + * @brief Fetch information (protocol endpoints and user information) about a given uri + * + * This function is only called by the "uri" function that adds caching and rearranging of data. * * @param string $uri Address that should be probed * @param string $network Test for this specific network @@ -406,7 +408,7 @@ class Probe { } /** - * @brief Do a webfinger request + * @brief Do a webfinger request. For details see RFC 7033: * * @param string $url Address that should be probed * @@ -460,6 +462,8 @@ class Probe { /** * @brief Poll the noscrape page (Friendica specific) * + * "noscrape" is a faster alternative to fetching the data from the hcard. + * * @param string $noscrape Link to the noscrape page * @param array $data The already fetched data * @@ -543,7 +547,7 @@ class Probe { } /** - * @brief Fetch data from a DFRN profile page + * @brief Fetch data from a DFRN profile page and via "noscrape" * * @param string $profile Link to the profile page * From ee7f10f13086be1eaca5380942f1639773cfc9f2 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 11 Jul 2016 19:48:37 +0200 Subject: [PATCH 19/19] Some more little documentation --- include/Probe.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/Probe.php b/include/Probe.php index 8d4103aff..7cda542d0 100644 --- a/include/Probe.php +++ b/include/Probe.php @@ -408,7 +408,9 @@ class Probe { } /** - * @brief Do a webfinger request. For details see RFC 7033: + * @brief Perform a webfinger request. + * + * For details see RFC 7033: * * @param string $url Address that should be probed * @@ -460,9 +462,10 @@ class Probe { } /** - * @brief Poll the noscrape page (Friendica specific) + * @brief Poll the Friendica specific noscrape page. * - * "noscrape" is a faster alternative to fetching the data from the hcard. + * "noscrape" is a faster alternative to fetch the data from the hcard. + * This functionality was originally created for the directory. * * @param string $noscrape Link to the noscrape page * @param array $data The already fetched data