diff --git a/doc/database/db_fcontact.md b/doc/database/db_fcontact.md index e1cecbf87..280160522 100644 --- a/doc/database/db_fcontact.md +++ b/doc/database/db_fcontact.md @@ -4,6 +4,7 @@ Table fcontact | Field | Description | Type | Null | Key | Default | Extra | | -------- | ------------- | ---------------- | ---- | --- | ------------------- | --------------- | | id | sequential ID | int(10) unsigned | NO | PRI | NULL | auto_increment | +| guid | unique id | varchar(64) | NO | | | | | url | | varchar(255) | NO | | | | | name | | varchar(255) | NO | | | | | photo | | varchar(255) | NO | | | | diff --git a/include/Probe.php b/include/Probe.php new file mode 100644 index 000000000..7cda542d0 --- /dev/null +++ b/include/Probe.php @@ -0,0 +1,1121 @@ + Link to LRDD endpoint + * 'lrdd-xml' => Link to LRDD endpoint in XML format + * 'lrdd-json' => Link to LRDD endpoint in JSON format + */ + private function xrd($host) { + + $ssl_url = "https://".$host."/.well-known/host-meta"; + $url = "http://".$host."/.well-known/host-meta"; + + $xrd_timeout = Config::get('system','xrd_timeout', 20); + $redirects = 0; + + $xml = fetch_url($ssl_url, false, $redirects, $xrd_timeout, "application/xrd+xml"); + $xrd = parse_xml_string($xml, false); + + if (!is_object($xrd)) { + $xml = fetch_url($url, false, $redirects, $xrd_timeout, "application/xrd+xml"); + $xrd = parse_xml_string($xml, false); + } + if (!is_object($xrd)) + return false; + + $links = xml::element_to_array($xrd); + if (!isset($links["xrd"]["link"])) + return false; + + $xrd_data = array(); + + foreach ($links["xrd"]["link"] AS $value => $link) { + if (isset($link["@attributes"])) + $attributes = $link["@attributes"]; + elseif ($value == "@attributes") + $attributes = $link; + else + continue; + + if (($attributes["rel"] == "lrdd") AND + ($attributes["type"] == "application/xrd+xml")) + $xrd_data["lrdd-xml"] = $attributes["template"]; + elseif (($attributes["rel"] == "lrdd") AND + ($attributes["type"] == "application/json")) + $xrd_data["lrdd-json"] = $attributes["template"]; + elseif ($attributes["rel"] == "lrdd") + $xrd_data["lrdd"] = $attributes["template"]; + } + return $xrd_data; + } + + /** + * @brief Perform Webfinger lookup and return DFRN data + * + * Given an email style address, perform webfinger lookup and + * return the resulting DFRN profile URL, or if no DFRN profile URL + * is located, returns an OStatus subscription template (prefixed + * with the string 'stat:' to identify it as on OStatus template). + * If this isn't an email style address just return $webbie. + * Return an empty string if email-style addresses but webfinger fails, + * or if the resultant personal XRD doesn't contain a supported + * subscription/friend-request attribute. + * + * amended 7/9/2011 to return an hcard which could save potentially loading + * a lengthy content page to scrape dfrn attributes + * + * @param string $webbie Address that should be probed + * @param string $hcard Link to the hcard - is returned by reference + * + * @return string profile link + */ + + public static function webfinger_dfrn($webbie, &$hcard) { + if (!strstr($webbie, '@')) + return $webbie; + + $profile_link = ''; + + $links = self::webfinger($webbie); + logger('webfinger_dfrn: '.$webbie.':'.print_r($links,true), LOGGER_DATA); + if (count($links)) { + foreach ($links as $link) { + if ($link['@attributes']['rel'] === NAMESPACE_DFRN) + $profile_link = $link['@attributes']['href']; + if ($link['@attributes']['rel'] === NAMESPACE_OSTATUSSUB) + $profile_link = 'stat:'.$link['@attributes']['template']; + if ($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') + $hcard = $link['@attributes']['href']; + } + } + return $profile_link; + } + + /** + * @brief Check an URI for LRDD data + * + * this is a replacement for the "lrdd" function in include/network.php. + * It isn't used in this class and has some redundancies in the code. + * When time comes we can check the existing calls for "lrdd" if we can rework them. + * + * @param string $uri Address that should be probed + * + * @return array uri data + */ + public static function lrdd($uri) { + + $lrdd = self::xrd($uri); + + if (!$lrdd) { + $parts = @parse_url($uri); + if (!$parts) + return array(); + + $host = $parts["host"]; + + $path_parts = explode("/", trim($parts["path"], "/")); + + do { + $lrdd = self::xrd($host); + $host .= "/".array_shift($path_parts); + } while (!$lrdd AND (sizeof($path_parts) > 0)); + } + + if (!$lrdd) + return array(); + + foreach ($lrdd AS $key => $link) { + if ($webfinger) + continue; + + if (!in_array($key, array("lrdd", "lrdd-xml", "lrdd-json"))) + continue; + + $path = str_replace('{uri}', urlencode($uri), $link); + $webfinger = self::webfinger($path); + } + + if (!is_array($webfinger["links"])) + return false; + + $data = array(); + + foreach ($webfinger["links"] AS $link) + $data[] = array("@attributes" => $link); + + if (is_array($webfinger["aliases"])) + foreach ($webfinger["aliases"] AS $alias) + $data[] = array("@attributes" => + array("rel" => "alias", + "href" => $alias)); + + return $data; + } + + /** + * @brief Fetch information (protocol endpoints and user information) about a given uri + * + * @param string $uri Address that should be probed + * @param string $network Test for this specific network + * @param integer $uid User ID for the probe (only used for mails) + * @param boolean $cache Use cached values? + * + * @return array uri data + */ + public static function uri($uri, $network = "", $uid = 0, $cache = true) { + + if ($cache) { + $result = Cache::get("probe_url:".$network.":".$uri); + if (!is_null($result)) { + $result = unserialize($result); + return $result; + } + } + + if ($uid == 0) + $uid = local_user(); + + $data = self::detect($uri, $network, $uid); + + if (!isset($data["url"])) + $data["url"] = $uri; + + if ($data["photo"] != "") + $data["baseurl"] = matching_url(normalise_link($data["baseurl"]), normalise_link($data["photo"])); + else + $data["photo"] = App::get_baseurl().'/images/person-175.jpg'; + + if (!isset($data["name"]) OR ($data["name"] == "")) { + if (isset($data["nick"])) + $data["name"] = $data["nick"]; + + if ($data["name"] == "") + $data["name"] = $data["url"]; + } + + if (!isset($data["nick"]) OR ($data["nick"] == "")) { + $data["nick"] = strtolower($data["name"]); + + if (strpos($data['nick'], ' ')) + $data['nick'] = trim(substr($data['nick'], 0, strpos($data['nick'], ' '))); + } + + if (!isset($data["network"])) + $data["network"] = NETWORK_PHANTOM; + + $data = self::rearrange_data($data); + + // Only store into the cache if the value seems to be valid + if (!in_array($data['network'], array(NETWORK_PHANTOM, NETWORK_MAIL))) { + Cache::set("probe_url:".$network.":".$uri,serialize($data), CACHE_DAY); + + /// @todo temporary fix - we need a real contact update function that updates only changing fields + /// The biggest problem is the avatar picture that could have a reduced image size. + /// It should only be updated if the existing picture isn't existing anymore. + if (($data['network'] != NETWORK_FEED) AND ($mode == PROBE_NORMAL) AND + $data["name"] AND $data["nick"] AND $data["url"] AND $data["addr"] AND $data["poll"]) + q("UPDATE `contact` SET `name` = '%s', `nick` = '%s', `url` = '%s', `addr` = '%s', + `notify` = '%s', `poll` = '%s', `alias` = '%s', `success_update` = '%s' + WHERE `nurl` = '%s' AND NOT `self` AND `uid` = 0", + dbesc($data["name"]), + dbesc($data["nick"]), + dbesc($data["url"]), + dbesc($data["addr"]), + dbesc($data["notify"]), + dbesc($data["poll"]), + dbesc($data["alias"]), + dbesc(datetime_convert()), + dbesc(normalise_link($data['url'])) + ); + } + return $data; + } + + /** + * @brief Fetch information (protocol endpoints and user information) about a given uri + * + * This function is only called by the "uri" function that adds caching and rearranging of data. + * + * @param string $uri Address that should be probed + * @param string $network Test for this specific network + * @param integer $uid User ID for the probe (only used for mails) + * + * @return array uri data + */ + private function detect($uri, $network, $uid) { + if (strstr($uri, '@')) { + // If the URI starts with "mailto:" then jump directly to the mail detection + if (strpos($url,'mailto:') !== false) { + $uri = str_replace('mailto:', '', $url); + return self::mail($uri, $uid); + } + + if ($network == NETWORK_MAIL) + return self::mail($uri, $uid); + + // Remove "acct:" from the URI + $uri = str_replace('acct:', '', $uri); + + $host = substr($uri,strpos($uri, '@') + 1); + $nick = substr($uri,0, strpos($uri, '@')); + + if (strpos($uri, '@twitter.com')) + return array("network" => NETWORK_TWITTER); + + $lrdd = self::xrd($host); + if (!$lrdd) + return self::mail($uri, $uid); + + $addr = $uri; + } else { + $parts = parse_url($uri); + if (!isset($parts["scheme"]) OR + !isset($parts["host"]) OR + !isset($parts["path"])) + return false; + + // todo: Ports? + $host = $parts["host"]; + + if ($host == 'twitter.com') + return array("network" => NETWORK_TWITTER); + + $lrdd = self::xrd($host); + + $path_parts = explode("/", trim($parts["path"], "/")); + + while (!$lrdd AND (sizeof($path_parts) > 1)) { + $host .= "/".array_shift($path_parts); + $lrdd = self::xrd($host); + } + if (!$lrdd) + return self::feed($uri); + + $nick = array_pop($path_parts); + $addr = $nick."@".$host; + } + $webfinger = false; + + /// @todo Do we need the prefix "acct:" or "acct://"? + + foreach ($lrdd AS $key => $link) { + if ($webfinger) + continue; + + if (!in_array($key, array("lrdd", "lrdd-xml", "lrdd-json"))) + continue; + + // Try webfinger with the address (user@domain.tld) + $path = str_replace('{uri}', urlencode($addr), $link); + $webfinger = self::webfinger($path); + + // If webfinger wasn't successful then try it with the URL - possibly in the format https://... + if (!$webfinger AND ($uri != $addr)) { + $path = str_replace('{uri}', urlencode($uri), $link); + $webfinger = self::webfinger($path); + + // Since the detection with the address wasn't successful, we delete it. + if ($webfinger) { + $nick = ""; + $addr = ""; + } + } + + } + if (!$webfinger) + return self::feed($uri); + + $result = false; + + logger("Probing ".$uri, LOGGER_DEBUG); + + if (in_array($network, array("", NETWORK_DFRN))) + $result = self::dfrn($webfinger); + if ((!$result AND ($network == "")) OR ($network == NETWORK_DIASPORA)) + $result = self::diaspora($webfinger); + if ((!$result AND ($network == "")) OR ($network == NETWORK_OSTATUS)) + $result = self::ostatus($webfinger); + if ((!$result AND ($network == "")) OR ($network == NETWORK_PUMPIO)) + $result = self::pumpio($webfinger); + if ((!$result AND ($network == "")) OR ($network == NETWORK_FEED)) + $result = self::feed($uri); + else { + // We overwrite the detected nick with our try if the previois routines hadn't detected it. + // Additionally it is overwritten when the nickname doesn't make sense (contains spaces). + if ((!isset($result["nick"]) OR ($result["nick"] == "") OR (strstr($result["nick"], " "))) AND ($nick != "")) + $result["nick"] = $nick; + + if ((!isset($result["addr"]) OR ($result["addr"] == "")) AND ($addr != "")) + $result["addr"] = $addr; + } + + logger($uri." is ".$result["network"], LOGGER_DEBUG); + + if (!isset($result["baseurl"]) OR ($result["baseurl"] == "")) { + $pos = strpos($result["url"], $host); + if ($pos) + $result["baseurl"] = substr($result["url"], 0, $pos).$host; + } + + return $result; + } + + /** + * @brief Perform a webfinger request. + * + * For details see RFC 7033: + * + * @param string $url Address that should be probed + * + * @return array webfinger data + */ + private function webfinger($url) { + + $xrd_timeout = Config::get('system','xrd_timeout', 20); + $redirects = 0; + + $data = fetch_url($url, false, $redirects, $xrd_timeout, "application/xrd+xml"); + $xrd = parse_xml_string($data, false); + + if (!is_object($xrd)) { + // If it is not XML, maybe it is JSON + $webfinger = json_decode($data, true); + + if (!isset($webfinger["links"])) + return false; + + return $webfinger; + } + + $xrd_arr = xml::element_to_array($xrd); + if (!isset($xrd_arr["xrd"]["link"])) + return false; + + $webfinger = array(); + + if (isset($xrd_arr["xrd"]["subject"])) + $webfinger["subject"] = $xrd_arr["xrd"]["subject"]; + + if (isset($xrd_arr["xrd"]["alias"])) + $webfinger["aliases"] = $xrd_arr["xrd"]["alias"]; + + $webfinger["links"] = array(); + + foreach ($xrd_arr["xrd"]["link"] AS $value => $data) { + if (isset($data["@attributes"])) + $attributes = $data["@attributes"]; + elseif ($value == "@attributes") + $attributes = $data; + else + continue; + + $webfinger["links"][] = $attributes; + } + return $webfinger; + } + + /** + * @brief Poll the Friendica specific noscrape page. + * + * "noscrape" is a faster alternative to fetch the data from the hcard. + * This functionality was originally created for the directory. + * + * @param string $noscrape Link to the noscrape page + * @param array $data The already fetched data + * + * @return array noscrape data + */ + private function poll_noscrape($noscrape, $data) { + $content = fetch_url($noscrape); + if (!$content) + return false; + + $json = json_decode($content, true); + if (!is_array($json)) + return false; + + if (isset($json["fn"])) + $data["name"] = $json["fn"]; + + if (isset($json["addr"])) + $data["addr"] = $json["addr"]; + + if (isset($json["nick"])) + $data["nick"] = $json["nick"]; + + if (isset($json["comm"])) + $data["community"] = $json["comm"]; + + if (isset($json["tags"])) { + $keywords = implode(" ", $json["tags"]); + if ($keywords != "") + $data["keywords"] = $keywords; + } + + $location = formatted_location($json); + if ($location) + $data["location"] = $location; + + if (isset($json["about"])) + $data["about"] = $json["about"]; + + if (isset($json["key"])) + $data["pubkey"] = $json["key"]; + + if (isset($json["photo"])) + $data["photo"] = $json["photo"]; + + if (isset($json["dfrn-request"])) + $data["request"] = $json["dfrn-request"]; + + if (isset($json["dfrn-confirm"])) + $data["confirm"] = $json["dfrn-confirm"]; + + if (isset($json["dfrn-notify"])) + $data["notify"] = $json["dfrn-notify"]; + + if (isset($json["dfrn-poll"])) + $data["poll"] = $json["dfrn-poll"]; + + return $data; + } + + /** + * @brief Check for valid DFRN data + * + * @param array $data DFRN data + * + * @return int Number of errors + */ + public static function valid_dfrn($data) { + $errors = 0; + if(!isset($data['key'])) + $errors ++; + if(!isset($data['dfrn-request'])) + $errors ++; + if(!isset($data['dfrn-confirm'])) + $errors ++; + if(!isset($data['dfrn-notify'])) + $errors ++; + if(!isset($data['dfrn-poll'])) + $errors ++; + return $errors; + } + + /** + * @brief Fetch data from a DFRN profile page and via "noscrape" + * + * @param string $profile Link to the profile page + * + * @return array profile data + */ + public static function profile($profile) { + + $data = array(); + + // Fetch data via noscrape - this is faster + $noscrape = str_replace(array("/hcard/", "/profile/"), "/noscrape/", $profile); + $data = self::poll_noscrape($noscrape, $data); + + if (!isset($data["notify"]) OR !isset($data["confirm"]) OR + !isset($data["request"]) OR !isset($data["poll"]) OR + !isset($data["poco"]) OR !isset($data["name"]) OR + !isset($data["photo"])) + $data = self::poll_hcard($profile, $data, true); + + $prof_data = array(); + $prof_data["addr"] = $data["addr"]; + $prof_data["nick"] = $data["nick"]; + $prof_data["dfrn-request"] = $data["request"]; + $prof_data["dfrn-confirm"] = $data["confirm"]; + $prof_data["dfrn-notify"] = $data["notify"]; + $prof_data["dfrn-poll"] = $data["poll"]; + $prof_data["dfrn-poco"] = $data["poco"]; + $prof_data["photo"] = $data["photo"]; + $prof_data["fn"] = $data["name"]; + $prof_data["key"] = $data["pubkey"]; + + return $prof_data; + } + + /** + * @brief Check for DFRN contact + * + * @param array $webfinger Webfinger data + * + * @return array DFRN data + */ + private function dfrn($webfinger) { + + $hcard = ""; + $data = array(); + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == NAMESPACE_DFRN) AND ($link["href"] != "")) + $data["network"] = NETWORK_DFRN; + elseif (($link["rel"] == NAMESPACE_FEED) AND ($link["href"] != "")) + $data["poll"] = $link["href"]; + elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == "http://microformats.org/profile/hcard") AND ($link["href"] != "")) + $hcard = $link["href"]; + elseif (($link["rel"] == NAMESPACE_POCO) AND ($link["href"] != "")) + $data["poco"] = $link["href"]; + elseif (($link["rel"] == "http://webfinger.net/rel/avatar") AND ($link["href"] != "")) + $data["photo"] = $link["href"]; + + elseif (($link["rel"] == "http://joindiaspora.com/seed_location") AND ($link["href"] != "")) + $data["baseurl"] = trim($link["href"], '/'); + elseif (($link["rel"] == "http://joindiaspora.com/guid") AND ($link["href"] != "")) + $data["guid"] = $link["href"]; + elseif (($link["rel"] == "diaspora-public-key") AND ($link["href"] != "")) { + $data["pubkey"] = base64_decode($link["href"]); + + //if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + if (strstr($data["pubkey"], 'RSA ')) + $data["pubkey"] = rsatopem($data["pubkey"]); + } + } + + if (!isset($data["network"]) OR ($hcard == "")) + return false; + + // Fetch data via noscrape - this is faster + $noscrape = str_replace("/hcard/", "/noscrape/", $hcard); + $data = self::poll_noscrape($noscrape, $data); + + if (isset($data["notify"]) AND isset($data["confirm"]) AND isset($data["request"]) AND + isset($data["poll"]) AND isset($data["name"]) AND isset($data["photo"])) + return $data; + + $data = self::poll_hcard($hcard, $data, true); + + return $data; + } + + /** + * @brief Poll the hcard page (Diaspora and Friendica specific) + * + * @param string $hcard Link to the hcard page + * @param array $data The already fetched data + * @param boolean $dfrn Poll DFRN specific data + * + * @return array hcard data + */ + private function poll_hcard($hcard, $data, $dfrn = false) { + + $doc = new DOMDocument(); + if (!@$doc->loadHTMLFile($hcard)) + return false; + + $xpath = new DomXPath($doc); + + $vcards = $xpath->query("//div[contains(concat(' ', @class, ' '), ' vcard ')]"); + if (!is_object($vcards)) + return false; + + if ($vcards->length == 0) + return false; + + $vcard = $vcards->item(0); + + // We have to discard the guid from the hcard in favour of the guid from lrdd + // Reason: Hubzilla doesn't use the value "uid" in the hcard like Diaspora does. + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' uid ')]", $vcard); // */ + if (($search->length > 0) AND ($data["guid"] == "")) + $data["guid"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' nickname ')]", $vcard); // */ + if ($search->length > 0) + $data["nick"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' fn ')]", $vcard); // */ + if ($search->length > 0) + $data["name"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' searchable ')]", $vcard); // */ + if ($search->length > 0) + $data["searchable"] = $search->item(0)->nodeValue; + + $search = $xpath->query("//*[contains(concat(' ', @class, ' '), ' key ')]", $vcard); // */ + if ($search->length > 0) { + $data["pubkey"] = $search->item(0)->nodeValue; + if (strstr($data["pubkey"], 'RSA ')) + $data["pubkey"] = rsatopem($data["pubkey"]); + } + + $search = $xpath->query("//*[@id='pod_location']", $vcard); // */ + if ($search->length > 0) + $data["baseurl"] = trim($search->item(0)->nodeValue, "/"); + + $avatar = array(); + $photos = $xpath->query("//*[contains(concat(' ', @class, ' '), ' photo ') or contains(concat(' ', @class, ' '), ' avatar ')]", $vcard); // */ + foreach ($photos AS $photo) { + $attr = array(); + foreach ($photo->attributes as $attribute) + $attr[$attribute->name] = trim($attribute->value); + + if (isset($attr["src"]) AND isset($attr["width"])) + $avatar[$attr["width"]] = $attr["src"]; + } + + if (sizeof($avatar)) { + ksort($avatar); + $data["photo"] = array_pop($avatar); + } + + if ($dfrn) { + // Poll DFRN specific data + $search = $xpath->query("//link[contains(concat(' ', @rel), ' dfrn-')]"); + if ($search->length > 0) { + foreach ($search AS $link) { + //$data["request"] = $search->item(0)->nodeValue; + $attr = array(); + foreach ($link->attributes as $attribute) + $attr[$attribute->name] = trim($attribute->value); + + $data[substr($attr["rel"], 5)] = $attr["href"]; + } + } + + // Older Friendica versions had used the "uid" field differently than newer versions + if ($data["nick"] == $data["guid"]) + unset($data["guid"]); + } + + + return $data; + } + + /** + * @brief Check for Diaspora contact + * + * @param array $webfinger Webfinger data + * + * @return array Diaspora data + */ + private function diaspora($webfinger) { + + $hcard = ""; + $data = array(); + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == "http://microformats.org/profile/hcard") AND ($link["href"] != "")) + $hcard = $link["href"]; + elseif (($link["rel"] == "http://joindiaspora.com/seed_location") AND ($link["href"] != "")) + $data["baseurl"] = trim($link["href"], '/'); + elseif (($link["rel"] == "http://joindiaspora.com/guid") AND ($link["href"] != "")) + $data["guid"] = $link["href"]; + elseif (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == NAMESPACE_FEED) AND ($link["href"] != "")) + $data["poll"] = $link["href"]; + elseif (($link["rel"] == NAMESPACE_POCO) AND ($link["href"] != "")) + $data["poco"] = $link["href"]; + elseif (($link["rel"] == "salmon") AND ($link["href"] != "")) + $data["notify"] = $link["href"]; + elseif (($link["rel"] == "diaspora-public-key") AND ($link["href"] != "")) { + $data["pubkey"] = base64_decode($link["href"]); + + //if (strstr($data["pubkey"], 'RSA ') OR ($link["type"] == "RSA")) + if (strstr($data["pubkey"], 'RSA ')) + $data["pubkey"] = rsatopem($data["pubkey"]); + } + } + + if (!isset($data["url"]) OR ($hcard == "")) + return false; + + if (is_array($webfinger["aliases"])) + foreach ($webfinger["aliases"] AS $alias) + if (normalise_link($alias) != normalise_link($data["url"]) AND !strstr($alias, "@")) + $data["alias"] = $alias; + + // Fetch further information from the hcard + $data = self::poll_hcard($hcard, $data); + + if (!$data) + return false; + + if (isset($data["url"]) AND isset($data["guid"]) AND isset($data["baseurl"]) AND + isset($data["pubkey"]) AND ($hcard != "")) { + $data["network"] = NETWORK_DIASPORA; + + // We have to overwrite the detected value for "notify" since Hubzilla doesn't send it + $data["notify"] = $data["baseurl"]."/receive/users/".$data["guid"]; + $data["batch"] = $data["baseurl"]."/receive/public"; + } else + return false; + + return $data; + } + + /** + * @brief Check for OStatus contact + * + * @param array $webfinger Webfinger data + * + * @return array OStatus data + */ + private function ostatus($webfinger) { + + $data = array(); + if (is_array($webfinger["aliases"])) + foreach($webfinger["aliases"] AS $alias) + if (strstr($alias, "@")) + $data["addr"] = str_replace('acct:', '', $alias); + + $pubkey = ""; + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == "salmon") AND ($link["href"] != "")) + $data["notify"] = $link["href"]; + elseif (($link["rel"] == NAMESPACE_FEED) AND ($link["href"] != "")) + $data["poll"] = $link["href"]; + elseif (($link["rel"] == "magic-public-key") AND ($link["href"] != "")) { + $pubkey = $link["href"]; + + if (substr($pubkey, 0, 5) === 'data:') { + if (strstr($pubkey, ',')) + $pubkey = substr($pubkey, strpos($pubkey, ',') + 1); + else + $pubkey = substr($pubkey, 5); + } else + $pubkey = fetch_url($pubkey); + + $key = explode(".", $pubkey); + + if (sizeof($key) >= 3) { + $m = base64url_decode($key[1]); + $e = base64url_decode($key[2]); + $data["pubkey"] = metopem($m,$e); + } + + } + } + + if (isset($data["notify"]) AND isset($data["pubkey"]) AND + isset($data["poll"]) AND isset($data["url"])) { + $data["network"] = NETWORK_OSTATUS; + } else + return false; + + // Fetch all additional data from the feed + $feed = fetch_url($data["poll"]); + $feed_data = feed_import($feed,$dummy1,$dummy2, $dummy3, true); + if (!$feed_data) + return false; + + if ($feed_data["header"]["author-name"] != "") + $data["name"] = $feed_data["header"]["author-name"]; + + if ($feed_data["header"]["author-nick"] != "") + $data["nick"] = $feed_data["header"]["author-nick"]; + + if ($feed_data["header"]["author-avatar"] != "") + $data["photo"] = $feed_data["header"]["author-avatar"]; + + if ($feed_data["header"]["author-id"] != "") + $data["alias"] = $feed_data["header"]["author-id"]; + + if ($feed_data["header"]["author-location"] != "") + $data["location"] = $feed_data["header"]["author-location"]; + + if ($feed_data["header"]["author-about"] != "") + $data["about"] = $feed_data["header"]["author-about"]; + + // OStatus has serious issues when the the url doesn't fit (ssl vs. non ssl) + // So we take the value that we just fetched, although the other one worked as well + if ($feed_data["header"]["author-link"] != "") + $data["url"] = $feed_data["header"]["author-link"]; + + /// @todo Fetch location and "about" from the feed as well + return $data; + } + + /** + * @brief Fetch data from a pump.io profile page + * + * @param string $profile Link to the profile page + * + * @return array profile data + */ + private function pumpio_profile_data($profile) { + + $doc = new DOMDocument(); + if (!@$doc->loadHTMLFile($profile)) + return false; + + $xpath = new DomXPath($doc); + + $data = array(); + + // This is ugly - but pump.io doesn't seem to know a better way for it + $data["name"] = trim($xpath->query("//h1[@class='media-header']")->item(0)->nodeValue); + $pos = strpos($data["name"], chr(10)); + if ($pos) + $data["name"] = trim(substr($data["name"], 0, $pos)); + + $avatar = $xpath->query("//img[@class='img-rounded media-object']")->item(0); + if ($avatar) + foreach ($avatar->attributes as $attribute) + if ($attribute->name == "src") + $data["photo"] = trim($attribute->value); + + $data["location"] = $xpath->query("//p[@class='location']")->item(0)->nodeValue; + $data["about"] = $xpath->query("//p[@class='summary']")->item(0)->nodeValue; + + return $data; + } + + /** + * @brief Check for pump.io contact + * + * @param array $webfinger Webfinger data + * + * @return array pump.io data + */ + private function pumpio($webfinger) { + + $data = array(); + foreach ($webfinger["links"] AS $link) { + if (($link["rel"] == "http://webfinger.net/rel/profile-page") AND + ($link["type"] == "text/html") AND ($link["href"] != "")) + $data["url"] = $link["href"]; + elseif (($link["rel"] == "activity-inbox") AND ($link["href"] != "")) + $data["notify"] = $link["href"]; + elseif (($link["rel"] == "activity-outbox") AND ($link["href"] != "")) + $data["poll"] = $link["href"]; + elseif (($link["rel"] == "dialback") AND ($link["href"] != "")) + $data["dialback"] = $link["href"]; + } + if (isset($data["poll"]) AND isset($data["notify"]) AND + isset($data["dialback"]) AND isset($data["url"])) { + + // by now we use these fields only for the network type detection + // So we unset all data that isn't used at the moment + unset($data["dialback"]); + + $data["network"] = NETWORK_PUMPIO; + } else + return false; + + $profile_data = self::pumpio_profile_data($data["url"]); + + if (!$profile_data) + return false; + + $data = array_merge($data, $profile_data); + + return $data; + } + + /** + * @brief Check page for feed link + * + * @param string $url Page link + * + * @return string feed link + */ + private function get_feed_link($url) { + $doc = new DOMDocument(); + + if (!@$doc->loadHTMLFile($url)) + return false; + + $xpath = new DomXPath($doc); + + //$feeds = $xpath->query("/html/head/link[@type='application/rss+xml']"); + $feeds = $xpath->query("/html/head/link[@type='application/rss+xml' and @rel='alternate']"); + if (!is_object($feeds)) + return false; + + if ($feeds->length == 0) + return false; + + $feed_url = ""; + + foreach ($feeds AS $feed) { + $attr = array(); + foreach ($feed->attributes as $attribute) + $attr[$attribute->name] = trim($attribute->value); + + if ($feed_url == "") + $feed_url = $attr["href"]; + } + + return $feed_url; + } + + /** + * @brief Check for feed contact + * + * @param string $url Profile link + * @param boolean $probe Do a probe if the page contains a feed link + * + * @return array feed data + */ + private function feed($url, $probe = true) { + $feed = fetch_url($url); + $feed_data = feed_import($feed, $dummy1, $dummy2, $dummy3, true); + + if (!$feed_data) { + if (!$probe) + return false; + + $feed_url = self::get_feed_link($url); + + if (!$feed_url) + return false; + + return self::feed($feed_url, false); + } + + if ($feed_data["header"]["author-name"] != "") + $data["name"] = $feed_data["header"]["author-name"]; + + if ($feed_data["header"]["author-nick"] != "") + $data["nick"] = $feed_data["header"]["author-nick"]; + + if ($feed_data["header"]["author-avatar"] != "") + $data["photo"] = $feed_data["header"]["author-avatar"]; + + if ($feed_data["header"]["author-id"] != "") + $data["alias"] = $feed_data["header"]["author-id"]; + + $data["url"] = $url; + $data["poll"] = $url; + + if ($feed_data["header"]["author-link"] != "") + $data["baseurl"] = $feed_data["header"]["author-link"]; + else + $data["baseurl"] = $data["url"]; + + $data["network"] = NETWORK_FEED; + + return $data; + } + + /** + * @brief Check for mail contact + * + * @param string $uri Profile link + * @param integer $uid User ID + * + * @return array mail data + */ + private function mail($uri, $uid) { + + if (!validate_email($uri)) + return false; + + $x = q("SELECT `prvkey` FROM `user` WHERE `uid` = %d LIMIT 1", intval($uid)); + + $r = q("SELECT * FROM `mailacct` WHERE `uid` = %d AND `server` != '' LIMIT 1", intval($uid)); + + if(count($x) && count($r)) { + $mailbox = construct_mailbox_name($r[0]); + $password = ''; + openssl_private_decrypt(hex2bin($r[0]['pass']), $password,$x[0]['prvkey']); + $mbox = email_connect($mailbox,$r[0]['user'], $password); + if(!mbox) + return false; + } + + $msgs = email_poll($mbox, $uri); + logger('searching '.$uri.', '.count($msgs).' messages found.', LOGGER_DEBUG); + + if (!count($msgs)) + return false; + + $data = array(); + + $data["addr"] = $uri; + $data["network"] = NETWORK_MAIL; + $data["name"] = substr($uri, 0, strpos($uri,'@')); + $data["nick"] = $data["name"]; + $data["photo"] = avatar_img($uri); + + $phost = substr($uri, strpos($uri,'@') + 1); + $data["url"] = 'http://'.$phost."/".$data["nick"]; + $data["notify"] = 'smtp '.random_string(); + $data["poll"] = 'email '.random_string(); + + $x = email_msg_meta($mbox, $msgs[0]); + if(stristr($x[0]->from, $uri)) + $adr = imap_rfc822_parse_adrlist($x[0]->from, ''); + elseif(stristr($x[0]->to, $uri)) + $adr = imap_rfc822_parse_adrlist($x[0]->to, ''); + if(isset($adr)) { + foreach($adr as $feadr) { + if((strcasecmp($feadr->mailbox, $data["name"]) == 0) + &&(strcasecmp($feadr->host, $phost) == 0) + && (strlen($feadr->personal))) { + + $personal = imap_mime_header_decode($feadr->personal); + $data["name"] = ""; + foreach($personal as $perspart) + if ($perspart->charset != "default") + $data["name"] .= iconv($perspart->charset, 'UTF-8//IGNORE', $perspart->text); + else + $data["name"] .= $perspart->text; + + $data["name"] = notags($data["name"]); + } + } + } + imap_close($mbox); + + return $data; + } +} +?> diff --git a/include/Scrape.php b/include/Scrape.php index e2dc7d659..bb9af60d7 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -1,341 +1,5 @@ get_curl_headers(); - logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG); - - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return ret; - } - } - - try { - $dom = HTML5_Parser::parse($s); - } catch (DOMException $e) { - logger('scrape_dfrn: parse error: ' . $e); - } - - if(! $dom) - return $ret; - - $items = $dom->getElementsByTagName('link'); - - // get DFRN link elements - - foreach($items as $item) { - $x = $item->getAttribute('rel'); - if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml')) - $ret['feed_atom'] = $item->getAttribute('href'); - if(substr($x,0,5) == "dfrn-") { - $ret[$x] = $item->getAttribute('href'); - } - if($x === 'lrdd') { - $decoded = urldecode($item->getAttribute('href')); - if(preg_match('/acct:([^@]*)@/',$decoded,$matches)) - $ret['nick'] = $matches[1]; - } - } - - // Pull out hCard profile elements - - $largest_photo = 0; - - $items = $dom->getElementsByTagName('*'); - foreach($items as $item) { - if(attribute_contains($item->getAttribute('class'), 'vcard')) { - $level2 = $item->getElementsByTagName('*'); - foreach($level2 as $x) { - if(attribute_contains($x->getAttribute('class'),'uid')) - $ret['guid'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'nickname')) - $ret['nickname'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'fn')) - $ret['fn'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'searchable')) - $ret['searchable'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'key')) - $ret['key'] = $x->textContent; - if(attribute_contains($x->getAttribute('class'),'url')) - $ret['url'] = $x->textContent; - if((attribute_contains($x->getAttribute('class'),'photo')) - || (attribute_contains($x->getAttribute('class'),'avatar'))) { - $size = intval($x->getAttribute('width')); - // dfrn prefers 175, so if we find this, we set largest_size so it can't be topped. - if(($size > $largest_photo) || ($size == 175) || (! $largest_photo)) { - $ret['photo'] = $x->getAttribute('src'); - $largest_photo = (($size == 175) ? 9999 : $size); - } - } - } - } - } - return array_merge($ret, $noscrapedata); -}} - - - - - - -if(! function_exists('validate_dfrn')) { -function validate_dfrn($a) { - $errors = 0; - if(! x($a,'key')) - $errors ++; - if(! x($a,'dfrn-request')) - $errors ++; - if(! x($a,'dfrn-confirm')) - $errors ++; - if(! x($a,'dfrn-notify')) - $errors ++; - if(! x($a,'dfrn-poll')) - $errors ++; - return $errors; -}} - -if(! function_exists('scrape_meta')) { -function scrape_meta($url) { - - $a = get_app(); - - $ret = array(); - - logger('scrape_meta: url=' . $url); - - $s = fetch_url($url); - - if(! $s) - return $ret; - - $headers = $a->get_curl_headers(); - logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG); - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return ret; - } - } - - try { - $dom = HTML5_Parser::parse($s); - } catch (DOMException $e) { - logger('scrape_meta: parse error: ' . $e); - } - - if(! $dom) - return $ret; - - $items = $dom->getElementsByTagName('meta'); - - // get DFRN link elements - - foreach($items as $item) { - $x = $item->getAttribute('name'); - if(substr($x,0,5) == "dfrn-") - $ret[$x] = $item->getAttribute('content'); - } - - return $ret; -}} - - -if(! function_exists('scrape_vcard')) { -function scrape_vcard($url) { - - $a = get_app(); - - $ret = array(); - - logger('scrape_vcard: url=' . $url); - - $s = fetch_url($url); - - if(! $s) - return $ret; - - $headers = $a->get_curl_headers(); - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return ret; - } - } - - try { - $dom = HTML5_Parser::parse($s); - } catch (DOMException $e) { - logger('scrape_vcard: parse error: ' . $e); - } - - if(! $dom) - return $ret; - - // Pull out hCard profile elements - - $largest_photo = 0; - - $items = $dom->getElementsByTagName('*'); - foreach($items as $item) { - if(attribute_contains($item->getAttribute('class'), 'vcard')) { - $level2 = $item->getElementsByTagName('*'); - foreach($level2 as $x) { - if(attribute_contains($x->getAttribute('class'),'fn')) - $ret['fn'] = $x->textContent; - if((attribute_contains($x->getAttribute('class'),'photo')) - || (attribute_contains($x->getAttribute('class'),'avatar'))) { - $size = intval($x->getAttribute('width')); - if(($size > $largest_photo) || (! $largest_photo)) { - $ret['photo'] = $x->getAttribute('src'); - $largest_photo = $size; - } - } - if((attribute_contains($x->getAttribute('class'),'nickname')) - || (attribute_contains($x->getAttribute('class'),'uid'))) { - $ret['nick'] = $x->textContent; - } - } - } - } - - return $ret; -}} - - -if(! function_exists('scrape_feed')) { -function scrape_feed($url) { - - $a = get_app(); - - $ret = array(); - $cookiejar = tempnam(get_temppath(), 'cookiejar-scrape-feed-'); - $s = fetch_url($url, false, $redirects, 0, Null, $cookiejar); - unlink($cookiejar); - - $headers = $a->get_curl_headers(); - $code = $a->get_curl_code(); - - logger('scrape_feed: returns: ' . $code . ' headers=' . $headers, LOGGER_DEBUG); - - if(! $s) { - logger('scrape_feed: no data returned for ' . $url); - return $ret; - } - - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - if(stristr($line,'content-type:')) { - if(stristr($line,'application/atom+xml') || stristr($s,'')) { - $ret['feed_rss'] = $url; - return $ret; - } - } - - $basename = implode('/', array_slice(explode('/',$url),0,3)) . '/'; - - $doc = new DOMDocument(); - @$doc->loadHTML($s); - $xpath = new DomXPath($doc); - - $base = $xpath->query("//base"); - foreach ($base as $node) { - $attr = array(); - - if ($node->attributes->length) - foreach ($node->attributes as $attribute) - $attr[$attribute->name] = $attribute->value; - - if ($attr["href"] != "") - $basename = $attr["href"] ; - } - - $list = $xpath->query("//link"); - foreach ($list as $node) { - $attr = array(); - - if ($node->attributes->length) - foreach ($node->attributes as $attribute) - $attr[$attribute->name] = $attribute->value; - - if (($attr["rel"] == "alternate") AND ($attr["type"] == "application/atom+xml")) - $ret["feed_atom"] = $attr["href"]; - - if (($attr["rel"] == "alternate") AND ($attr["type"] == "application/rss+xml")) - $ret["feed_rss"] = $attr["href"]; - } - - // Drupal and perhaps others only provide relative URLs. Turn them into absolute. - - if(x($ret,'feed_atom') && (! strstr($ret['feed_atom'],'://'))) - $ret['feed_atom'] = $basename . $ret['feed_atom']; - if(x($ret,'feed_rss') && (! strstr($ret['feed_rss'],'://'))) - $ret['feed_rss'] = $basename . $ret['feed_rss']; - - return $ret; -}} - +require_once('include/Probe.php'); /** * @@ -355,605 +19,17 @@ function scrape_feed($url) { * */ - -define ( 'PROBE_NORMAL', 0); -define ( 'PROBE_DIASPORA', 1); +define('PROBE_NORMAL', 0); +define('PROBE_DIASPORA', 1); function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { - require_once('include/email.php'); - $result = array(); + if ($mode == PROBE_DIASPORA) + $network = NETWORK_DIASPORA; + else + $network = ""; - if (!$url) - return $result; + $data = Probe::uri($url, $network); - $result = Cache::get("probe_url:".$mode.":".$url); - if (!is_null($result)) { - $result = unserialize($result); - return $result; - } - - $original_url = $url; - $network = null; - $diaspora = false; - $diaspora_base = ''; - $diaspora_guid = ''; - $diaspora_key = ''; - $has_lrdd = false; - $email_conversant = false; - $connectornetworks = false; - $appnet = false; - - if (strpos($url,'twitter.com')) { - $connectornetworks = true; - $network = NETWORK_TWITTER; - } - - $lastfm = ((strpos($url,'last.fm/user') !== false) ? true : false); - - $at_addr = ((strpos($url,'@') !== false) ? true : false); - - if((!$appnet) && (!$lastfm) && !$connectornetworks) { - - if(strpos($url,'mailto:') !== false && $at_addr) { - $url = str_replace('mailto:','',$url); - $links = array(); - } - else - $links = lrdd($url); - - if ((count($links) == 0) AND strstr($url, "/index.php")) { - $url = str_replace("/index.php", "", $url); - $links = lrdd($url); - } - - if (count($links)) { - $has_lrdd = true; - - logger('probe_url: found lrdd links: ' . print_r($links,true), LOGGER_DATA); - foreach($links as $link) { - if($link['@attributes']['rel'] === NAMESPACE_ZOT) - $zot = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === NAMESPACE_DFRN) - $dfrn = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'salmon') - $notify = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === NAMESPACE_FEED) - $poll = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') - $hcard = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://webfinger.net/rel/profile-page') - $profile = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://portablecontacts.net/spec/1.0') - $poco = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://joindiaspora.com/seed_location') { - $diaspora_base = unamp($link['@attributes']['href']); - $diaspora = true; - } - if($link['@attributes']['rel'] === 'http://joindiaspora.com/guid') { - $diaspora_guid = unamp($link['@attributes']['href']); - $diaspora = true; - } - if($link['@attributes']['rel'] === 'diaspora-public-key') { - $diaspora_key = base64_decode(unamp($link['@attributes']['href'])); - if(strstr($diaspora_key,'RSA ')) - $pubkey = rsatopem($diaspora_key); - else - $pubkey = $diaspora_key; - $diaspora = true; - } - if(($link['@attributes']['rel'] === 'http://ostatus.org/schema/1.0/subscribe') AND ($mode == PROBE_NORMAL)) { - $diaspora = false; - } - } - - // Status.Net can have more than one profile URL. We need to match the profile URL - // to a contact on incoming messages to prevent spam, and we won't know which one - // to match. So in case of two, one of them is stored as an alias. Only store URL's - // and not webfinger user@host aliases. If they've got more than two non-email style - // aliases, let's hope we're lucky and get one that matches the feed author-uri because - // otherwise we're screwed. - - $backup_alias = ""; - - foreach($links as $link) { - if($link['@attributes']['rel'] === 'alias') { - if(strpos($link['@attributes']['href'],'@') === false) { - if(isset($profile)) { - $alias_url = $link['@attributes']['href']; - - if(($alias_url !== $profile) AND ($backup_alias == "") AND - ($alias_url !== str_replace("/index.php", "", $profile))) - $backup_alias = $alias_url; - - if(($alias_url !== $profile) AND !strstr($alias_url, "index.php") AND - ($alias_url !== str_replace("/index.php", "", $profile))) - $alias = $alias_url; - } - else - $profile = unamp($link['@attributes']['href']); - } - } - } - - if ($alias == "") - $alias = $backup_alias; - - // If the profile is different from the url then the url is abviously an alias - if (($alias == "") AND ($profile != "") AND !$at_addr AND (normalise_link($profile) != normalise_link($url))) - $alias = $url; - } - elseif($mode == PROBE_NORMAL) { - - // Check email - - $orig_url = $url; - if((strpos($orig_url,'@')) && validate_email($orig_url)) { - $x = q("SELECT `prvkey` FROM `user` WHERE `uid` = %d LIMIT 1", - intval(local_user()) - ); - $r = q("SELECT * FROM `mailacct` WHERE `uid` = %d AND `server` != '' LIMIT 1", - intval(local_user()) - ); - if(count($x) && count($r)) { - $mailbox = construct_mailbox_name($r[0]); - $password = ''; - openssl_private_decrypt(hex2bin($r[0]['pass']),$password,$x[0]['prvkey']); - $mbox = email_connect($mailbox,$r[0]['user'],$password); - if(! $mbox) - logger('probe_url: email_connect failed.'); - unset($password); - } - if($mbox) { - $msgs = email_poll($mbox,$orig_url); - logger('probe_url: searching ' . $orig_url . ', ' . count($msgs) . ' messages found.', LOGGER_DEBUG); - if(count($msgs)) { - $addr = $orig_url; - $network = NETWORK_MAIL; - $name = substr($url,0,strpos($url,'@')); - $phost = substr($url,strpos($url,'@')+1); - $profile = 'http://' . $phost; - // fix nick character range - $vcard = array('fn' => $name, 'nick' => $name, 'photo' => avatar_img($url)); - $notify = 'smtp ' . random_string(); - $poll = 'email ' . random_string(); - $priority = 0; - $x = email_msg_meta($mbox,$msgs[0]); - if(stristr($x[0]->from,$orig_url)) - $adr = imap_rfc822_parse_adrlist($x[0]->from,''); - elseif(stristr($x[0]->to,$orig_url)) - $adr = imap_rfc822_parse_adrlist($x[0]->to,''); - if(isset($adr)) { - foreach($adr as $feadr) { - if((strcasecmp($feadr->mailbox,$name) == 0) - &&(strcasecmp($feadr->host,$phost) == 0) - && (strlen($feadr->personal))) { - - $personal = imap_mime_header_decode($feadr->personal); - $vcard['fn'] = ""; - foreach($personal as $perspart) - if ($perspart->charset != "default") - $vcard['fn'] .= iconv($perspart->charset, 'UTF-8//IGNORE', $perspart->text); - else - $vcard['fn'] .= $perspart->text; - - $vcard['fn'] = notags($vcard['fn']); - } - } - } - } - imap_close($mbox); - } - } - } - } - - if($mode == PROBE_NORMAL) { - - if(strlen($zot)) { - $s = fetch_url($zot); - if($s) { - $j = json_decode($s); - if($j) { - $network = NETWORK_ZOT; - $vcard = array( - 'fn' => $j->fullname, - 'nick' => $j->nickname, - 'photo' => $j->photo - ); - $profile = $j->url; - $notify = $j->post; - $pubkey = $j->pubkey; - $poll = 'N/A'; - } - } - } - - - if(strlen($dfrn)) { - $ret = scrape_dfrn(($hcard) ? $hcard : $dfrn, true); - if(is_array($ret) && x($ret,'dfrn-request')) { - $network = NETWORK_DFRN; - $request = $ret['dfrn-request']; - $confirm = $ret['dfrn-confirm']; - $notify = $ret['dfrn-notify']; - $poll = $ret['dfrn-poll']; - - $vcard = array(); - $vcard['fn'] = $ret['fn']; - $vcard['nick'] = $ret['nick']; - $vcard['photo'] = $ret['photo']; - } - } - } - - // Scrape the public key from the hcard. - // Diaspora will remove it from the webfinger somewhere in the future. - if (($hcard != "") AND ($pubkey == "")) { - $ret = scrape_dfrn(($hcard) ? $hcard : $dfrn, true); - if (isset($ret["guid"])) - $diaspora_guid = $ret["guid"]; - if (isset($ret["url"])) - $diaspora_base = $ret["url"]; - - if (isset($ret["key"])) { - $hcard_key = $ret["key"]; - if(strstr($hcard_key,'RSA ')) - $pubkey = rsatopem($hcard_key); - else - $pubkey = $hcard_key; - } - } - - if(($network == "") && $diaspora_base && $diaspora_guid) { - $diaspora_notify = $diaspora_base.'receive/users/'.$diaspora_guid; - $diaspora = true; - - if($mode == PROBE_DIASPORA || !$notify || ($notify == $diaspora_notify)) { - $notify = $diaspora_notify; - $batch = $diaspora_base . 'receive/public' ; - } - if(strpos($url,'@')) - $addr = str_replace('acct:', '', $url); - } - - if($network !== NETWORK_ZOT && $network !== NETWORK_DFRN && $network !== NETWORK_MAIL) { - if($diaspora) - $network = NETWORK_DIASPORA; - elseif($has_lrdd AND ($notify)) - $network = NETWORK_OSTATUS; - - if(strpos($url,'@')) - $addr = str_replace('acct:', '', $url); - - $priority = 0; - - if($hcard && ! $vcard) { - $vcard = scrape_vcard($hcard); - - // Google doesn't use absolute url in profile photos - - if((x($vcard,'photo')) && substr($vcard['photo'],0,1) == '/') { - $h = @parse_url($hcard); - if($h) - $vcard['photo'] = $h['scheme'] . '://' . $h['host'] . $vcard['photo']; - } - - logger('probe_url: scrape_vcard: ' . print_r($vcard,true), LOGGER_DATA); - } - - if($diaspora && $addr) { - // Diaspora returns the name as the nick. As the nick will never be updated, - // let's use the Diaspora nickname (the first part of the handle) as the nick instead - $addr_parts = explode('@', $addr); - $vcard['nick'] = $addr_parts[0]; - } - - if($lastfm) { - $profile = $url; - $poll = str_replace(array('www.','last.fm/'),array('','ws.audioscrobbler.com/1.0/'),$url) . '/recenttracks.rss'; - $vcard['nick'] = basename($url); - $vcard['fn'] = $vcard['nick'] . t(' on Last.fm'); - $network = NETWORK_FEED; - } - - if(! x($vcard,'fn')) - if(x($vcard,'nick')) - $vcard['fn'] = $vcard['nick']; - - $check_feed = false; - - if(stristr($url,'tumblr.com') && (! stristr($url,'/rss'))) { - $poll = $url . '/rss'; - $check_feed = true; - // Will leave it to others to figure out how to grab the avatar, which is on the $url page in the open graph meta links - } - - if($appnet || ! $poll) - $check_feed = true; - if((! isset($vcard)) || (! x($vcard,'fn')) || (! $profile)) - $check_feed = true; - if(($at_addr) && (! count($links))) - $check_feed = false; - - if ($connectornetworks) - $check_feed = false; - - if($check_feed) { - - $feedret = scrape_feed(($poll) ? $poll : $url); - - logger('probe_url: scrape_feed ' . (($poll)? $poll : $url) . ' returns: ' . print_r($feedret,true), LOGGER_DATA); - if(count($feedret) && ($feedret['feed_atom'] || $feedret['feed_rss'])) { - $poll = ((x($feedret,'feed_atom')) ? unamp($feedret['feed_atom']) : unamp($feedret['feed_rss'])); - if(! x($vcard)) - $vcard = array(); - } - - if(x($feedret,'photo') && (! x($vcard,'photo'))) - $vcard['photo'] = $feedret['photo']; - - $cookiejar = tempnam(get_temppath(), 'cookiejar-scrape-feed-'); - $xml = fetch_url($poll, false, $redirects, 0, Null, $cookiejar); - unlink($cookiejar); - - logger('probe_url: fetch feed: ' . $poll . ' returns: ' . $xml, LOGGER_DATA); - - if ($xml == "") { - logger("scrape_feed: XML is empty for feed ".$poll); - $network = NETWORK_PHANTOM; - } else { - $data = feed_import($xml,$dummy1,$dummy2, $dummy3, true); - - if (!is_array($data)) { - logger("scrape_feed: This doesn't seem to be a feed: ".$poll); - $network = NETWORK_PHANTOM; - } else { - if (($vcard["photo"] == "") AND ($data["header"]["author-avatar"] != "")) - $vcard["photo"] = $data["header"]["author-avatar"]; - - if (($vcard["fn"] == "") AND ($data["header"]["author-name"] != "")) - $vcard["fn"] = $data["header"]["author-name"]; - - if (($vcard["nick"] == "") AND ($data["header"]["author-nick"] != "")) - $vcard["nick"] = $data["header"]["author-nick"]; - - if ($network == NETWORK_OSTATUS) { - if ($data["header"]["author-id"] != "") - $alias = $data["header"]["author-id"]; - - if ($data["header"]["author-link"] != "") - $profile = $data["header"]["author-link"]; - - } elseif(!$profile AND ($data["header"]["author-link"] != "") AND !in_array($network, array("", NETWORK_FEED))) - $profile = $data["header"]["author-link"]; - } - } - - // Workaround for misconfigured Friendica servers - if (($network == "") AND (strstr($url, "/profile/"))) { - $noscrape = str_replace("/profile/", "/noscrape/", $url); - $noscrapejson = fetch_url($noscrape); - if ($noscrapejson) { - - $network = NETWORK_DFRN; - - $poco = str_replace("/profile/", "/poco/", $url); - - $noscrapedata = json_decode($noscrapejson, true); - - if (isset($noscrapedata["addr"])) - $addr = $noscrapedata["addr"]; - - if (isset($noscrapedata["fn"])) - $vcard["fn"] = $noscrapedata["fn"]; - - if (isset($noscrapedata["key"])) - $pubkey = $noscrapedata["key"]; - - if (isset($noscrapedata["photo"])) - $vcard["photo"] = $noscrapedata["photo"]; - - if (isset($noscrapedata["dfrn-request"])) - $request = $noscrapedata["dfrn-request"]; - - if (isset($noscrapedata["dfrn-confirm"])) - $confirm = $noscrapedata["dfrn-confirm"]; - - if (isset($noscrapedata["dfrn-notify"])) - $notify = $noscrapedata["dfrn-notify"]; - - if (isset($noscrapedata["dfrn-poll"])) - $poll = $noscrapedata["dfrn-poll"]; - - } - } - - if(! $network) - $network = NETWORK_FEED; - - if(! x($vcard,'nick')) { - $vcard['nick'] = strtolower(notags(unxmlify($vcard['fn']))); - if(strpos($vcard['nick'],' ')) - $vcard['nick'] = trim(substr($vcard['nick'],0,strpos($vcard['nick'],' '))); - } - if(! $priority) - $priority = 2; - } - } - - if(! x($vcard,'photo')) { - $a = get_app(); - $vcard['photo'] = App::get_baseurl() . '/images/person-175.jpg' ; - } - - if(! $profile) - $profile = $url; - - // No human could be associated with this link, use the URL as the contact name - - if(($network === NETWORK_FEED) && ($poll) && (! x($vcard,'fn'))) - $vcard['fn'] = $url; - - if ($diaspora_base != "") - $baseurl = $diaspora_base; - - if (($baseurl == "") AND ($notify != "") AND ($poll != "")) { - $baseurl = matching_url(normalise_link($notify), normalise_link($poll)); - - $baseurl2 = matching_url($baseurl, normalise_link($profile)); - if ($baseurl2 != "") - $baseurl = $baseurl2; - } - - if (($baseurl == "") AND ($notify != "")) - $baseurl = matching_url(normalise_link($profile), normalise_link($notify)); - - if (($baseurl == "") AND ($poll != "")) - $baseurl = matching_url(normalise_link($profile), normalise_link($poll)); - - if (substr($baseurl, -10) == "/index.php") - $baseurl = str_replace("/index.php", "", $baseurl); - - if ($network == "") - $network = NETWORK_PHANTOM; - - $baseurl = rtrim($baseurl, "/"); - - if(strpos($url,'@') AND ($addr == "") AND ($network == NETWORK_DFRN)) - $addr = str_replace('acct:', '', $url); - - $vcard['fn'] = notags($vcard['fn']); - $vcard['nick'] = str_replace(' ','',notags($vcard['nick'])); - - $result['name'] = $vcard['fn']; - $result['nick'] = $vcard['nick']; - $result['guid'] = $diaspora_guid; - $result['url'] = $profile; - $result['addr'] = $addr; - $result['batch'] = $batch; - $result['notify'] = $notify; - $result['poll'] = $poll; - $result['request'] = $request; - $result['confirm'] = $confirm; - $result['poco'] = $poco; - $result['photo'] = $vcard['photo']; - $result['priority'] = $priority; - $result['network'] = $network; - $result['alias'] = $alias; - $result['pubkey'] = $pubkey; - $result['baseurl'] = $baseurl; - - logger('probe_url: ' . print_r($result,true), LOGGER_DEBUG); - - if ($level == 1) { - // Trying if it maybe a diaspora account - if (($result['network'] == NETWORK_FEED) OR ($result['addr'] == "")) { - require_once('include/bbcode.php'); - $address = GetProfileUsername($url, "", true); - $result2 = probe_url($address, $mode, ++$level); - if (!in_array($result2['network'], array("", NETWORK_PHANTOM, NETWORK_FEED))) - $result = $result2; - } - - // Maybe it's some non standard GNU Social installation (Single user, subfolder or no uri rewrite) - if (($result['network'] == NETWORK_FEED) AND ($result['baseurl'] != "") AND ($result['nick'] != "")) { - $addr = $result['nick'].'@'.str_replace("http://", "", $result['baseurl']); - $result2 = probe_url($addr, $mode, ++$level); - if (!in_array($result2['network'], array("", NETWORK_PHANTOM, NETWORK_FEED))) - $result = $result2; - } - - // Quickfix for Hubzilla systems with enabled OStatus plugin - if (($result['network'] == NETWORK_DIASPORA) AND ($result["batch"] == "")) { - $result2 = probe_url($url, PROBE_DIASPORA, ++$level); - if ($result2['network'] == NETWORK_DIASPORA) { - $addr = $result["addr"]; - $result = $result2; - - if (($result["addr"] == "") AND ($addr != "")) - $result["addr"] = $addr; - } - } - } - - // Only store into the cache if the value seems to be valid - if ($result['network'] != NETWORK_PHANTOM) { - Cache::set("probe_url:".$mode.":".$original_url,serialize($result), CACHE_DAY); - - /// @todo temporary fix - we need a real contact update function that updates only changing fields - /// The biggest problem is the avatar picture that could have a reduced image size. - /// It should only be updated if the existing picture isn't existing anymore. - if (($result['network'] != NETWORK_FEED) AND ($mode == PROBE_NORMAL) AND - $result["name"] AND $result["nick"] AND $result["url"] AND $result["addr"] AND $result["poll"]) - q("UPDATE `contact` SET `name` = '%s', `nick` = '%s', `url` = '%s', `addr` = '%s', - `notify` = '%s', `poll` = '%s', `alias` = '%s', `success_update` = '%s' - WHERE `nurl` = '%s' AND NOT `self` AND `uid` = 0", - dbesc($result["name"]), - dbesc($result["nick"]), - dbesc($result["url"]), - dbesc($result["addr"]), - dbesc($result["notify"]), - dbesc($result["poll"]), - dbesc($result["alias"]), - dbesc(datetime_convert()), - dbesc(normalise_link($result['url'])) - ); - } - - return $result; -} - -/** - * @brief Find the matching part between two url - * - * @param string $url1 - * @param string $url2 - * @return string The matching part - */ -function matching_url($url1, $url2) { - - if (($url1 == "") OR ($url2 == "")) - return ""; - - $url1 = normalise_link($url1); - $url2 = normalise_link($url2); - - $parts1 = parse_url($url1); - $parts2 = parse_url($url2); - - if (!isset($parts1["host"]) OR !isset($parts2["host"])) - return ""; - - if ($parts1["scheme"] != $parts2["scheme"]) - return ""; - - if ($parts1["host"] != $parts2["host"]) - return ""; - - if ($parts1["port"] != $parts2["port"]) - return ""; - - $match = $parts1["scheme"]."://".$parts1["host"]; - - if ($parts1["port"]) - $match .= ":".$parts1["port"]; - - $pathparts1 = explode("/", $parts1["path"]); - $pathparts2 = explode("/", $parts2["path"]); - - $i = 0; - $path = ""; - do { - $path1 = $pathparts1[$i]; - $path2 = $pathparts2[$i]; - - if ($path1 == $path2) - $path .= $path1."/"; - - } while (($path1 == $path2) AND ($i++ <= count($pathparts1))); - - $match .= $path; - - return normalise_link($match); + return $data; } diff --git a/include/feed.php b/include/feed.php index 992d10e22..5adb2294d 100644 --- a/include/feed.php +++ b/include/feed.php @@ -59,8 +59,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { if ($attributes->name == "href") $author["author-link"] = $attributes->textContent; - $author["author-id"] = $xpath->evaluate('/atom:feed/atom:author/atom:uri/text()')->item(0)->nodeValue; - if ($author["author-link"] == "") $author["author-link"] = $author["author-id"]; @@ -89,9 +87,22 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { if ($value != "") $author["author-name"] = $value; - $value = $xpath->evaluate('atom:author/poco:preferredUsername/text()')->item(0)->nodeValue; - if ($value != "") - $author["author-nick"] = $value; + if ($simulate) { + $author["author-id"] = $xpath->evaluate('/atom:feed/atom:author/atom:uri/text()')->item(0)->nodeValue; + + $value = $xpath->evaluate('atom:author/poco:preferredUsername/text()')->item(0)->nodeValue; + if ($value != "") + $author["author-nick"] = $value; + + $value = $xpath->evaluate('atom:author/poco:address/poco:formatted/text()', $context)->item(0)->nodeValue; + if ($value != "") + $author["author-location"] = $value; + + $value = $xpath->evaluate('atom:author/poco:note/text()')->item(0)->nodeValue; + if ($value != "") + $author["author-about"] = $value; + + } $author["edited"] = $author["created"] = $xpath->query('/atom:feed/atom:updated/text()')->item(0)->nodeValue; @@ -131,10 +142,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { $author["owner-link"] = $contact["url"]; $author["owner-name"] = $contact["name"]; $author["owner-avatar"] = $contact["thumb"]; - - // This is no field in the item table. So we have to unset it. - unset($author["author-nick"]); - unset($author["author-id"]); } $header = array(); diff --git a/include/network.php b/include/network.php index d27a090fe..f20445d27 100644 --- a/include/network.php +++ b/include/network.php @@ -5,7 +5,7 @@ */ require_once("include/xml.php"); - +require_once('include/Probe.php'); /** * @brief Curl wrapper @@ -371,324 +371,6 @@ function http_status_exit($val, $description = array()) { } -// Given an email style address, perform webfinger lookup and -// return the resulting DFRN profile URL, or if no DFRN profile URL -// is located, returns an OStatus subscription template (prefixed -// with the string 'stat:' to identify it as on OStatus template). -// If this isn't an email style address just return $webbie. -// Return an empty string if email-style addresses but webfinger fails, -// or if the resultant personal XRD doesn't contain a supported -// subscription/friend-request attribute. - -// amended 7/9/2011 to return an hcard which could save potentially loading -// a lengthy content page to scrape dfrn attributes - -function webfinger_dfrn($webbie,&$hcard) { - if(! strstr($webbie,'@')) { - return $webbie; - } - $profile_link = ''; - - $links = webfinger($webbie); - logger('webfinger_dfrn: ' . $webbie . ':' . print_r($links,true), LOGGER_DATA); - if(count($links)) { - foreach($links as $link) { - if($link['@attributes']['rel'] === NAMESPACE_DFRN) - $profile_link = $link['@attributes']['href']; - if($link['@attributes']['rel'] === NAMESPACE_OSTATUSSUB) - $profile_link = 'stat:' . $link['@attributes']['template']; - if($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') - $hcard = $link['@attributes']['href']; - } - } - return $profile_link; -} - -/** - * @brief Perform webfinger lookup on an email style address - * - * @param string $webbi An email style address - * @param boolean $debug - * - * @return array of link attributes from the personal XRD file - * empty array on error/failure - */ -function webfinger($webbie, $debug = false) { - $host = ''; - if(strstr($webbie,'@')) { - $host = substr($webbie,strpos($webbie,'@') + 1); - } - if(strlen($host)) { - $tpl = fetch_lrdd_template($host); - logger('webfinger: lrdd template: ' . $tpl); - if(strlen($tpl)) { - $pxrd = str_replace('{uri}', urlencode('acct:' . $webbie), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - if(! count($links)) { - // try with double slashes - $pxrd = str_replace('{uri}', urlencode('acct://' . $webbie), $tpl); - logger('webfinger: pxrd: ' . $pxrd); - $links = fetch_xrd_links($pxrd); - } - return $links; - } - } - return array(); -} - -function lrdd($uri, $debug = false) { - - $a = get_app(); - - // default priority is host priority, host-meta first - - $priority = 'host'; - - // All we have is an email address. Resource-priority is irrelevant - // because our URI isn't directly resolvable. - - if(strstr($uri,'@')) { - return(webfinger($uri)); - } - - // get the host meta file - - $host = @parse_url($uri); - - if($host) { - $url = ((x($host,'scheme')) ? $host['scheme'] : 'http') . '://'; - $url .= $host['host'] . '/.well-known/host-meta' ; - } - else - return array(); - - logger('lrdd: constructed url: ' . $url); - - $xml = fetch_url($url); - - $headers = $a->get_curl_headers(); - - if (! $xml) - return array(); - - logger('lrdd: host_meta: ' . $xml, LOGGER_DATA); - - if(! stristr($xml,'].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - return(fetch_xrd_links($matches[1])); - break; - } - } - } - } - - - // priority 'resource' - - - $html = fetch_url($uri); - $headers = $a->get_curl_headers(); - logger('lrdd: headers=' . $headers, LOGGER_DEBUG); - - // don't try and parse raw xml as html - if(! strstr($html,'getElementsByTagName('link'); - foreach($items as $item) { - $x = $item->getAttribute('rel'); - if($x == "lrdd") { - $pagelink = $item->getAttribute('href'); - break; - } - } - } - } - - if(isset($pagelink)) - return(fetch_xrd_links($pagelink)); - - // next look in HTTP headers - - $lines = explode("\n",$headers); - if(count($lines)) { - foreach($lines as $line) { - /// @TODO Alter the following regex to support multiple relations (space separated) - if((stristr($line,'link:')) && preg_match('/<([^>].*)>.*rel\=[\'\"]lrdd[\'\"]/',$line,$matches)) { - $pagelink = $matches[1]; - break; - } - // don't try and run feeds through the html5 parser - if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) - return array(); - if(stristr($html,''),array('href="','"/>'),$xml); - - $h = parse_xml_string($xml); - if(! $h) - return array(); - - $arr = xml::element_to_array($h); - - $links = array(); - - if(isset($arr['xrd']['link'])) { - $link = $arr['xrd']['link']; - if(! isset($link[0])) - $links = array($link); - else - $links = $link; - } - if(isset($arr['xrd']['alias'])) { - $alias = $arr['xrd']['alias']; - if(! isset($alias[0])) - $aliases = array($alias); - else - $aliases = $alias; - if(is_array($aliases) && count($aliases)) { - foreach($aliases as $alias) { - $links[]['@attributes'] = array('rel' => 'alias' , 'href' => $alias); - } - } - } - - logger('fetch_xrd_links: ' . print_r($links,true), LOGGER_DATA); - - return $links; - -} - /** * @brief Check URL to se if ts's real * @@ -1123,7 +805,7 @@ function short_link($url) { * This function encodes an array to json format * and adds an application/json HTTP header to the output. * After finishing the process is getting killed. - * + * * @param array $x The input content */ function json_return_and_die($x) { @@ -1131,3 +813,57 @@ function json_return_and_die($x) { echo json_encode($x); killme(); } + +/** + * @brief Find the matching part between two url + * + * @param string $url1 + * @param string $url2 + * @return string The matching part + */ +function matching_url($url1, $url2) { + + if (($url1 == "") OR ($url2 == "")) + return ""; + + $url1 = normalise_link($url1); + $url2 = normalise_link($url2); + + $parts1 = parse_url($url1); + $parts2 = parse_url($url2); + + if (!isset($parts1["host"]) OR !isset($parts2["host"])) + return ""; + + if ($parts1["scheme"] != $parts2["scheme"]) + return ""; + + if ($parts1["host"] != $parts2["host"]) + return ""; + + if ($parts1["port"] != $parts2["port"]) + return ""; + + $match = $parts1["scheme"]."://".$parts1["host"]; + + if ($parts1["port"]) + $match .= ":".$parts1["port"]; + + $pathparts1 = explode("/", $parts1["path"]); + $pathparts2 = explode("/", $parts2["path"]); + + $i = 0; + $path = ""; + do { + $path1 = $pathparts1[$i]; + $path2 = $pathparts2[$i]; + + if ($path1 == $path2) + $path .= $path1."/"; + + } while (($path1 == $path2) AND ($i++ <= count($pathparts1))); + + $match .= $path; + + return normalise_link($match); +} diff --git a/include/salmon.php b/include/salmon.php index 988c542e2..8341b902a 100644 --- a/include/salmon.php +++ b/include/salmon.php @@ -1,15 +1,14 @@ '; $o .= 'Lookup address: '; - $o .= ''; + $o .= ''; $o .= '

'; if(x($_GET,'addr')) { $addr = trim($_GET['addr']); - if(strpos($addr,'@' !== false)) - $res = webfinger($addr); - else - $res = lrdd($addr); + $res = Probe::lrdd($addr); $o .= '
';
 		$o .= str_replace("\n",'
',print_r($res,true)); $o .= '
';