diff --git a/src/Model/GContact.php b/src/Model/GContact.php index 23415d107..6e7cdb72f 100644 --- a/src/Model/GContact.php +++ b/src/Model/GContact.php @@ -6,6 +6,8 @@ */ namespace Friendica\Model; +use DOMDocument; +use DOMXPath; use Exception; use Friendica\Core\Config; use Friendica\Core\Logger; @@ -14,6 +16,7 @@ use Friendica\Core\System; use Friendica\Core\Worker; use Friendica\Database\DBA; use Friendica\Network\Probe; +use Friendica\Protocol\ActivityPub; use Friendica\Protocol\PortableContact; use Friendica\Util\DateTimeFormat; use Friendica\Util\Network; @@ -190,7 +193,7 @@ class GContact } if ((!isset($gcontact['network']) || !isset($gcontact['name']) || !isset($gcontact['addr']) || !isset($gcontact['photo']) || !isset($gcontact['server_url']) || $alternate) - && PortableContact::reachable($gcontact['url'], $gcontact['server_url'], $gcontact['network'], false) + && GServer::reachable($gcontact['url'], $gcontact['server_url'], $gcontact['network'], false) ) { $data = Probe::uri($gcontact['url']); @@ -859,7 +862,170 @@ class GContact return $gcontact_id; } + /** + * Set the last date that the contact had posted something + * + * @param array $data Probing result + * @param bool $force force updating + */ + public static function setLastUpdate(array $data, bool $force = false) + { + // Fetch the global contact + $gcontact = DBA::selectFirst('gcontact', ['created', 'updated', 'last_contact', 'last_failure'], + ['nurl' => Strings::normaliseLink($data['url'])]); + if (!DBA::isResult($gcontact)) { + return; + } + if (!$force && !PortableContact::updateNeeded($gcontact['created'], $gcontact['updated'], $gcontact['last_failure'], $gcontact['last_contact'])) { + Logger::info("Don't update profile", ['url' => $data['url'], 'updated' => $gcontact['updated']]); + return; + } + + if (self::updateFromNoScrape($data)) { + return; + } + + // When the profile doesn't have a feed, then we exit here + if (empty($data['poll'])) { + return; + } + + if ($data['network'] == 
Protocol::ACTIVITYPUB) { + self::updateFromOutbox($data['poll'], $data); + } else { + self::updateFromFeed($data); + } + } + + /** + * Update a global contact via the "noscrape" endpoint + * + * @param array $data Probing result + * + * @return bool 'true' if update was successful or the server was unreachable + */ + private static function updateFromNoScrape(array $data) + { + // Check the 'noscrape' endpoint when it is a Friendica server + $gserver = DBA::selectFirst('gserver', ['noscrape'], ["`nurl` = ? AND `noscrape` != ''", + Strings::normaliseLink($data['baseurl'])]); + if (!DBA::isResult($gserver)) { + return false; + } + + $curlResult = Network::curl($gserver['noscrape'] . '/' . $data['nick']); + + if ($curlResult->isSuccess() && !empty($curlResult->getBody())) { + $noscrape = json_decode($curlResult->getBody(), true); + if (!empty($noscrape['updated'])) { + $noscrape['updated'] = DateTimeFormat::utc($noscrape['updated'], DateTimeFormat::MYSQL); + $fields = ['last_contact' => DateTimeFormat::utcNow(), 'updated' => $noscrape['updated']]; + DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]); + return true; + } + } elseif ($curlResult->isTimeout()) { + // On a timeout return the existing value, but mark the contact as failure + $fields = ['last_failure' => DateTimeFormat::utcNow()]; + DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]); + return true; + } + return false; + } + + /** + * Update a global contact via an ActivityPub Outbox + * @param string $feed URL of the outbox (or of one of its pages) + * @param array $data Probing result + */ + private static function updateFromOutbox(string $feed, array $data) + { + $outbox = ActivityPub::fetchContent($feed); + if (empty($outbox)) { + return; + } + + if (!empty($outbox['orderedItems'])) { + $items = $outbox['orderedItems']; + } elseif (!empty($outbox['first']['orderedItems'])) { + $items = $outbox['first']['orderedItems']; + } elseif (!empty($outbox['first']) && is_string($outbox['first'])) { + self::updateFromOutbox($outbox['first'], $data); + 
return; + } else { + $items = []; + } + + $last_updated = ''; + + foreach ($items as $activity) { + if (!empty($activity['published']) && ($last_updated < $activity['published'])) { + $last_updated = $activity['published']; + } + } + + if (empty($last_updated)) { + return; + } + + $fields = ['last_contact' => DateTimeFormat::utcNow(), 'updated' => $last_updated]; + DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]); + } + + /** + * Update a global contact via an XML feed + * + * @param array $data Probing result + */ + private static function updateFromFeed(array $data) + { + // Search for the newest entry in the feed + $curlResult = Network::curl($data['poll']); + if (!$curlResult->isSuccess()) { + $fields = ['last_failure' => DateTimeFormat::utcNow()]; + DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]); + + Logger::info("Profile wasn't reachable (no feed)", ['url' => $data['url']]); + return; + } + + $doc = new DOMDocument(); + @$doc->loadXML($curlResult->getBody()); + + $xpath = new DOMXPath($doc); + $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); + + $entries = $xpath->query('/atom:feed/atom:entry'); + + $last_updated = ''; + + foreach ($entries as $entry) { + $published_item = $xpath->query('atom:published/text()', $entry)->item(0); + $updated_item = $xpath->query('atom:updated/text()' , $entry)->item(0); + $published = !empty($published_item->nodeValue) ? DateTimeFormat::utc($published_item->nodeValue) : null; + $updated = !empty($updated_item->nodeValue) ? 
DateTimeFormat::utc($updated_item->nodeValue) : null; + + if (empty($published) || empty($updated)) { + Logger::notice('Invalid entry for XPath.', ['entry' => $entry, 'url' => $data['url']]); + continue; + } + + if ($last_updated < $published) { + $last_updated = $published; + } + + if ($last_updated < $updated) { + $last_updated = $updated; + } + } + + if (empty($last_updated)) { + return; + } + + $fields = ['last_contact' => DateTimeFormat::utcNow(), 'updated' => $last_updated]; + DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($data['url'])]); + } /** * @brief Updates the gcontact entry from a given public contact id * @@ -976,7 +1142,9 @@ class GContact * * @param string $url profile link * @param boolean $force Optional forcing of network probing (otherwise we use the cached data) - * @return void + * + * @return boolean 'true' when contact had been updated + * * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \ImagickException */ @@ -985,13 +1153,20 @@ class GContact $data = Probe::uri($url, $force); if (in_array($data["network"], [Protocol::PHANTOM])) { - Logger::log("Invalid network for contact url ".$data["url"]." 
- Called by: ".System::callstack(), Logger::DEBUG); - return; + $fields = ['last_failure' => DateTimeFormat::utcNow()]; + DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($url)]); + Logger::info('Invalid network for contact', ['url' => $data['url'], 'callstack' => System::callstack()]); + return false; } $data["server_url"] = $data["baseurl"]; self::update($data); + + // Set the date of the latest post + self::setLastUpdate($data, $force); + + return true; } /** diff --git a/src/Model/GServer.php b/src/Model/GServer.php index 77d556357..28c0007b9 100644 --- a/src/Model/GServer.php +++ b/src/Model/GServer.php @@ -26,6 +26,29 @@ use Friendica\Network\Probe; */ class GServer { + /** + * Checks if the given server is reachable + * + * @param string $profile URL of the given profile + * @param string $server URL of the given server (If empty, taken from profile) + * @param string $network Network value that is used, when detection failed + * @param boolean $force Force an update. + * + * @return boolean 'true' if server seems vital + */ + public static function reachable(string $profile, string $server = '', string $network = '', bool $force = false) + { + if ($server == '') { + $server = Contact::getBasepath($profile); + } + + if ($server == '') { + return true; + } + + return self::check($server, $network, $force); + } + /** * Checks the state of the given server. 
* @@ -661,21 +684,20 @@ class GServer private static function detectNetworkViaContacts(string $url, array $serverdata) { $contacts = ''; - $fields = ['nurl', 'url']; - $gcontacts = DBA::select('gcontact', $fields, ['server_url' => [$url, $serverdata['nurl']]]); + $gcontacts = DBA::select('gcontact', ['url', 'nurl'], ['server_url' => [$url, $serverdata['nurl']]]); while ($gcontact = DBA::fetch($gcontacts)) { $contacts[$gcontact['nurl']] = $gcontact['url']; } DBA::close($gcontacts); - $apcontacts = DBA::select('apcontact', $fields, ['baseurl' => [$url, $serverdata['nurl']]]); + $apcontacts = DBA::select('apcontact', ['url'], ['baseurl' => [$url, $serverdata['nurl']]]); while ($gcontact = DBA::fetch($gcontacts)) { - $contacts[$apcontact['nurl']] = $apcontact['url']; + $contacts[Strings::normaliseLink($apcontact['url'])] = $apcontact['url']; } DBA::close($apcontacts); - $pcontacts = DBA::select('contact', $fields, ['uid' => 0, 'baseurl' => [$url, $serverdata['nurl']]]); + $pcontacts = DBA::select('contact', ['url', 'nurl'], ['uid' => 0, 'baseurl' => [$url, $serverdata['nurl']]]); while ($gcontact = DBA::fetch($gcontacts)) { $contacts[$pcontact['nurl']] = $pcontact['url']; } diff --git a/src/Protocol/PortableContact.php b/src/Protocol/PortableContact.php index 235c17ff5..228a96762 100644 --- a/src/Protocol/PortableContact.php +++ b/src/Protocol/PortableContact.php @@ -214,271 +214,11 @@ class PortableContact DBA::delete('glink', $condition); } - public static function reachable($profile, $server = "", $network = "", $force = false) - { - if ($server == "") { - $server = Contact::getBasepath($profile); - } - - if ($server == "") { - return true; - } - - return GServer::check($server, $network, $force); - } - public static function alternateOStatusUrl($url) { return(preg_match("=https?://.+/user/\d+=ism", $url, $matches)); } - public static function lastUpdated($profile, $force = false) - { - $gcontacts = q( - "SELECT * FROM `gcontact` WHERE `nurl` = '%s'", - 
DBA::escape(Strings::normaliseLink($profile)) - ); - - if (!DBA::isResult($gcontacts)) { - return false; - } - - $contact = ["url" => $profile]; - - if ($gcontacts[0]["created"] <= DBA::NULL_DATETIME) { - $contact['created'] = DateTimeFormat::utcNow(); - } - - $server_url = ''; - if ($force) { - $server_url = Strings::normaliseLink(Contact::getBasepath($profile)); - } - - if (($server_url == '') && ($gcontacts[0]["server_url"] != "")) { - $server_url = $gcontacts[0]["server_url"]; - } - - if (!$force && (($server_url == '') || ($gcontacts[0]["server_url"] == $gcontacts[0]["nurl"]))) { - $server_url = Strings::normaliseLink(Contact::getBasepath($profile)); - } - - if (!in_array($gcontacts[0]["network"], [Protocol::DFRN, Protocol::DIASPORA, Protocol::FEED, Protocol::OSTATUS, ""])) { - Logger::log("Profile ".$profile.": Network type ".$gcontacts[0]["network"]." can't be checked", Logger::DEBUG); - return false; - } - - if ($server_url != "") { - if (!GServer::check($server_url, $gcontacts[0]["network"], $force)) { - if ($force) { - $fields = ['last_failure' => DateTimeFormat::utcNow()]; - DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($profile)]); - } - - Logger::log("Profile ".$profile.": Server ".$server_url." wasn't reachable.", Logger::DEBUG); - return false; - } - $contact['server_url'] = $server_url; - } - - if (in_array($gcontacts[0]["network"], ["", Protocol::FEED])) { - $server = q( - "SELECT `network` FROM `gserver` WHERE `nurl` = '%s' AND `network` != ''", - DBA::escape(Strings::normaliseLink($server_url)) - ); - - if ($server) { - $contact['network'] = $server[0]["network"]; - } else { - return false; - } - } - - // noscrape is really fast so we don't cache the call. 
- if (($server_url != "") && ($gcontacts[0]["nick"] != "")) { - // Use noscrape if possible - $server = q("SELECT `noscrape`, `network` FROM `gserver` WHERE `nurl` = '%s' AND `noscrape` != ''", DBA::escape(Strings::normaliseLink($server_url))); - - if ($server) { - $curlResult = Network::curl($server[0]["noscrape"]."/".$gcontacts[0]["nick"]); - - if ($curlResult->isSuccess() && ($curlResult->getBody() != "")) { - $noscrape = json_decode($curlResult->getBody(), true); - - if (is_array($noscrape)) { - $contact["network"] = $server[0]["network"]; - - if (isset($noscrape["fn"])) { - $contact["name"] = $noscrape["fn"]; - } - if (isset($noscrape["comm"])) { - $contact["community"] = $noscrape["comm"]; - } - if (isset($noscrape["tags"])) { - $keywords = implode(" ", $noscrape["tags"]); - if ($keywords != "") { - $contact["keywords"] = $keywords; - } - } - - $location = Profile::formatLocation($noscrape); - if ($location) { - $contact["location"] = $location; - } - if (isset($noscrape["dfrn-notify"])) { - $contact["notify"] = $noscrape["dfrn-notify"]; - } - // Remove all fields that are not present in the gcontact table - unset($noscrape["fn"]); - unset($noscrape["key"]); - unset($noscrape["homepage"]); - unset($noscrape["comm"]); - unset($noscrape["tags"]); - unset($noscrape["locality"]); - unset($noscrape["region"]); - unset($noscrape["country-name"]); - unset($noscrape["contacts"]); - unset($noscrape["dfrn-request"]); - unset($noscrape["dfrn-confirm"]); - unset($noscrape["dfrn-notify"]); - unset($noscrape["dfrn-poll"]); - - // Set the date of the last contact - /// @todo By now the function "update_gcontact" doesn't work with this field - //$contact["last_contact"] = DateTimeFormat::utcNow(); - - $contact = array_merge($contact, $noscrape); - - GContact::update($contact); - - if (!empty($noscrape["updated"])) { - $fields = ['last_contact' => DateTimeFormat::utcNow()]; - DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($profile)]); - - 
Logger::log("Profile ".$profile." was last updated at ".$noscrape["updated"]." (noscrape)", Logger::DEBUG); - - return $noscrape["updated"]; - } - } - } - } - } - - // If we only can poll the feed, then we only do this once a while - if (!$force && !self::updateNeeded($gcontacts[0]["created"], $gcontacts[0]["updated"], $gcontacts[0]["last_failure"], $gcontacts[0]["last_contact"])) { - Logger::log("Profile ".$profile." was last updated at ".$gcontacts[0]["updated"]." (cached)", Logger::DEBUG); - - GContact::update($contact); - return $gcontacts[0]["updated"]; - } - - $data = Probe::uri($profile); - - // Is the profile link the alternate OStatus link notation? (http://domain.tld/user/4711) - // Then check the other link and delete this one - if (($data["network"] == Protocol::OSTATUS) && self::alternateOStatusUrl($profile) - && (Strings::normaliseLink($profile) == Strings::normaliseLink($data["alias"])) - && (Strings::normaliseLink($profile) != Strings::normaliseLink($data["url"])) - ) { - // Delete the old entry - DBA::delete('gcontact', ['nurl' => Strings::normaliseLink($profile)]); - - $gcontact = array_merge($gcontacts[0], $data); - - $gcontact["server_url"] = $data["baseurl"]; - - try { - $gcontact = GContact::sanitize($gcontact); - GContact::update($gcontact); - - self::lastUpdated($data["url"], $force); - } catch (Exception $e) { - Logger::log($e->getMessage(), Logger::DEBUG); - } - - Logger::log("Profile ".$profile." was deleted", Logger::DEBUG); - return false; - } - - if (($data["poll"] == "") || (in_array($data["network"], [Protocol::FEED, Protocol::PHANTOM]))) { - $fields = ['last_failure' => DateTimeFormat::utcNow()]; - DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($profile)]); - - Logger::log("Profile ".$profile." 
wasn't reachable (profile)", Logger::DEBUG); - return false; - } - - $contact = array_merge($contact, $data); - - $contact["server_url"] = $data["baseurl"]; - - GContact::update($contact); - - $curlResult = Network::curl($data["poll"]); - - if (!$curlResult->isSuccess()) { - $fields = ['last_failure' => DateTimeFormat::utcNow()]; - DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($profile)]); - - Logger::log("Profile ".$profile." wasn't reachable (no feed)", Logger::DEBUG); - return false; - } - - $doc = new DOMDocument(); - /// @TODO Avoid error supression here - @$doc->loadXML($curlResult->getBody()); - - $xpath = new DOMXPath($doc); - $xpath->registerNamespace('atom', "http://www.w3.org/2005/Atom"); - - $entries = $xpath->query('/atom:feed/atom:entry'); - - $last_updated = ""; - - foreach ($entries as $entry) { - $published_item = $xpath->query('atom:published/text()', $entry)->item(0); - $updated_item = $xpath->query('atom:updated/text()' , $entry)->item(0); - $published = isset($published_item->nodeValue) ? DateTimeFormat::utc($published_item->nodeValue) : null; - $updated = isset($updated_item->nodeValue) ? DateTimeFormat::utc($updated_item->nodeValue) : null; - - if (!isset($published) || !isset($updated)) { - Logger::notice('Invalid entry for XPath.', ['entry' => $entry, 'profile' => $profile]); - continue; - } - - if ($last_updated < $published) { - $last_updated = $published; - } - - if ($last_updated < $updated) { - $last_updated = $updated; - } - } - - // Maybe there aren't any entries. 
Then check if it is a valid feed - if ($last_updated == "") { - if ($xpath->query('/atom:feed')->length > 0) { - $last_updated = DBA::NULL_DATETIME; - } - } - - $fields = ['last_contact' => DateTimeFormat::utcNow()]; - - if (!empty($last_updated)) { - $fields['updated'] = $last_updated; - } - - DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($profile)]); - - if (($gcontacts[0]["generation"] == 0)) { - $fields = ['generation' => 9]; - DBA::update('gcontact', $fields, ['nurl' => Strings::normaliseLink($profile)]); - } - - Logger::log("Profile ".$profile." was last updated at ".$last_updated, Logger::DEBUG); - - return $last_updated; - } - public static function updateNeeded($created, $updated, $last_failure, $last_contact) { $now = strtotime(DateTimeFormat::utcNow()); diff --git a/src/Worker/CronJobs.php b/src/Worker/CronJobs.php index 1bc3ccd0a..311ec59a5 100644 --- a/src/Worker/CronJobs.php +++ b/src/Worker/CronJobs.php @@ -16,11 +16,11 @@ use Friendica\Database\DBA; use Friendica\Database\PostUpdate; use Friendica\Model\Contact; use Friendica\Model\GContact; +use Friendica\Model\GServer; use Friendica\Model\Nodeinfo; use Friendica\Model\Photo; use Friendica\Model\User; use Friendica\Network\Probe; -use Friendica\Protocol\PortableContact; use Friendica\Util\Network; use Friendica\Util\Proxy as ProxyUtils; use Friendica\Util\Strings; @@ -254,7 +254,7 @@ class CronJobs return; } - if (!PortableContact::reachable($contact["url"])) { + if (!GServer::reachable($contact["url"])) { continue; } diff --git a/src/Worker/DiscoverPoCo.php b/src/Worker/DiscoverPoCo.php index 5ce4c97d6..d495b4c2a 100644 --- a/src/Worker/DiscoverPoCo.php +++ b/src/Worker/DiscoverPoCo.php @@ -64,7 +64,7 @@ class DiscoverPoCo if ($mode == 8) { if ($param1 != "") { - PortableContact::lastUpdated($param1, true); + GContact::updateFromProbe($param1, true); } } elseif ($mode == 7) { if (!empty($param4)) { @@ -232,7 +232,7 @@ class DiscoverPoCo continue; } // Update the contact - 
PortableContact::lastUpdated($jj->url); + GContact::updateFromProbe($jj->url); continue; } diff --git a/src/Worker/GProbe.php b/src/Worker/GProbe.php index 49638c7c9..3c7136347 100644 --- a/src/Worker/GProbe.php +++ b/src/Worker/GProbe.php @@ -57,7 +57,7 @@ class GProbe { } if (DBA::isResult($r)) { // Check for accessibility and do a poco discovery - if (PortableContact::lastUpdated($r[0]['url'], true) && ($r[0]["network"] == Protocol::DFRN)) { + if (GContact::updateFromProbe($r[0]['url'], true) && ($r[0]["network"] == Protocol::DFRN)) { PortableContact::loadWorker(0, 0, $r[0]['id'], str_replace('/profile/', '/poco/', $r[0]['url'])); } }