From 1f28cbd2c6bb9910a6f9c030320b081693251912 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 18 Nov 2017 11:02:46 +0000 Subject: [PATCH] And now DiscoverPoCo.php --- include/discover_poco.php | 299 ------------------------------ mod/dirfind.php | 2 +- src/Protocol/PortableContact.php | 10 +- src/Worker/Cron.php | 8 +- src/Worker/DiscoverPoCo.php | 304 +++++++++++++++++++++++++++++++ 5 files changed, 314 insertions(+), 309 deletions(-) delete mode 100644 include/discover_poco.php create mode 100644 src/Worker/DiscoverPoCo.php diff --git a/include/discover_poco.php b/include/discover_poco.php deleted file mode 100644 index fd72d6940..000000000 --- a/include/discover_poco.php +++ /dev/null @@ -1,299 +0,0 @@ -: Searches for "search pattern" in the directory. "search pattern" is url encoded. - - checkcontact: Updates gcontact entries - - suggestions: Discover other servers for their contacts. - - server : Searches for the poco server list. "poco url" is base64 encoded. - - update_server: Frequently check the first 250 servers for vitality. 
- - update_server_directory: Discover the given server id for their contacts - - PortableContact::load: Load POCO data from a given POCO address - - check_profile: Update remote profile data - */ - - if (($argc > 2) && ($argv[1] == "dirsearch")) { - $search = urldecode($argv[2]); - $mode = 1; - } elseif (($argc == 2) && ($argv[1] == "checkcontact")) { - $mode = 2; - } elseif (($argc == 2) && ($argv[1] == "suggestions")) { - $mode = 3; - } elseif (($argc == 3) && ($argv[1] == "server")) { - $mode = 4; - } elseif (($argc == 2) && ($argv[1] == "update_server")) { - $mode = 5; - } elseif (($argc == 3) && ($argv[1] == "update_server_directory")) { - $mode = 6; - } elseif (($argc > 5) && ($argv[1] == "load")) { - $mode = 7; - } elseif (($argc == 3) && ($argv[1] == "check_profile")) { - $mode = 8; - } elseif ($argc == 1) { - $search = ""; - $mode = 0; - } else { - logger("Unknown or missing parameter ".$argv[1]."\n"); - return; - } - - logger('start '.$search); - - if ($mode == 8) { - if ($argv[2] != "") { - PortableContact::lastUpdated($argv[2], true); - } - } elseif ($mode == 7) { - if ($argc == 6) { - $url = $argv[5]; - } else { - $url = ''; - } - PortableContact::load(intval($argv[2]), intval($argv[3]), intval($argv[4]), $url); - } elseif ($mode == 6) { - PortableContact::discoverSingleServer(intval($argv[2])); - } elseif ($mode == 5) { - update_server(); - } elseif ($mode == 4) { - $server_url = $argv[2]; - if ($server_url == "") { - return; - } - $server_url = filter_var($server_url, FILTER_SANITIZE_URL); - if (substr(normalise_link($server_url), 0, 7) != "http://") { - return; - } - $result = "Checking server ".$server_url." 
- "; - $ret = PortableContact::checkServer($server_url); - if ($ret) { - $result .= "success"; - } else { - $result .= "failed"; - } - logger($result, LOGGER_DEBUG); - } elseif ($mode == 3) { - GlobalContact::updateSuggestions(); - } elseif (($mode == 2) && Config::get('system', 'poco_completion')) { - discover_users(); - } elseif (($mode == 1) && ($search != "") && Config::get('system', 'poco_local_search')) { - discover_directory($search); - gs_search_user($search); - } elseif (($mode == 0) && ($search == "") && (Config::get('system', 'poco_discovery') > 0)) { - // Query Friendica and Hubzilla servers for their users - PortableContact::discover(); - - // Query GNU Social servers for their users ("statistics" addon has to be enabled on the GS server) - if (!Config::get('system', 'ostatus_disabled')) { - GlobalContact::discoverGsUsers(); - } - } - - logger('end '.$search); - - return; -} - -/** - * @brief Updates the first 250 servers - * - */ -function update_server() { - $r = q("SELECT `url`, `created`, `last_failure`, `last_contact` FROM `gserver` ORDER BY rand()"); - - if (!DBM::is_result($r)) { - return; - } - - $updated = 0; - - foreach ($r AS $server) { - if (!PortableContact::updateNeeded($server["created"], "", $server["last_failure"], $server["last_contact"])) { - continue; - } - logger('Update server status for server '.$server["url"], LOGGER_DEBUG); - - Worker::add(PRIORITY_LOW, "discover_poco", "server", $server["url"]); - - if (++$updated > 250) { - return; - } - } -} - -function discover_users() { - logger("Discover users", LOGGER_DEBUG); - - $starttime = time(); - - $users = q("SELECT `url`, `created`, `updated`, `last_failure`, `last_contact`, `server_url`, `network` FROM `gcontact` - WHERE `last_contact` < UTC_TIMESTAMP - INTERVAL 1 MONTH AND - `last_failure` < UTC_TIMESTAMP - INTERVAL 1 MONTH AND - `network` IN ('%s', '%s', '%s', '%s', '') ORDER BY rand()", - dbesc(NETWORK_DFRN), dbesc(NETWORK_DIASPORA), - dbesc(NETWORK_OSTATUS), 
dbesc(NETWORK_FEED)); - - if (!$users) { - return; - } - $checked = 0; - - foreach ($users AS $user) { - - $urlparts = parse_url($user["url"]); - if (!isset($urlparts["scheme"])) { - q("UPDATE `gcontact` SET `network` = '%s' WHERE `nurl` = '%s'", - dbesc(NETWORK_PHANTOM), dbesc(normalise_link($user["url"]))); - continue; - } - - if (in_array($urlparts["host"], array("www.facebook.com", "facebook.com", "twitter.com", - "identi.ca", "alpha.app.net"))) { - $networks = array("www.facebook.com" => NETWORK_FACEBOOK, - "facebook.com" => NETWORK_FACEBOOK, - "twitter.com" => NETWORK_TWITTER, - "identi.ca" => NETWORK_PUMPIO, - "alpha.app.net" => NETWORK_APPNET); - - q("UPDATE `gcontact` SET `network` = '%s' WHERE `nurl` = '%s'", - dbesc($networks[$urlparts["host"]]), dbesc(normalise_link($user["url"]))); - continue; - } - - $server_url = PortableContact::detectServer($user["url"]); - $force_update = false; - - if ($user["server_url"] != "") { - - $force_update = (normalise_link($user["server_url"]) != normalise_link($server_url)); - - $server_url = $user["server_url"]; - } - - if ((($server_url == "") && ($user["network"] == NETWORK_FEED)) || $force_update || PortableContact::checkServer($server_url, $user["network"])) { - logger('Check profile '.$user["url"]); - Worker::add(PRIORITY_LOW, "discover_poco", "check_profile", $user["url"]); - - if (++$checked > 100) { - return; - } - } else { - q("UPDATE `gcontact` SET `last_failure` = '%s' WHERE `nurl` = '%s'", - dbesc(datetime_convert()), dbesc(normalise_link($user["url"]))); - } - - // Quit the loop after 3 minutes - if (time() > ($starttime + 180)) { - return; - } - } -} - -function discover_directory($search) { - - $data = Cache::get("dirsearch:".$search); - if (!is_null($data)) { - // Only search for the same item every 24 hours - if (time() < $data + (60 * 60 * 24)) { - logger("Already searched for ".$search." 
in the last 24 hours", LOGGER_DEBUG); - return; - } - } - - $x = fetch_url(get_server()."/lsearch?p=1&n=500&search=".urlencode($search)); - $j = json_decode($x); - - if (count($j->results)) { - foreach ($j->results as $jj) { - // Check if the contact already exists - $exists = q("SELECT `id`, `last_contact`, `last_failure`, `updated` FROM `gcontact` WHERE `nurl` = '%s'", normalise_link($jj->url)); - if (DBM::is_result($exists)) { - logger("Profile ".$jj->url." already exists (".$search.")", LOGGER_DEBUG); - - if (($exists[0]["last_contact"] < $exists[0]["last_failure"]) && - ($exists[0]["updated"] < $exists[0]["last_failure"])) { - continue; - } - // Update the contact - PortableContact::lastUpdated($jj->url); - continue; - } - - $server_url = PortableContact::detectServer($jj->url); - if ($server_url != '') { - if (!PortableContact::checkServer($server_url)) { - logger("Friendica server ".$server_url." doesn't answer.", LOGGER_DEBUG); - continue; - } - logger("Friendica server ".$server_url." seems to be okay.", LOGGER_DEBUG); - } - - $data = Probe::uri($jj->url); - if ($data["network"] == NETWORK_DFRN) { - logger("Profile ".$jj->url." is reachable (".$search.")", LOGGER_DEBUG); - logger("Add profile ".$jj->url." to local directory (".$search.")", LOGGER_DEBUG); - - if ($jj->tags != "") { - $data["keywords"] = $jj->tags; - } - - $data["server_url"] = $data["baseurl"]; - - GlobalContact::update($data); - } else { - logger("Profile ".$jj->url." is not responding or no Friendica contact - but network ".$data["network"], LOGGER_DEBUG); - } - } - } - Cache::set("dirsearch:".$search, time(), CACHE_DAY); -} - -/** - * @brief Search for GNU Social user with gstools.org - * - * @param str $search User name - */ -function gs_search_user($search) { - - // Currently disabled, since the service isn't available anymore. - // It is not removed since I hope that there will be a successor. 
- return false; - - $a = get_app(); - - $url = "http://gstools.org/api/users_search/".urlencode($search); - - $result = z_fetch_url($url); - if (!$result["success"]) { - return false; - } - - $contacts = json_decode($result["body"]); - - if ($contacts->status == 'ERROR') { - return false; - } - - /// @TODO AS is considered as a notation for constants (as they usually being written all upper-case) - /// @TODO find all those and convert to all lower-case which is a keyword then - foreach ($contacts->data AS $user) { - $contact = Probe::uri($user->site_address."/".$user->name); - if ($contact["network"] != NETWORK_PHANTOM) { - $contact["about"] = $user->description; - GlobalContact::update($contact); - } - } -} diff --git a/mod/dirfind.php b/mod/dirfind.php index 84fe6f0ff..89df7c885 100644 --- a/mod/dirfind.php +++ b/mod/dirfind.php @@ -169,7 +169,7 @@ function dirfind_content(App $a, $prefix = "") { } // Add found profiles from the global directory to the local directory - Worker::add(PRIORITY_LOW, 'discover_poco', "dirsearch", urlencode($search)); + Worker::add(PRIORITY_LOW, 'DiscoverPoCo', "dirsearch", urlencode($search)); } else { $p = (($a->pager['page'] != 1) ? '&p=' . 
$a->pager['page'] : ''); diff --git a/src/Protocol/PortableContact.php b/src/Protocol/PortableContact.php index 826d92159..1c0ce15a8 100644 --- a/src/Protocol/PortableContact.php +++ b/src/Protocol/PortableContact.php @@ -51,7 +51,7 @@ class PortableContact public static function loadWorker($cid, $uid = 0, $zcid = 0, $url = null) { // Call the function "load" via the worker - Worker::add(PRIORITY_LOW, "discover_poco", "load", (int)$cid, (int)$uid, (int)$zcid, $url); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "load", (int)$cid, (int)$uid, (int)$zcid, $url); } /** @@ -1317,7 +1317,7 @@ class PortableContact $r = q("SELECT `nurl` FROM `gserver` WHERE `nurl` = '%s'", dbesc(normalise_link($server_url))); if (!DBM::is_result($r)) { logger("Call server check for server ".$server_url, LOGGER_DEBUG); - Worker::add(PRIORITY_LOW, "discover_poco", "server", $server_url); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "server", $server_url); } } } @@ -1340,7 +1340,7 @@ class PortableContact $servers = json_decode($serverdata); foreach ($servers->pods as $server) { - Worker::add(PRIORITY_LOW, "discover_poco", "server", "https://".$server->host); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "server", "https://".$server->host); } } @@ -1353,7 +1353,7 @@ class PortableContact foreach ($servers as $server) { $url = (is_null($server->https_score) ? 'http' : 'https').'://'.$server->name; - Worker::add(PRIORITY_LOW, "discover_poco", "server", $url); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "server", $url); } } } @@ -1462,7 +1462,7 @@ class PortableContact } logger('Update directory from server '.$server['url'].' 
with ID '.$server['id'], LOGGER_DEBUG); - Worker::add(PRIORITY_LOW, "discover_poco", "update_server_directory", (int)$server['id']); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "update_server_directory", (int)$server['id']); if (!$complete && (--$no_of_queries == 0)) { break; diff --git a/src/Worker/Cron.php b/src/Worker/Cron.php index 21b76903b..b3f0f1f69 100644 --- a/src/Worker/Cron.php +++ b/src/Worker/Cron.php @@ -42,10 +42,10 @@ Class Cron { Worker::add(PRIORITY_NEGLIGIBLE, "queue"); // run the process to discover global contacts in the background - Worker::add(PRIORITY_LOW, "discover_poco"); + Worker::add(PRIORITY_LOW, "DiscoverPoCo"); // run the process to update locally stored global contacts in the background - Worker::add(PRIORITY_LOW, "discover_poco", "checkcontact"); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "checkcontact"); // Expire and remove user entries Worker::add(PRIORITY_MEDIUM, "CronJobs", "expire_and_remove_users"); @@ -73,9 +73,9 @@ Class Cron { Worker::add(PRIORITY_LOW, "CronJobs", "update_contact_birthdays"); - Worker::add(PRIORITY_LOW, "discover_poco", "update_server"); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "update_server"); - Worker::add(PRIORITY_LOW, "discover_poco", "suggestions"); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "suggestions"); Config::set('system', 'last_expire_day', $d2); diff --git a/src/Worker/DiscoverPoCo.php b/src/Worker/DiscoverPoCo.php new file mode 100644 index 000000000..b54c61a6f --- /dev/null +++ b/src/Worker/DiscoverPoCo.php @@ -0,0 +1,304 @@ +: Searches for "search pattern" in the directory. "search pattern" is url encoded. + - checkcontact: Updates gcontact entries + - suggestions: Discover other servers for their contacts. + - server <poco url>: Searches for the poco server list. "poco url" is passed as a plain URL (it is not base64 decoded). + - update_server: Frequently check the first 250 servers for vitality. 
+ - update_server_directory: Discover the given server id for their contacts + - PortableContact::load: Load POCO data from a given POCO address + - check_profile: Update remote profile data + */ + + if ($command == "dirsearch") { + $search = urldecode($param1); + $mode = 1; + } elseif ($command == "checkcontact") { + $mode = 2; + } elseif ($command == "suggestions") { + $mode = 3; + } elseif ($command == "server") { + $mode = 4; + } elseif ($command == "update_server") { + $mode = 5; + } elseif ($command == "update_server_directory") { + $mode = 6; + } elseif ($command == "load") { + $mode = 7; + } elseif ($command == "check_profile") { + $mode = 8; + } elseif ($command == '') { + $search = ""; + $mode = 0; + } else { + logger("Unknown or missing parameter ".$command."\n"); + return; + } + + logger('start '.$search); + + if ($mode == 8) { + if ($param1 != "") { + PortableContact::lastUpdated($param1, true); + } + } elseif ($mode == 7) { + if (!empty($param4)) { + $url = $param4; + } else { + $url = ''; + } + PortableContact::load(intval($param1), intval($param2), intval($param3), $url); + } elseif ($mode == 6) { + PortableContact::discoverSingleServer(intval($param1)); + } elseif ($mode == 5) { + self::updateServer(); + } elseif ($mode == 4) { + $server_url = $param1; + if ($server_url == "") { + return; + } + $server_url = filter_var($server_url, FILTER_SANITIZE_URL); + if (substr(normalise_link($server_url), 0, 7) != "http://") { + return; + } + $result = "Checking server ".$server_url." 
- "; + $ret = PortableContact::checkServer($server_url); + if ($ret) { + $result .= "success"; + } else { + $result .= "failed"; + } + logger($result, LOGGER_DEBUG); + } elseif ($mode == 3) { + GlobalContact::updateSuggestions(); + } elseif (($mode == 2) && Config::get('system', 'poco_completion')) { + self::discoverUsers(); + } elseif (($mode == 1) && ($search != "") && Config::get('system', 'poco_local_search')) { + self::discoverDirectory($search); + self::gsSearchUser($search); + } elseif (($mode == 0) && ($search == "") && (Config::get('system', 'poco_discovery') > 0)) { + // Query Friendica and Hubzilla servers for their users + PortableContact::discover(); + + // Query GNU Social servers for their users ("statistics" addon has to be enabled on the GS server) + if (!Config::get('system', 'ostatus_disabled')) { + GlobalContact::discoverGsUsers(); + } + } + + logger('end '.$search); + + return; + } + + /** + * @brief Updates the first 250 servers + * + */ + private static function updateServer() { + $r = q("SELECT `url`, `created`, `last_failure`, `last_contact` FROM `gserver` ORDER BY rand()"); + + if (!DBM::is_result($r)) { + return; + } + + $updated = 0; + + foreach ($r AS $server) { + if (!PortableContact::updateNeeded($server["created"], "", $server["last_failure"], $server["last_contact"])) { + continue; + } + logger('Update server status for server '.$server["url"], LOGGER_DEBUG); + + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "server", $server["url"]); + + if (++$updated > 250) { + return; + } + } + } + + private static function discoverUsers() { + logger("Discover users", LOGGER_DEBUG); + + $starttime = time(); + + $users = q("SELECT `url`, `created`, `updated`, `last_failure`, `last_contact`, `server_url`, `network` FROM `gcontact` + WHERE `last_contact` < UTC_TIMESTAMP - INTERVAL 1 MONTH AND + `last_failure` < UTC_TIMESTAMP - INTERVAL 1 MONTH AND + `network` IN ('%s', '%s', '%s', '%s', '') ORDER BY rand()", + dbesc(NETWORK_DFRN), 
dbesc(NETWORK_DIASPORA), + dbesc(NETWORK_OSTATUS), dbesc(NETWORK_FEED)); + + if (!$users) { + return; + } + $checked = 0; + + foreach ($users AS $user) { + + $urlparts = parse_url($user["url"]); + if (!isset($urlparts["scheme"])) { + q("UPDATE `gcontact` SET `network` = '%s' WHERE `nurl` = '%s'", + dbesc(NETWORK_PHANTOM), dbesc(normalise_link($user["url"]))); + continue; + } + + if (in_array($urlparts["host"], array("www.facebook.com", "facebook.com", "twitter.com", + "identi.ca", "alpha.app.net"))) { + $networks = array("www.facebook.com" => NETWORK_FACEBOOK, + "facebook.com" => NETWORK_FACEBOOK, + "twitter.com" => NETWORK_TWITTER, + "identi.ca" => NETWORK_PUMPIO, + "alpha.app.net" => NETWORK_APPNET); + + q("UPDATE `gcontact` SET `network` = '%s' WHERE `nurl` = '%s'", + dbesc($networks[$urlparts["host"]]), dbesc(normalise_link($user["url"]))); + continue; + } + + $server_url = PortableContact::detectServer($user["url"]); + $force_update = false; + + if ($user["server_url"] != "") { + + $force_update = (normalise_link($user["server_url"]) != normalise_link($server_url)); + + $server_url = $user["server_url"]; + } + + if ((($server_url == "") && ($user["network"] == NETWORK_FEED)) || $force_update || PortableContact::checkServer($server_url, $user["network"])) { + logger('Check profile '.$user["url"]); + Worker::add(PRIORITY_LOW, "DiscoverPoCo", "check_profile", $user["url"]); + + if (++$checked > 100) { + return; + } + } else { + q("UPDATE `gcontact` SET `last_failure` = '%s' WHERE `nurl` = '%s'", + dbesc(datetime_convert()), dbesc(normalise_link($user["url"]))); + } + + // Quit the loop after 3 minutes + if (time() > ($starttime + 180)) { + return; + } + } + } + + private static function discoverDirectory($search) { + + $data = Cache::get("dirsearch:".$search); + if (!is_null($data)) { + // Only search for the same item every 24 hours + if (time() < $data + (60 * 60 * 24)) { + logger("Already searched for ".$search." 
in the last 24 hours", LOGGER_DEBUG); + return; + } + } + + $x = fetch_url(get_server()."/lsearch?p=1&n=500&search=".urlencode($search)); + $j = json_decode($x); + + if (count($j->results)) { + foreach ($j->results as $jj) { + // Check if the contact already exists + $exists = q("SELECT `id`, `last_contact`, `last_failure`, `updated` FROM `gcontact` WHERE `nurl` = '%s'", normalise_link($jj->url)); + if (DBM::is_result($exists)) { + logger("Profile ".$jj->url." already exists (".$search.")", LOGGER_DEBUG); + + if (($exists[0]["last_contact"] < $exists[0]["last_failure"]) && + ($exists[0]["updated"] < $exists[0]["last_failure"])) { + continue; + } + // Update the contact + PortableContact::lastUpdated($jj->url); + continue; + } + + $server_url = PortableContact::detectServer($jj->url); + if ($server_url != '') { + if (!PortableContact::checkServer($server_url)) { + logger("Friendica server ".$server_url." doesn't answer.", LOGGER_DEBUG); + continue; + } + logger("Friendica server ".$server_url." seems to be okay.", LOGGER_DEBUG); + } + + $data = Probe::uri($jj->url); + if ($data["network"] == NETWORK_DFRN) { + logger("Profile ".$jj->url." is reachable (".$search.")", LOGGER_DEBUG); + logger("Add profile ".$jj->url." to local directory (".$search.")", LOGGER_DEBUG); + + if ($jj->tags != "") { + $data["keywords"] = $jj->tags; + } + + $data["server_url"] = $data["baseurl"]; + + GlobalContact::update($data); + } else { + logger("Profile ".$jj->url." is not responding or no Friendica contact - but network ".$data["network"], LOGGER_DEBUG); + } + } + } + Cache::set("dirsearch:".$search, time(), CACHE_DAY); + } + + /** + * @brief Search for GNU Social user with gstools.org + * + * @param str $search User name + */ + private static function gsSearchUser($search) { + + // Currently disabled, since the service isn't available anymore. + // It is not removed since I hope that there will be a successor. 
+ return false; + + $a = get_app(); + + $url = "http://gstools.org/api/users_search/".urlencode($search); + + $result = z_fetch_url($url); + if (!$result["success"]) { + return false; + } + + $contacts = json_decode($result["body"]); + + if ($contacts->status == 'ERROR') { + return false; + } + + /// @TODO AS looks like a notation for constants (as they are usually written in all upper-case) + /// @TODO find all those and convert them to the lower-case keyword "as" + foreach ($contacts->data AS $user) { + $contact = Probe::uri($user->site_address."/".$user->name); + if ($contact["network"] != NETWORK_PHANTOM) { + $contact["about"] = $user->description; + GlobalContact::update($contact); + } + } + } +}