From 28f97845caecd1c51cf0d005a9dd1d3cbe9c299c Mon Sep 17 00:00:00 2001
From: Michael Vogel
Date: Wed, 20 Apr 2016 07:00:55 +0200
Subject: [PATCH 1/7] Some more checks that the gcontact table contains valid data

---
Review note: the mod/item.php hunk tests $probed['network']. The result of
probe_url() is stored in $probed on the line above; no $result is assigned
anywhere in the visible hunk, so testing $result would not check the probe.

 include/Scrape.php   |  5 ++++-
 include/socgraph.php | 21 +++++++++++++++++++--
 mod/item.php         |  2 +-
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/include/Scrape.php b/include/Scrape.php
index 84257f4811..542adc22ab 100644
--- a/include/Scrape.php
+++ b/include/Scrape.php
@@ -799,6 +799,9 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) {
 	if (substr($baseurl, -10) == "/index.php")
 		$baseurl = str_replace("/index.php", "", $baseurl);
 
+	if ($network == "")
+		$network = NETWORK_PHANTOM;
+
 	$baseurl = rtrim($baseurl, "/");
 
 	if(strpos($url,'@') AND ($addr == "") AND ($network == NETWORK_DFRN))
@@ -846,7 +849,7 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) {
 	}
 
 	// Only store into the cache if the value seems to be valid
-	if (!in_array($result['network'], array(NETWORK_PHANTOM, ""))) {
+	if ($result['network'] != NETWORK_PHANTOM) {
 		Cache::set("probe_url:".$mode.":".$original_url,serialize($result), CACHE_DAY);
 
 		/// @todo temporary fix - we need a real contact update function that updates only changing fields
diff --git a/include/socgraph.php b/include/socgraph.php
index 402fee8593..f7dd1ad93b 100644
--- a/include/socgraph.php
+++ b/include/socgraph.php
@@ -1377,6 +1377,17 @@ function clean_contact_url($url) {
 	return $new_url;
 }
 
+function fix_alternate_contact_address(&$contact) {
+	if (($contact["network"] == NETWORK_OSTATUS) AND poco_alternate_ostatus_url($contact["url"])) {
+		$data = probe_url($contact["url"]);
+		if (!in_array($contact["network"], array(NETWORK_FEED, NETWORK_PHANTOM))) {
+			$contact["url"] = $data["url"];
+			$contact["addr"] = $data["addr"];
+			$contact["server_url"] = $data["baseurl"];
+		}
+	}
+}
+
 /**
  * @brief Fetch the gcontact id, add an entry if not existed
  *
@@ -1387,12 +1398,15 @@ function get_gcontact_id($contact) {
 
 	$gcontact_id = 0;
 
-	if ($contact["network"] == NETWORK_PHANTOM)
+	if (in_array($contact["network"], array(NETWORK_FEED, NETWORK_PHANTOM)))
 		return false;
 
 	if ($contact["network"] == NETWORK_STATUSNET)
 		$contact["network"] = NETWORK_OSTATUS;
 
+	// Replace alternate OStatus user format with the primary one
+	fix_alternate_contact_address($contact);
+
 	// Remove unwanted parts from the contact url (e.g. "?zrl=...")
 	$contact["url"] = clean_contact_url($contact["url"]);
 
@@ -1477,6 +1491,9 @@ function update_gcontact($contact) {
 	if ($contact["network"] == NETWORK_STATUSNET)
 		$contact["network"] = NETWORK_OSTATUS;
 
+	// Replace alternate OStatus user format with the primary one
+	fix_alternate_contact_address($contact);
+
 	if (!isset($contact["updated"]))
 		$contact["updated"] = datetime_convert();
 
@@ -1571,7 +1588,7 @@ function update_gcontact($contact) {
 function update_gcontact_from_probe($url) {
 	$data = probe_url($url);
 
-	if ($data["network"] == NETWORK_PHANTOM)
+	if (in_array($data["network"], array(NETWORK_FEED, NETWORK_PHANTOM)))
 		return;
 
 	update_gcontact($data);
diff --git a/mod/item.php b/mod/item.php
index ffb486a7db..60e6931583 100644
--- a/mod/item.php
+++ b/mod/item.php
@@ -1147,7 +1147,7 @@ function handle_tag($a, &$body, &$inform, &$str_tags, $profile_uid, $tag, $netwo
 
 			if (!$r) {
 				$probed = probe_url($name);
-				if (isset($probed["url"])) {
+				if ($probed['network'] != NETWORK_PHANTOM) {
 					update_gcontact($probed);
 					$r = q("SELECT `url`, `name`, `nick`, `network`, `alias`, `notify` FROM `gcontact` WHERE `nurl` = '%s' LIMIT 1",
 						dbesc(normalise_link($probed["url"])));
From bc0ba8a0d7e45f53ae7a29b84e8a271cb18f1ee9 Mon Sep 17 00:00:00 2001
From: Michael Vogel
Date: Wed, 20 Apr 2016 07:26:18 +0200
Subject: [PATCH 2/7] Added description

---
Review note: the inner check tests $data["network"], i.e. the probe_url()
result. $contact["network"] is already NETWORK_OSTATUS because of the
enclosing condition, so re-testing it could never skip the overwrite.

 include/socgraph.php | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/socgraph.php b/include/socgraph.php
index f7dd1ad93b..d13dc1cd63 100644
--- a/include/socgraph.php
+++ b/include/socgraph.php
@@ -1377,12 +1377,19 @@ function clean_contact_url($url) {
 	return $new_url;
 }
 
+/**
+ * @brief Replace alternate OStatus user format with the primary one
+ *
+ * @param arr $contact contact array (called by reference)
+ */
 function fix_alternate_contact_address(&$contact) {
 	if (($contact["network"] == NETWORK_OSTATUS) AND poco_alternate_ostatus_url($contact["url"])) {
 		$data = probe_url($contact["url"]);
-		if (!in_array($contact["network"], array(NETWORK_FEED, NETWORK_PHANTOM))) {
+		if ($data["network"] == NETWORK_OSTATUS) {
+			logger("Fix primary url from ".$contact["url"]." to ".$data["url"], LOGGER_DEBUG);
 			$contact["url"] = $data["url"];
 			$contact["addr"] = $data["addr"];
+			$contact["alias"] = $data["alias"];
 			$contact["server_url"] = $data["baseurl"];
 		}
 	}
From c6fb6c559a3b0445866e49917faf4d3515d0f6b8 Mon Sep 17 00:00:00 2001
From: Michael Vogel
Date: Wed, 20 Apr 2016 07:48:21 +0200
Subject: [PATCH 3/7] Improved logging

---
 include/socgraph.php | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/socgraph.php b/include/socgraph.php
index d13dc1cd63..54ed62c0e5 100644
--- a/include/socgraph.php
+++ b/include/socgraph.php
@@ -1374,6 +1374,9 @@ function clean_contact_url($url) {
 	if (isset($parts["path"]))
 		$new_url .= $parts["path"];
 
+	if ($new_url != $url)
+		logger("Cleaned contact url ".$url." to ".$new_url." - Called by: ".App::callstack(), LOGGER_DEBUG);
+
 	return $new_url;
 }
 
@@ -1386,7 +1389,7 @@ function fix_alternate_contact_address(&$contact) {
 	if (($contact["network"] == NETWORK_OSTATUS) AND poco_alternate_ostatus_url($contact["url"])) {
 		$data = probe_url($contact["url"]);
 		if ($data["network"] == NETWORK_OSTATUS) {
-			logger("Fix primary url from ".$contact["url"]." to ".$data["url"], LOGGER_DEBUG);
+			logger("Fix primary url from ".$contact["url"]." to ".$data["url"]." - Called by: ".App::callstack(), LOGGER_DEBUG);
 			$contact["url"] = $data["url"];
 			$contact["addr"] = $data["addr"];
 			$contact["alias"] = $data["alias"];
@@ -1405,8 +1408,10 @@ function get_gcontact_id($contact) {
 
 	$gcontact_id = 0;
 
-	if (in_array($contact["network"], array(NETWORK_FEED, NETWORK_PHANTOM)))
+	if (in_array($contact["network"], array(NETWORK_FEED, NETWORK_PHANTOM))) {
+		logger("Invalid network for contact url ".$contact["url"]." - Called by: ".App::callstack(), LOGGER_DEBUG);
 		return false;
+	}
 
 	if ($contact["network"] == NETWORK_STATUSNET)
 		$contact["network"] = NETWORK_OSTATUS;
@@ -1595,8 +1600,10 @@ function update_gcontact($contact) {
 function update_gcontact_from_probe($url) {
 	$data = probe_url($url);
 
-	if (in_array($data["network"], array(NETWORK_FEED, NETWORK_PHANTOM)))
+	if (in_array($data["network"], array(NETWORK_FEED, NETWORK_PHANTOM))) {
+		logger("Invalid network for contact url ".$data["url"]." - Called by: ".App::callstack(), LOGGER_DEBUG);
 		return;
+	}
 
 	update_gcontact($data);
 }
From 919728126a434c4101bef45a98d37410a63f51fa Mon Sep 17 00:00:00 2001
From: Michael Vogel
Date: Wed, 20 Apr 2016 07:58:52 +0200
Subject: [PATCH 4/7] Bugfix: We checked in the wrong array ...

---
 include/gprobe.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/gprobe.php b/include/gprobe.php
index dfa9137d7d..bd8503835b 100644
--- a/include/gprobe.php
+++ b/include/gprobe.php
@@ -58,7 +58,7 @@ function gprobe_run(&$argv, &$argc){
 		if (is_null($result))
 			Cache::set("gprobe:".$urlparts["host"],serialize($arr));
 
-		if (!in_array($result["network"], array(NETWORK_FEED, NETWORK_PHANTOM)))
+		if (!in_array($arr["network"], array(NETWORK_FEED, NETWORK_PHANTOM)))
 			update_gcontact($arr);
 
 		$r = q("SELECT `id`, `url`, `network` FROM `gcontact` WHERE `nurl` = '%s' ORDER BY `id` LIMIT 1",
From ea3ecd83eace8f21b6ed6fc42764db70a0b38da5 Mon Sep 17 00:00:00 2001
From: Michael Vogel
Date: Wed, 20 Apr 2016 08:27:11 +0200
Subject: [PATCH 5/7] We have to store feeds in the gcontact table ...

---
 include/socgraph.php | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/socgraph.php b/include/socgraph.php
index 54ed62c0e5..9bbafecfff 100644
--- a/include/socgraph.php
+++ b/include/socgraph.php
@@ -1408,7 +1408,7 @@ function get_gcontact_id($contact) {
 
 	$gcontact_id = 0;
 
-	if (in_array($contact["network"], array(NETWORK_FEED, NETWORK_PHANTOM))) {
+	if (in_array($contact["network"], array(NETWORK_PHANTOM))) {
 		logger("Invalid network for contact url ".$contact["url"]." - Called by: ".App::callstack(), LOGGER_DEBUG);
 		return false;
 	}
@@ -1600,7 +1600,7 @@ function update_gcontact($contact) {
 function update_gcontact_from_probe($url) {
 	$data = probe_url($url);
 
-	if (in_array($data["network"], array(NETWORK_FEED, NETWORK_PHANTOM))) {
+	if (in_array($data["network"], array(NETWORK_PHANTOM))) {
 		logger("Invalid network for contact url ".$data["url"]." - Called by: ".App::callstack(), LOGGER_DEBUG);
 		return;
 	}
From 7f8aee2baa3852b042a6875d00dd39d868973a2c Mon Sep 17 00:00:00 2001
From: Michael Vogel
Date: Wed, 20 Apr 2016 09:31:36 +0200
Subject: [PATCH 6/7] Don't clean the contact url of feeds

---
 include/socgraph.php | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/socgraph.php b/include/socgraph.php
index 9bbafecfff..be11894d9e 100644
--- a/include/socgraph.php
+++ b/include/socgraph.php
@@ -1420,7 +1420,8 @@ function get_gcontact_id($contact) {
 	fix_alternate_contact_address($contact);
 
 	// Remove unwanted parts from the contact url (e.g. "?zrl=...")
-	$contact["url"] = clean_contact_url($contact["url"]);
+	if ($contact["network"] != NETWORK_FEED)
+		$contact["url"] = clean_contact_url($contact["url"]);
 
 	$r = q("SELECT `id` FROM `gcontact` WHERE `nurl` = '%s' ORDER BY `id` LIMIT 2",
 		dbesc(normalise_link($contact["url"])));
From 378d444fc371fde7ee1df27eeac95e5f8fba0fcb Mon Sep 17 00:00:00 2001
From: Michael Vogel
Date: Wed, 20 Apr 2016 09:38:06 +0200
Subject: [PATCH 7/7] Only do the cleaning on specific networks

---
 include/socgraph.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/socgraph.php b/include/socgraph.php
index be11894d9e..b7d6f29344 100644
--- a/include/socgraph.php
+++ b/include/socgraph.php
@@ -1420,7 +1420,7 @@ function get_gcontact_id($contact) {
 	fix_alternate_contact_address($contact);
 
 	// Remove unwanted parts from the contact url (e.g. "?zrl=...")
-	if ($contact["network"] != NETWORK_FEED)
+	if (in_array($contact["network"], array(NETWORK_DFRN, NETWORK_DIASPORA, NETWORK_OSTATUS)))
 		$contact["url"] = clean_contact_url($contact["url"]);
 
 	$r = q("SELECT `id` FROM `gcontact` WHERE `nurl` = '%s' ORDER BY `id` LIMIT 2",