From 24afcdd5dd6354fb34a37aec82b6144f4a76840c Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Thu, 24 Mar 2016 08:35:06 +0100 Subject: [PATCH 1/4] Scrape: Always take the first alias --- include/Scrape.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Scrape.php b/include/Scrape.php index e8e9a97a16..03d21047e7 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -444,7 +444,7 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { if($link['@attributes']['rel'] === 'alias') { if(strpos($link['@attributes']['href'],'@') === false) { if(isset($profile)) { - if($link['@attributes']['href'] !== $profile) + if(($link['@attributes']['href'] !== $profile) AND ($alias == "")) $alias = unamp($link['@attributes']['href']); } else From 4a5a964d8959883b73c541af1fe6474fd835e1b9 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Wed, 30 Mar 2016 12:46:10 +0200 Subject: [PATCH 2/4] "Scrape" now respects the new url formats with "index.php" --- include/Scrape.php | 49 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/include/Scrape.php b/include/Scrape.php index 03d21047e7..deff0b080f 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -356,7 +356,7 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { $result = array(); - if(! $url) + if (!$url) return $result; $result = Cache::get("probe_url:".$mode.":".$url); @@ -365,6 +365,7 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { return $result; } + $original_url = $url; $network = null; $diaspora = false; $diaspora_base = ''; @@ -393,7 +394,12 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { else $links = lrdd($url); - if(count($links)) { + if ((count($links) == 0) AND strstr($url, "/index.php")) { + $url = str_replace("/index.php", "", $url); + $links = lrdd($url); + } + + if (count($links)) { $has_lrdd = true; logger('probe_url: found lrdd links: ' . print_r($links,true), LOGGER_DATA); @@ -440,12 +446,21 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { // aliases, let's hope we're lucky and get one that matches the feed author-uri because // otherwise we're screwed. + $backup_alias = ""; + foreach($links as $link) { if($link['@attributes']['rel'] === 'alias') { if(strpos($link['@attributes']['href'],'@') === false) { if(isset($profile)) { - if(($link['@attributes']['href'] !== $profile) AND ($alias == "")) - $alias = unamp($link['@attributes']['href']); + $alias_url = $link['@attributes']['href']; + + if(($alias_url !== $profile) AND ($backup_alias == "") AND + ($alias_url !== str_replace("/index.php", "", $profile))) + $backup_alias = $alias_url; + + if(($alias_url !== $profile) AND !strstr($alias_url, "index.php") AND + ($alias_url !== str_replace("/index.php", "", $profile))) + $alias = $alias_url; } else $profile = unamp($link['@attributes']['href']); @@ -453,6 +468,9 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { } } + if ($alias == "") + $alias = $backup_alias; + // If the profile is different from the url then the url is abviously an alias if (($alias == "") AND ($profile != "") AND !$at_addr AND (normalise_link($profile) != normalise_link($url))) $alias = $url; @@ -769,6 +787,9 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { if (($baseurl == "") AND ($poll != "")) $baseurl = matching_url(normalise_link($profile), normalise_link($poll)); + if (substr($baseurl, -10) == "/index.php") + $baseurl = str_replace("/index.php", "", $baseurl); + $baseurl = rtrim($baseurl, "/"); if(strpos($url,'@') AND ($addr == "") AND ($network == NETWORK_DFRN)) @@ -816,8 +837,24 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { } // Only store into the cache if the value seems to be valid - if ($result['network'] != NETWORK_PHANTOM) - Cache::set("probe_url:".$mode.":".$url,serialize($result), CACHE_DAY); + if ($result['network'] != NETWORK_PHANTOM) { + Cache::set("probe_url:".$mode.":".$original_url,serialize($result), CACHE_DAY); + + /// @todo temporary fix - we need a real contact update function that updates only changing fields + /// The biggest problem is the avatar picture that could have a reduced image size. + /// It should only be updated if the existing picture isn't existing anymore. + if (($result['network'] != NETWORK_FEED) AND $result["addr"] AND $result["name"] AND $result["nick"]) + q("UPDATE `contact` SET `addr` = '%s', `alias` = '%s', `name` = '%s', `nick` = '%s', + `name-date` = '%s', `uri-date` = '%s' WHERE `nurl` = '%s' AND NOT `self`", + dbesc($result["addr"]), + dbesc($result["alias"]), + dbesc($result["name"]), + dbesc($result["nick"]), + dbesc(datetime_convert()), + dbesc(datetime_convert()), + dbesc(normalise_link($result['url'])) + ); + } return $result; } From eeb462cd0460d029a8b9e29f3dd122eb16befec1 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 1 Apr 2016 21:41:37 +0200 Subject: [PATCH 3/4] Only update the contact entry with uid=0 --- include/Scrape.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Scrape.php b/include/Scrape.php index deff0b080f..3fead0c415 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -845,7 +845,7 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { /// It should only be updated if the existing picture isn't existing anymore. if (($result['network'] != NETWORK_FEED) AND $result["addr"] AND $result["name"] AND $result["nick"]) q("UPDATE `contact` SET `addr` = '%s', `alias` = '%s', `name` = '%s', `nick` = '%s', - `name-date` = '%s', `uri-date` = '%s' WHERE `nurl` = '%s' AND NOT `self`", + `name-date` = '%s', `uri-date` = '%s' WHERE `nurl` = '%s' AND NOT `self` AND `uid` = 0", dbesc($result["addr"]), dbesc($result["alias"]), dbesc($result["name"]), From bc05984786547a1440261be932d8d7f5498d74ae Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Sat, 2 Apr 2016 09:06:10 +0200 Subject: [PATCH 4/4] Only update contact when scrape runs with "probe_normal" --- include/Scrape.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/Scrape.php b/include/Scrape.php index 3fead0c415..9913f360d6 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -843,15 +843,15 @@ function probe_url($url, $mode = PROBE_NORMAL, $level = 1) { /// @todo temporary fix - we need a real contact update function that updates only changing fields /// The biggest problem is the avatar picture that could have a reduced image size. /// It should only be updated if the existing picture isn't existing anymore. - if (($result['network'] != NETWORK_FEED) AND $result["addr"] AND $result["name"] AND $result["nick"]) + if (($result['network'] != NETWORK_FEED) AND ($mode == PROBE_NORMAL) AND + $result["addr"] AND $result["name"] AND $result["nick"]) q("UPDATE `contact` SET `addr` = '%s', `alias` = '%s', `name` = '%s', `nick` = '%s', - `name-date` = '%s', `uri-date` = '%s' WHERE `nurl` = '%s' AND NOT `self` AND `uid` = 0", + `success_update` = '%s' WHERE `nurl` = '%s' AND NOT `self` AND `uid` = 0", dbesc($result["addr"]), dbesc($result["alias"]), dbesc($result["name"]), dbesc($result["nick"]), dbesc(datetime_convert()), - dbesc(datetime_convert()), dbesc(normalise_link($result['url'])) ); }