From 896fd3fc91204be4e1032469e77fceb187c6ddd3 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 14 Jul 2022 05:39:25 +0000 Subject: [PATCH] Use the redirect value --- src/Model/GServer.php | 92 +++++++++++++++++++++++-------------------- src/Util/Network.php | 23 ++++++++--- 2 files changed, 66 insertions(+), 49 deletions(-) diff --git a/src/Model/GServer.php b/src/Model/GServer.php index fc70b4fe47..3ea07dbc6f 100644 --- a/src/Model/GServer.php +++ b/src/Model/GServer.php @@ -44,8 +44,8 @@ use Friendica\Util\JsonLD; use Friendica\Util\Network; use Friendica\Util\Strings; use Friendica\Util\XML; -use GuzzleHttp\Exception\TransferException; use Friendica\Network\HTTPException; +use GuzzleHttp\Psr7\Uri; /** * This class handles GServer related functions @@ -311,7 +311,7 @@ class GServer unset($urlparts['pass']); unset($urlparts['query']); unset($urlparts['fragment']); - return Network::unparseURL($urlparts); + return (string)Uri::fromParts($urlparts); } /** @@ -337,50 +337,56 @@ class GServer return false; } - if (!Network::isUrlValid($url)) { - self::setFailure($url); - return false; - } - // If the URL missmatches, then we mark the old entry as failure - if (Strings::normaliseLink($url) != Strings::normaliseLink($original_url)) { + if (!Strings::compareLink($url, $original_url)) { self::setFailure($original_url); - self::detect($url, $network, $only_nodeinfo); - return false; - } - - // On a redirect follow the new host but mark the old one as failure - try { - $finalurl = rtrim(DI::httpClient()->finalUrl($url), '/'); - } catch (TransferException $exception) { - Logger::notice('Error fetching final URL.', ['url' => $url, 'exception' => $exception]); - self::setFailure($url); - return false; - } - - if (empty($finalurl)) { - Logger::notice('Empty redirected URL.', ['url' => $url]); - return false; - } - - // We only follow redirects when the path stays the same or the target url has no path. - // Some systems have got redirects on their landing page to a single account page. This check handles it. - if (((parse_url($url, PHP_URL_HOST) != parse_url($finalurl, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) == parse_url($finalurl, PHP_URL_PATH))) || - (((parse_url($url, PHP_URL_HOST) != parse_url($finalurl, PHP_URL_HOST)) || (parse_url($url, PHP_URL_PATH) != parse_url($finalurl, PHP_URL_PATH))) && empty(parse_url($finalurl, PHP_URL_PATH)))) { - Logger::info('Found redirect. Mark old entry as failure', ['old' => $url, 'new' => $finalurl]); - self::setFailure($url); - self::detect($finalurl, $network, $only_nodeinfo); - return false; - } - - if ((parse_url($url, PHP_URL_HOST) == parse_url($finalurl, PHP_URL_HOST)) && - (parse_url($url, PHP_URL_PATH) == parse_url($finalurl, PHP_URL_PATH)) && - (parse_url($url, PHP_URL_SCHEME) != parse_url($finalurl, PHP_URL_SCHEME))) { - if (!Network::isUrlValid($finalurl)) { - self::setFailure($finalurl); - } else { - $url = $finalurl; + if (!self::getID($url, true)) { + self::detect($url, $network, $only_nodeinfo); } + return false; + } + + $valid_url = Network::isUrlValid($url); + if (!$valid_url) { + self::setFailure($url); + return false; + } else { + $valid_url = rtrim($valid_url, '/'); + } + + if (!Strings::compareLink($url, $valid_url)) { + // We only follow redirects when the path stays the same or the target url has no path. + // Some systems have got redirects on their landing page to a single account page. This check handles it. + if (((parse_url($url, PHP_URL_HOST) != parse_url($valid_url, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) == parse_url($valid_url, PHP_URL_PATH))) || + (((parse_url($url, PHP_URL_HOST) != parse_url($valid_url, PHP_URL_HOST)) || (parse_url($url, PHP_URL_PATH) != parse_url($valid_url, PHP_URL_PATH))) && empty(parse_url($valid_url, PHP_URL_PATH)))) { + Logger::debug('Found redirect. Mark old entry as failure', ['old' => $url, 'new' => $valid_url]); + self::setFailure($url); + if (!self::getID($valid_url, true)) { + self::detect($valid_url, $network, $only_nodeinfo); + } + return false; + } + + if ((parse_url($url, PHP_URL_HOST) != parse_url($valid_url, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) != parse_url($valid_url, PHP_URL_PATH)) && + (parse_url($url, PHP_URL_PATH) == '')) { + Logger::debug('Found redirect. Mark old entry as failure and redirect to the basepath.', ['old' => $url, 'new' => $valid_url]); + $parts = parse_url($valid_url); + unset($parts['path']); + $valid_url = (string)Uri::fromParts($parts); + + self::setFailure($url); + if (!self::getID($valid_url, true)) { + self::detect($valid_url, $network, $only_nodeinfo); + } + return false; + } + Logger::debug('Found redirect, but ignore it.', ['old' => $url, 'new' => $valid_url]); + } + + if ((parse_url($url, PHP_URL_HOST) == parse_url($valid_url, PHP_URL_HOST)) && + (parse_url($url, PHP_URL_PATH) == parse_url($valid_url, PHP_URL_PATH)) && + (parse_url($url, PHP_URL_SCHEME) != parse_url($valid_url, PHP_URL_SCHEME))) { + $url = $valid_url; } $in_webroot = empty(parse_url($url, PHP_URL_PATH)); diff --git a/src/Util/Network.php b/src/Util/Network.php index 92b5778498..1a9f7cb98a 100644 --- a/src/Util/Network.php +++ b/src/Util/Network.php @@ -26,6 +26,7 @@ use Friendica\Core\Logger; use Friendica\DI; use Friendica\Model\Contact; use Friendica\Network\HTTPClient\Client\HttpClientAccept; +use Friendica\Network\HTTPClient\Client\HttpClientOptions; use Friendica\Network\HTTPException\NotModifiedException; use GuzzleHttp\Psr7\Uri; @@ -74,6 +75,22 @@ class Network return false; } + if (in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) { + $curlResult = DI::httpClient()->head($url, [HttpClientOptions::TIMEOUT => $xrd_timeout]); + + // Workaround for systems that can't handle a HEAD request. Don't retry on timeouts. + if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() >= 400) && !in_array($curlResult->getReturnCode(), [408, 504])) { + $curlResult = DI::httpClient()->get($url, HttpClientAccept::DEFAULT, [HttpClientOptions::TIMEOUT => $xrd_timeout]); + } + + if (!$curlResult->isSuccess()) { + Logger::notice('Url not reachable', ['host' => $host, 'url' => $url]); + return false; + } elseif ($curlResult->isRedirectUrl()) { + $url = $curlResult->getRedirectUrl(); + } + } + // Check if the certificate is valid for this hostname if (parse_url($url, PHP_URL_SCHEME) == 'https') { $port = parse_url($url, PHP_URL_PORT) ?? 443; @@ -111,12 +128,6 @@ class Network return false; } } - if (in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) { - if (!ParseUrl::getContentType($url, HttpClientAccept::DEFAULT, $xrd_timeout)) { - Logger::notice('Url not reachable', ['host' => $host, 'url' => $url]); - return false; - } - } return $url; }