Use the redirect value
This commit is contained in:
parent
dad05f1c76
commit
896fd3fc91
2 changed files with 66 additions and 49 deletions
|
@ -44,8 +44,8 @@ use Friendica\Util\JsonLD;
|
|||
use Friendica\Util\Network;
|
||||
use Friendica\Util\Strings;
|
||||
use Friendica\Util\XML;
|
||||
use GuzzleHttp\Exception\TransferException;
|
||||
use Friendica\Network\HTTPException;
|
||||
use GuzzleHttp\Psr7\Uri;
|
||||
|
||||
/**
|
||||
* This class handles GServer related functions
|
||||
|
@ -311,7 +311,7 @@ class GServer
|
|||
unset($urlparts['pass']);
|
||||
unset($urlparts['query']);
|
||||
unset($urlparts['fragment']);
|
||||
return Network::unparseURL($urlparts);
|
||||
return (string)Uri::fromParts($urlparts);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -337,50 +337,56 @@ class GServer
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!Network::isUrlValid($url)) {
|
||||
self::setFailure($url);
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the URL missmatches, then we mark the old entry as failure
|
||||
if (Strings::normaliseLink($url) != Strings::normaliseLink($original_url)) {
|
||||
if (!Strings::compareLink($url, $original_url)) {
|
||||
self::setFailure($original_url);
|
||||
if (!self::getID($url, true)) {
|
||||
self::detect($url, $network, $only_nodeinfo);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// On a redirect follow the new host but mark the old one as failure
|
||||
try {
|
||||
$finalurl = rtrim(DI::httpClient()->finalUrl($url), '/');
|
||||
} catch (TransferException $exception) {
|
||||
Logger::notice('Error fetching final URL.', ['url' => $url, 'exception' => $exception]);
|
||||
$valid_url = Network::isUrlValid($url);
|
||||
if (!$valid_url) {
|
||||
self::setFailure($url);
|
||||
return false;
|
||||
} else {
|
||||
$valid_url = rtrim($valid_url, '/');
|
||||
}
|
||||
|
||||
if (empty($finalurl)) {
|
||||
Logger::notice('Empty redirected URL.', ['url' => $url]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Strings::compareLink($url, $valid_url)) {
|
||||
// We only follow redirects when the path stays the same or the target url has no path.
|
||||
// Some systems have got redirects on their landing page to a single account page. This check handles it.
|
||||
if (((parse_url($url, PHP_URL_HOST) != parse_url($finalurl, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) == parse_url($finalurl, PHP_URL_PATH))) ||
|
||||
(((parse_url($url, PHP_URL_HOST) != parse_url($finalurl, PHP_URL_HOST)) || (parse_url($url, PHP_URL_PATH) != parse_url($finalurl, PHP_URL_PATH))) && empty(parse_url($finalurl, PHP_URL_PATH)))) {
|
||||
Logger::info('Found redirect. Mark old entry as failure', ['old' => $url, 'new' => $finalurl]);
|
||||
if (((parse_url($url, PHP_URL_HOST) != parse_url($valid_url, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) == parse_url($valid_url, PHP_URL_PATH))) ||
|
||||
(((parse_url($url, PHP_URL_HOST) != parse_url($valid_url, PHP_URL_HOST)) || (parse_url($url, PHP_URL_PATH) != parse_url($valid_url, PHP_URL_PATH))) && empty(parse_url($valid_url, PHP_URL_PATH)))) {
|
||||
Logger::debug('Found redirect. Mark old entry as failure', ['old' => $url, 'new' => $valid_url]);
|
||||
self::setFailure($url);
|
||||
self::detect($finalurl, $network, $only_nodeinfo);
|
||||
if (!self::getID($valid_url, true)) {
|
||||
self::detect($valid_url, $network, $only_nodeinfo);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((parse_url($url, PHP_URL_HOST) == parse_url($finalurl, PHP_URL_HOST)) &&
|
||||
(parse_url($url, PHP_URL_PATH) == parse_url($finalurl, PHP_URL_PATH)) &&
|
||||
(parse_url($url, PHP_URL_SCHEME) != parse_url($finalurl, PHP_URL_SCHEME))) {
|
||||
if (!Network::isUrlValid($finalurl)) {
|
||||
self::setFailure($finalurl);
|
||||
} else {
|
||||
$url = $finalurl;
|
||||
if ((parse_url($url, PHP_URL_HOST) != parse_url($valid_url, PHP_URL_HOST)) && (parse_url($url, PHP_URL_PATH) != parse_url($valid_url, PHP_URL_PATH)) &&
|
||||
(parse_url($url, PHP_URL_PATH) == '')) {
|
||||
Logger::debug('Found redirect. Mark old entry as failure and redirect to the basepath.', ['old' => $url, 'new' => $valid_url]);
|
||||
$parts = parse_url($valid_url);
|
||||
unset($parts['path']);
|
||||
$valid_url = (string)Uri::fromParts($parts);
|
||||
|
||||
self::setFailure($url);
|
||||
if (!self::getID($valid_url, true)) {
|
||||
self::detect($valid_url, $network, $only_nodeinfo);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
Logger::debug('Found redirect, but ignore it.', ['old' => $url, 'new' => $valid_url]);
|
||||
}
|
||||
|
||||
if ((parse_url($url, PHP_URL_HOST) == parse_url($valid_url, PHP_URL_HOST)) &&
|
||||
(parse_url($url, PHP_URL_PATH) == parse_url($valid_url, PHP_URL_PATH)) &&
|
||||
(parse_url($url, PHP_URL_SCHEME) != parse_url($valid_url, PHP_URL_SCHEME))) {
|
||||
$url = $valid_url;
|
||||
}
|
||||
|
||||
$in_webroot = empty(parse_url($url, PHP_URL_PATH));
|
||||
|
|
|
@ -26,6 +26,7 @@ use Friendica\Core\Logger;
|
|||
use Friendica\DI;
|
||||
use Friendica\Model\Contact;
|
||||
use Friendica\Network\HTTPClient\Client\HttpClientAccept;
|
||||
use Friendica\Network\HTTPClient\Client\HttpClientOptions;
|
||||
use Friendica\Network\HTTPException\NotModifiedException;
|
||||
use GuzzleHttp\Psr7\Uri;
|
||||
|
||||
|
@ -74,6 +75,22 @@ class Network
|
|||
return false;
|
||||
}
|
||||
|
||||
if (in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) {
|
||||
$curlResult = DI::httpClient()->head($url, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
|
||||
|
||||
// Workaround for systems that can't handle a HEAD request. Don't retry on timeouts.
|
||||
if (!$curlResult->isSuccess() && ($curlResult->getReturnCode() >= 400) && !in_array($curlResult->getReturnCode(), [408, 504])) {
|
||||
$curlResult = DI::httpClient()->get($url, HttpClientAccept::DEFAULT, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
|
||||
}
|
||||
|
||||
if (!$curlResult->isSuccess()) {
|
||||
Logger::notice('Url not reachable', ['host' => $host, 'url' => $url]);
|
||||
return false;
|
||||
} elseif ($curlResult->isRedirectUrl()) {
|
||||
$url = $curlResult->getRedirectUrl();
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the certificate is valid for this hostname
|
||||
if (parse_url($url, PHP_URL_SCHEME) == 'https') {
|
||||
$port = parse_url($url, PHP_URL_PORT) ?? 443;
|
||||
|
@ -111,12 +128,6 @@ class Network
|
|||
return false;
|
||||
}
|
||||
}
|
||||
if (in_array(parse_url($url, PHP_URL_SCHEME), ['https', 'http'])) {
|
||||
if (!ParseUrl::getContentType($url, HttpClientAccept::DEFAULT, $xrd_timeout)) {
|
||||
Logger::notice('Url not reachable', ['host' => $host, 'url' => $url]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue