Use mattwright/urlresolver for HTTPClient::finalUrl()
This commit is contained in:
parent
e576af218b
commit
8385ee7a61
|
@ -69,7 +69,8 @@
|
||||||
"npm-asset/perfect-scrollbar": "0.6.16",
|
"npm-asset/perfect-scrollbar": "0.6.16",
|
||||||
"npm-asset/textcomplete": "^0.18.2",
|
"npm-asset/textcomplete": "^0.18.2",
|
||||||
"npm-asset/typeahead.js": "^0.11.1",
|
"npm-asset/typeahead.js": "^0.11.1",
|
||||||
"minishlink/web-push": "^6.0"
|
"minishlink/web-push": "^6.0",
|
||||||
|
"mattwright/urlresolver": "^2.0"
|
||||||
},
|
},
|
||||||
"repositories": [
|
"repositories": [
|
||||||
{
|
{
|
||||||
|
|
48
composer.lock
generated
48
composer.lock
generated
|
@ -4,7 +4,7 @@
|
||||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||||
"This file is @generated automatically"
|
"This file is @generated automatically"
|
||||||
],
|
],
|
||||||
"content-hash": "7d6dee6e449da931e8fe209e61b2e78e",
|
"content-hash": "c9e0a9eacc23d884012042eeab01cc8b",
|
||||||
"packages": [
|
"packages": [
|
||||||
{
|
{
|
||||||
"name": "asika/simple-console",
|
"name": "asika/simple-console",
|
||||||
|
@ -1133,6 +1133,52 @@
|
||||||
],
|
],
|
||||||
"time": "2017-07-19T15:11:19+00:00"
|
"time": "2017-07-19T15:11:19+00:00"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "mattwright/urlresolver",
|
||||||
|
"version": "2.0",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/mattwright/URLResolver.php.git",
|
||||||
|
"reference": "416039192cb6d9158bdacd68349bceff8739b857"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/mattwright/URLResolver.php/zipball/416039192cb6d9158bdacd68349bceff8739b857",
|
||||||
|
"reference": "416039192cb6d9158bdacd68349bceff8739b857",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"ext-curl": "*",
|
||||||
|
"ext-mbstring": "*",
|
||||||
|
"php": ">=5.3"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"mattwright\\": "."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Matt Wright",
|
||||||
|
"email": "mw@mattwright.com"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "PHP class that attempts to resolve URLs to a final, canonical link.",
|
||||||
|
"homepage": "https://github.com/mattwright/URLResolver.php",
|
||||||
|
"keywords": [
|
||||||
|
"canonical",
|
||||||
|
"link",
|
||||||
|
"redirect",
|
||||||
|
"resolve",
|
||||||
|
"url"
|
||||||
|
],
|
||||||
|
"time": "2019-01-18T00:59:34+00:00"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "michelf/php-markdown",
|
"name": "michelf/php-markdown",
|
||||||
"version": "1.9.0",
|
"version": "1.9.0",
|
||||||
|
|
|
@ -10,6 +10,7 @@ use Friendica\Network\IHTTPClient;
|
||||||
use Friendica\Util\Profiler;
|
use Friendica\Util\Profiler;
|
||||||
use GuzzleHttp\Client;
|
use GuzzleHttp\Client;
|
||||||
use GuzzleHttp\RequestOptions;
|
use GuzzleHttp\RequestOptions;
|
||||||
|
use mattwright\URLResolver;
|
||||||
use Psr\Http\Message\RequestInterface;
|
use Psr\Http\Message\RequestInterface;
|
||||||
use Psr\Http\Message\ResponseInterface;
|
use Psr\Http\Message\ResponseInterface;
|
||||||
use Psr\Http\Message\UriInterface;
|
use Psr\Http\Message\UriInterface;
|
||||||
|
@ -85,6 +86,13 @@ class HTTPClientFactory extends BaseFactory
|
||||||
],
|
],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return new HTTPClient($logger, $this->profiler, $this->config, $userAgent, $guzzle);
|
$resolver = new URLResolver();
|
||||||
|
$resolver->setUserAgent($userAgent);
|
||||||
|
$resolver->setMaxRedirects(10);
|
||||||
|
$resolver->setRequestTimeout(10);
|
||||||
|
// if the file is too large then exit
|
||||||
|
$resolver->setMaxResponseDataSize(1000000);
|
||||||
|
|
||||||
|
return new HTTPClient($logger, $this->profiler, $guzzle, $resolver);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,9 +21,6 @@
|
||||||
|
|
||||||
namespace Friendica\Network;
|
namespace Friendica\Network;
|
||||||
|
|
||||||
use DOMDocument;
|
|
||||||
use DomXPath;
|
|
||||||
use Friendica\Core\Config\IConfig;
|
|
||||||
use Friendica\Core\System;
|
use Friendica\Core\System;
|
||||||
use Friendica\Util\Network;
|
use Friendica\Util\Network;
|
||||||
use Friendica\Util\Profiler;
|
use Friendica\Util\Profiler;
|
||||||
|
@ -32,6 +29,7 @@ use GuzzleHttp\Cookie\FileCookieJar;
|
||||||
use GuzzleHttp\Exception\RequestException;
|
use GuzzleHttp\Exception\RequestException;
|
||||||
use GuzzleHttp\Exception\TransferException;
|
use GuzzleHttp\Exception\TransferException;
|
||||||
use GuzzleHttp\RequestOptions;
|
use GuzzleHttp\RequestOptions;
|
||||||
|
use mattwright\URLResolver;
|
||||||
use Psr\Http\Message\ResponseInterface;
|
use Psr\Http\Message\ResponseInterface;
|
||||||
use Psr\Log\LoggerInterface;
|
use Psr\Log\LoggerInterface;
|
||||||
|
|
||||||
|
@ -44,20 +42,17 @@ class HTTPClient implements IHTTPClient
|
||||||
private $logger;
|
private $logger;
|
||||||
/** @var Profiler */
|
/** @var Profiler */
|
||||||
private $profiler;
|
private $profiler;
|
||||||
/** @var IConfig */
|
|
||||||
private $config;
|
|
||||||
/** @var string */
|
|
||||||
private $userAgent;
|
|
||||||
/** @var Client */
|
/** @var Client */
|
||||||
private $client;
|
private $client;
|
||||||
|
/** @var URLResolver */
|
||||||
|
private $resolver;
|
||||||
|
|
||||||
public function __construct(LoggerInterface $logger, Profiler $profiler, IConfig $config, string $userAgent, Client $client)
|
public function __construct(LoggerInterface $logger, Profiler $profiler, Client $client, URLResolver $resolver)
|
||||||
{
|
{
|
||||||
$this->logger = $logger;
|
$this->logger = $logger;
|
||||||
$this->profiler = $profiler;
|
$this->profiler = $profiler;
|
||||||
$this->config = $config;
|
$this->client = $client;
|
||||||
$this->userAgent = $userAgent;
|
$this->resolver = $resolver;
|
||||||
$this->client = $client;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -97,6 +92,11 @@ class HTTPClient implements IHTTPClient
|
||||||
return CurlResult::createErrorCurl($url);
|
return CurlResult::createErrorCurl($url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Network::isRedirectBlocked($url)) {
|
||||||
|
$this->logger->info('Domain should not be redirected.', ['url' => $url]);
|
||||||
|
return CurlResult::createErrorCurl($url);
|
||||||
|
}
|
||||||
|
|
||||||
$conf = [];
|
$conf = [];
|
||||||
|
|
||||||
if (!empty($opts['cookiejar'])) {
|
if (!empty($opts['cookiejar'])) {
|
||||||
|
@ -197,10 +197,12 @@ class HTTPClient implements IHTTPClient
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
|
public function finalUrl(string $url)
|
||||||
{
|
{
|
||||||
|
$this->profiler->startRecording('network');
|
||||||
|
|
||||||
if (Network::isLocalLink($url)) {
|
if (Network::isLocalLink($url)) {
|
||||||
$this->logger->info('Local link', ['url' => $url, 'callstack' => System::callstack(20)]);
|
$this->logger->debug('Local link', ['url' => $url, 'callstack' => System::callstack(20)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Network::isUrlBlocked($url)) {
|
if (Network::isUrlBlocked($url)) {
|
||||||
|
@ -215,104 +217,19 @@ class HTTPClient implements IHTTPClient
|
||||||
|
|
||||||
$url = Network::stripTrackingQueryParams($url);
|
$url = Network::stripTrackingQueryParams($url);
|
||||||
|
|
||||||
if ($depth > 10) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
$url = trim($url, "'");
|
$url = trim($url, "'");
|
||||||
|
|
||||||
$this->profiler->startRecording('network');
|
// Designate a temporary file that will store cookies during the session.
|
||||||
|
// Some websites test the browser for cookie support, so this enhances results.
|
||||||
|
$this->resolver->setCookieJar(tempnam(get_temppath() , 'url_resolver-'));
|
||||||
|
|
||||||
$ch = curl_init();
|
$urlResult = $this->resolver->resolveURL($url);
|
||||||
curl_setopt($ch, CURLOPT_URL, $url);
|
|
||||||
curl_setopt($ch, CURLOPT_HEADER, 1);
|
|
||||||
curl_setopt($ch, CURLOPT_NOBODY, 1);
|
|
||||||
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
|
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);
|
|
||||||
|
|
||||||
curl_exec($ch);
|
if ($urlResult->didErrorOccur()) {
|
||||||
$curl_info = @curl_getinfo($ch);
|
throw new TransferException($urlResult->getErrorMessageString());
|
||||||
$http_code = $curl_info['http_code'];
|
|
||||||
curl_close($ch);
|
|
||||||
|
|
||||||
$this->profiler->stopRecording();
|
|
||||||
|
|
||||||
if ($http_code == 0) {
|
|
||||||
return $url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (in_array($http_code, ['301', '302'])) {
|
return $urlResult->getURL();
|
||||||
if (!empty($curl_info['redirect_url'])) {
|
|
||||||
return $this->finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
|
|
||||||
} elseif (!empty($curl_info['location'])) {
|
|
||||||
return $this->finalUrl($curl_info['location'], ++$depth, $fetchbody);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for redirects in the meta elements of the body if there are no redirects in the header.
|
|
||||||
if (!$fetchbody) {
|
|
||||||
return $this->finalUrl($url, ++$depth, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// if the file is too large then exit
|
|
||||||
if ($curl_info["download_content_length"] > 1000000) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if it isn't a HTML file then exit
|
|
||||||
if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->profiler->startRecording('network');
|
|
||||||
|
|
||||||
$ch = curl_init();
|
|
||||||
curl_setopt($ch, CURLOPT_URL, $url);
|
|
||||||
curl_setopt($ch, CURLOPT_HEADER, 0);
|
|
||||||
curl_setopt($ch, CURLOPT_NOBODY, 0);
|
|
||||||
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
|
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
|
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);
|
|
||||||
|
|
||||||
$body = curl_exec($ch);
|
|
||||||
curl_close($ch);
|
|
||||||
|
|
||||||
$this->profiler->stopRecording();
|
|
||||||
|
|
||||||
if (trim($body) == "") {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for redirect in meta elements
|
|
||||||
$doc = new DOMDocument();
|
|
||||||
@$doc->loadHTML($body);
|
|
||||||
|
|
||||||
$xpath = new DomXPath($doc);
|
|
||||||
|
|
||||||
$list = $xpath->query("//meta[@content]");
|
|
||||||
foreach ($list as $node) {
|
|
||||||
$attr = [];
|
|
||||||
if ($node->attributes->length) {
|
|
||||||
foreach ($node->attributes as $attribute) {
|
|
||||||
$attr[$attribute->name] = $attribute->value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (@$attr["http-equiv"] == 'refresh') {
|
|
||||||
$path = $attr["content"];
|
|
||||||
$pathinfo = explode(";", $path);
|
|
||||||
foreach ($pathinfo as $value) {
|
|
||||||
if (substr(strtolower($value), 0, 4) == "url=") {
|
|
||||||
return $this->finalUrl(substr($value, 4), ++$depth);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $url;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -104,14 +104,10 @@ interface IHTTPClient
|
||||||
* through HTTP code or meta refresh tags. Stops after 10 redirections.
|
* through HTTP code or meta refresh tags. Stops after 10 redirections.
|
||||||
*
|
*
|
||||||
* @param string $url A user-submitted URL
|
* @param string $url A user-submitted URL
|
||||||
* @param int $depth The current redirection recursion level (internal)
|
|
||||||
* @param bool $fetchbody Whether to fetch the body or not after the HEAD requests
|
|
||||||
*
|
*
|
||||||
* @return string A canonical URL
|
* @return string A canonical URL
|
||||||
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
||||||
* @see ParseUrl::getSiteinfo
|
* @see ParseUrl::getSiteinfo
|
||||||
*
|
|
||||||
* @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
|
|
||||||
*/
|
*/
|
||||||
public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false);
|
public function finalUrl(string $url);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue