Move "Network::finalUrl" to "HTTPRequest" class

This commit is contained in:
nupplaPhil 2020-03-04 22:30:24 +01:00 committed by Hypolite Petovan
parent 57587efe58
commit 60e18736b0
3 changed files with 123 additions and 124 deletions

View file

@ -21,10 +21,13 @@
namespace Friendica\Network; namespace Friendica\Network;
use DOMDocument;
use DomXPath;
use Friendica\App; use Friendica\App;
use Friendica\Core\Config\IConfig; use Friendica\Core\Config\IConfig;
use Friendica\Core\Logger; use Friendica\Core\Logger;
use Friendica\Core\System; use Friendica\Core\System;
use Friendica\DI;
use Friendica\Util\Network; use Friendica\Util\Network;
use Friendica\Util\Profiler; use Friendica\Util\Profiler;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
@ -323,6 +326,124 @@ class HTTPRequest
return $curlResponse; return $curlResponse;
} }
/**
* Returns the original URL of the provided URL
*
* This function strips tracking query params and follows redirections, either
* through HTTP code or meta refresh tags. Stops after 10 redirections.
*
* @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
*
* @see ParseUrl::getSiteinfo
*
* @param string $url A user-submitted URL
* @param int $depth The current redirection recursion level (internal)
* @param bool $fetchbody Wether to fetch the body or not after the HEAD requests
* @return string A canonical URL
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/
public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
{
$url = Network::stripTrackingQueryParams($url);
if ($depth > 10) {
return $url;
}
$url = trim($url, "'");
$stamp1 = microtime(true);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info['http_code'];
curl_close($ch);
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
if ($http_code == 0) {
return $url;
}
if (in_array($http_code, ['301', '302'])) {
if (!empty($curl_info['redirect_url'])) {
return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
} elseif (!empty($curl_info['location'])) {
return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
}
}
// Check for redirects in the meta elements of the body if there are no redirects in the header.
if (!$fetchbody) {
return self::finalUrl($url, ++$depth, true);
}
// if the file is too large then exit
if ($curl_info["download_content_length"] > 1000000) {
return $url;
}
// if it isn't a HTML file then exit
if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
return $url;
}
$stamp1 = microtime(true);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
$body = curl_exec($ch);
curl_close($ch);
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
if (trim($body) == "") {
return $url;
}
// Check for redirect in meta elements
$doc = new DOMDocument();
@$doc->loadHTML($body);
$xpath = new DomXPath($doc);
$list = $xpath->query("//meta[@content]");
foreach ($list as $node) {
$attr = [];
if ($node->attributes->length) {
foreach ($node->attributes as $attribute) {
$attr[$attribute->name] = $attribute->value;
}
}
if (@$attr["http-equiv"] == 'refresh') {
$path = $attr["content"];
$pathinfo = explode(";", $path);
foreach ($pathinfo as $value) {
if (substr(strtolower($value), 0, 4) == "url=") {
return self::finalUrl(substr($value, 4), ++$depth);
}
}
}
}
return $url;
}
/** /**
* Curl wrapper * Curl wrapper
* *

View file

@ -35,6 +35,7 @@ use Friendica\Model\Contact;
use Friendica\Model\Item; use Friendica\Model\Item;
use Friendica\Model\Tag; use Friendica\Model\Tag;
use Friendica\Model\User; use Friendica\Model\User;
use Friendica\Network\HTTPRequest;
use Friendica\Util\DateTimeFormat; use Friendica\Util\DateTimeFormat;
use Friendica\Util\Network; use Friendica\Util\Network;
use Friendica\Util\ParseUrl; use Friendica\Util\ParseUrl;
@ -350,7 +351,7 @@ class Feed
$orig_plink = $item["plink"]; $orig_plink = $item["plink"];
$item["plink"] = Network::finalUrl($item["plink"]); $item["plink"] = HTTPRequest::finalUrl($item["plink"]);
$item["parent-uri"] = $item["uri"]; $item["parent-uri"] = $item["uri"];

View file

@ -21,11 +21,8 @@
namespace Friendica\Util; namespace Friendica\Util;
use DOMDocument;
use DomXPath;
use Friendica\Core\Hook; use Friendica\Core\Hook;
use Friendica\Core\Logger; use Friendica\Core\Logger;
use Friendica\Core\System;
use Friendica\DI; use Friendica\DI;
class Network class Network
@ -314,126 +311,6 @@ class Network
return self::unparseURL($parts); return self::unparseURL($parts);
} }
/**
* Returns the original URL of the provided URL
*
* This function strips tracking query params and follows redirections, either
* through HTTP code or meta refresh tags. Stops after 10 redirections.
*
* @todo Remove the $fetchbody parameter that generates an extraneous HEAD request
*
* @see ParseUrl::getSiteinfo
*
* @param string $url A user-submitted URL
* @param int $depth The current redirection recursion level (internal)
* @param bool $fetchbody Wether to fetch the body or not after the HEAD requests
* @return string A canonical URL
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/
public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
{
$a = DI::app();
$url = self::stripTrackingQueryParams($url);
if ($depth > 10) {
return $url;
}
$url = trim($url, "'");
$stamp1 = microtime(true);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info['http_code'];
curl_close($ch);
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
if ($http_code == 0) {
return $url;
}
if (in_array($http_code, ['301', '302'])) {
if (!empty($curl_info['redirect_url'])) {
return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
} elseif (!empty($curl_info['location'])) {
return self::finalUrl($curl_info['location'], ++$depth, $fetchbody);
}
}
// Check for redirects in the meta elements of the body if there are no redirects in the header.
if (!$fetchbody) {
return(self::finalUrl($url, ++$depth, true));
}
// if the file is too large then exit
if ($curl_info["download_content_length"] > 1000000) {
return $url;
}
// if it isn't a HTML file then exit
if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
return $url;
}
$stamp1 = microtime(true);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent());
$body = curl_exec($ch);
curl_close($ch);
DI::profiler()->saveTimestamp($stamp1, "network", System::callstack());
if (trim($body) == "") {
return $url;
}
// Check for redirect in meta elements
$doc = new DOMDocument();
@$doc->loadHTML($body);
$xpath = new DomXPath($doc);
$list = $xpath->query("//meta[@content]");
foreach ($list as $node) {
$attr = [];
if ($node->attributes->length) {
foreach ($node->attributes as $attribute) {
$attr[$attribute->name] = $attribute->value;
}
}
if (@$attr["http-equiv"] == 'refresh') {
$path = $attr["content"];
$pathinfo = explode(";", $path);
foreach ($pathinfo as $value) {
if (substr(strtolower($value), 0, 4) == "url=") {
return self::finalUrl(substr($value, 4), ++$depth);
}
}
}
}
return $url;
}
/** /**
* Find the matching part between two url * Find the matching part between two url
* *