diff --git a/src/Network/HTTPRequest.php b/src/Network/HTTPRequest.php index 1a0048b2a1..08ac203f5a 100644 --- a/src/Network/HTTPRequest.php +++ b/src/Network/HTTPRequest.php @@ -21,10 +21,13 @@ namespace Friendica\Network; +use DOMDocument; +use DomXPath; use Friendica\App; use Friendica\Core\Config\IConfig; use Friendica\Core\Logger; use Friendica\Core\System; +use Friendica\DI; use Friendica\Util\Network; use Friendica\Util\Profiler; use Psr\Log\LoggerInterface; @@ -323,6 +326,124 @@ class HTTPRequest return $curlResponse; } + /** + * Returns the original URL of the provided URL + * + * This function strips tracking query params and follows redirections, either + * through HTTP code or meta refresh tags. Stops after 10 redirections. + * + * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request + * + * @see ParseUrl::getSiteinfo + * + * @param string $url A user-submitted URL + * @param int $depth The current redirection recursion level (internal) + * @param bool $fetchbody Whether to fetch the body or not after the HEAD requests + * @return string A canonical URL + * @throws \Friendica\Network\HTTPException\InternalServerErrorException + */ + public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false) + { + $url = Network::stripTrackingQueryParams($url); + + if ($depth > 10) { + return $url; + } + + $url = trim($url, "'"); + + $stamp1 = microtime(true); + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 1); + curl_setopt($ch, CURLOPT_NOBODY, 1); + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent()); + + curl_exec($ch); + $curl_info = @curl_getinfo($ch); + $http_code = $curl_info['http_code']; + curl_close($ch); + + DI::profiler()->saveTimestamp($stamp1, "network", System::callstack()); + + if ($http_code == 0) { + return $url; + } + + if (in_array($http_code, 
['301', '302'])) { + if (!empty($curl_info['redirect_url'])) { + return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody); + } elseif (!empty($curl_info['location'])) { + return self::finalUrl($curl_info['location'], ++$depth, $fetchbody); + } + } + + // Check for redirects in the meta elements of the body if there are no redirects in the header. + if (!$fetchbody) { + return self::finalUrl($url, ++$depth, true); + } + + // if the file is too large then exit + if ($curl_info["download_content_length"] > 1000000) { + return $url; + } + + // if it isn't a HTML file then exit + if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) { + return $url; + } + + $stamp1 = microtime(true); + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_NOBODY, 0); + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent()); + + $body = curl_exec($ch); + curl_close($ch); + + DI::profiler()->saveTimestamp($stamp1, "network", System::callstack()); + + if (trim($body) == "") { + return $url; + } + + // Check for redirect in meta elements + $doc = new DOMDocument(); + @$doc->loadHTML($body); + + $xpath = new DomXPath($doc); + + $list = $xpath->query("//meta[@content]"); + foreach ($list as $node) { + $attr = []; + if ($node->attributes->length) { + foreach ($node->attributes as $attribute) { + $attr[$attribute->name] = $attribute->value; + } + } + + if (@$attr["http-equiv"] == 'refresh') { + $path = $attr["content"]; + $pathinfo = explode(";", $path); + foreach ($pathinfo as $value) { + if (substr(strtolower($value), 0, 4) == "url=") { + return self::finalUrl(substr($value, 4), ++$depth); + } + } + } + } + + return $url; + } + /** * Curl wrapper * diff --git a/src/Protocol/Feed.php b/src/Protocol/Feed.php index a665b7c85a..a609ae2963 100644 --- 
a/src/Protocol/Feed.php +++ b/src/Protocol/Feed.php @@ -35,6 +35,7 @@ use Friendica\Model\Contact; use Friendica\Model\Item; use Friendica\Model\Tag; use Friendica\Model\User; +use Friendica\Network\HTTPRequest; use Friendica\Util\DateTimeFormat; use Friendica\Util\Network; use Friendica\Util\ParseUrl; @@ -350,7 +351,7 @@ class Feed $orig_plink = $item["plink"]; - $item["plink"] = Network::finalUrl($item["plink"]); + $item["plink"] = HTTPRequest::finalUrl($item["plink"]); $item["parent-uri"] = $item["uri"]; diff --git a/src/Util/Network.php b/src/Util/Network.php index a8b216b34b..7795b0cd29 100644 --- a/src/Util/Network.php +++ b/src/Util/Network.php @@ -21,11 +21,8 @@ namespace Friendica\Util; -use DOMDocument; -use DomXPath; use Friendica\Core\Hook; use Friendica\Core\Logger; -use Friendica\Core\System; use Friendica\DI; class Network @@ -314,126 +311,6 @@ class Network return self::unparseURL($parts); } - /** - * Returns the original URL of the provided URL - * - * This function strips tracking query params and follows redirections, either - * through HTTP code or meta refresh tags. Stops after 10 redirections. 
- * - * @todo Remove the $fetchbody parameter that generates an extraneous HEAD request - * - * @see ParseUrl::getSiteinfo - * - * @param string $url A user-submitted URL - * @param int $depth The current redirection recursion level (internal) - * @param bool $fetchbody Wether to fetch the body or not after the HEAD requests - * @return string A canonical URL - * @throws \Friendica\Network\HTTPException\InternalServerErrorException - */ - public static function finalUrl(string $url, int $depth = 1, bool $fetchbody = false) - { - $a = DI::app(); - - $url = self::stripTrackingQueryParams($url); - - if ($depth > 10) { - return $url; - } - - $url = trim($url, "'"); - - $stamp1 = microtime(true); - - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HEADER, 1); - curl_setopt($ch, CURLOPT_NOBODY, 1); - curl_setopt($ch, CURLOPT_TIMEOUT, 10); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent()); - - curl_exec($ch); - $curl_info = @curl_getinfo($ch); - $http_code = $curl_info['http_code']; - curl_close($ch); - - DI::profiler()->saveTimestamp($stamp1, "network", System::callstack()); - - if ($http_code == 0) { - return $url; - } - - if (in_array($http_code, ['301', '302'])) { - if (!empty($curl_info['redirect_url'])) { - return self::finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody); - } elseif (!empty($curl_info['location'])) { - return self::finalUrl($curl_info['location'], ++$depth, $fetchbody); - } - } - - // Check for redirects in the meta elements of the body if there are no redirects in the header. 
- if (!$fetchbody) { - return(self::finalUrl($url, ++$depth, true)); - } - - // if the file is too large then exit - if ($curl_info["download_content_length"] > 1000000) { - return $url; - } - - // if it isn't a HTML file then exit - if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) { - return $url; - } - - $stamp1 = microtime(true); - - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HEADER, 0); - curl_setopt($ch, CURLOPT_NOBODY, 0); - curl_setopt($ch, CURLOPT_TIMEOUT, 10); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_USERAGENT, DI::httpRequest()->getUserAgent()); - - $body = curl_exec($ch); - curl_close($ch); - - DI::profiler()->saveTimestamp($stamp1, "network", System::callstack()); - - if (trim($body) == "") { - return $url; - } - - // Check for redirect in meta elements - $doc = new DOMDocument(); - @$doc->loadHTML($body); - - $xpath = new DomXPath($doc); - - $list = $xpath->query("//meta[@content]"); - foreach ($list as $node) { - $attr = []; - if ($node->attributes->length) { - foreach ($node->attributes as $attribute) { - $attr[$attribute->name] = $attribute->value; - } - } - - if (@$attr["http-equiv"] == 'refresh') { - $path = $attr["content"]; - $pathinfo = explode(";", $path); - foreach ($pathinfo as $value) { - if (substr(strtolower($value), 0, 4) == "url=") { - return self::finalUrl(substr($value, 4), ++$depth); - } - } - } - } - - return $url; - } - /** * Find the matching part between two url *