From a64948b7f76fc7b23f05d12fd23809ed6d45ff79 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Fri, 4 Apr 2014 10:55:30 +0200 Subject: [PATCH] New function "original_url" (moved from an addon) that follows all redirects to the original url. --- include/network.php | 70 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/include/network.php b/include/network.php index 4bc50974b0..b85775ba2b 100644 --- a/include/network.php +++ b/include/network.php @@ -189,7 +189,7 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) $base = substr($base,strlen($chunk)); } - if($http_code == 301 || $http_code == 302 || $http_code == 303) { + if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { $matches = array(); preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); $newurl = trim(array_pop($matches)); @@ -1105,3 +1105,71 @@ function xml2array($contents, $namespaces = true, $get_attributes=1, $priority = return($xml_array); } + +function original_url($url, $depth=1, $fetchbody = false) { + if ($depth > 10) + return($url); + + $url = trim($url, "'"); + + $siteinfo = array(); + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 1); + + if ($fetchbody) + curl_setopt($ch, CURLOPT_NOBODY, 0); + else + curl_setopt($ch, CURLOPT_NOBODY, 1); + + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0'); + + $header = curl_exec($ch); + $curl_info = @curl_getinfo($ch); + $http_code = $curl_info['http_code']; + curl_close($ch); + + if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302")) + AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) { + if ($curl_info['redirect_url'] != "") + return(original_url($curl_info['redirect_url'], ++$depth, $fetchbody)); + else + return(original_url($curl_info['location'], ++$depth, $fetchbody)); + } + + $pos = strpos($header, "\r\n\r\n"); + + if ($pos) + $body = trim(substr($header, $pos)); + else + $body = $header; + + if (trim($body) == "") + return(original_url($url, ++$depth, true)); + + $doc = new DOMDocument(); + @$doc->loadHTML($body); + + $xpath = new DomXPath($doc); + + $list = $xpath->query("//meta[@content]"); + foreach ($list as $node) { + $attr = array(); + if ($node->attributes->length) + foreach ($node->attributes as $attribute) + $attr[$attribute->name] = $attribute->value; + + if (@$attr["http-equiv"] == 'refresh') { + $path = $attr["content"]; + $pathinfo = explode(";", $path); + $content = ""; + foreach ($pathinfo AS $value) + if (substr(strtolower($value), 0, 4) == "url=") + return(original_url(substr($value, 4), ++$depth)); + } + } + + return($url); +}