diff --git a/include/network.php b/include/network.php index 6a37f4a549..190dce326e 100644 --- a/include/network.php +++ b/include/network.php @@ -1159,13 +1159,8 @@ function original_url($url, $depth=1, $fetchbody = false) { $siteinfo = array(); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HEADER, 1); - - if ($fetchbody) - curl_setopt($ch, CURLOPT_NOBODY, 0); - else - curl_setopt($ch, CURLOPT_NOBODY, 1); - + curl_setopt($ch, CURLOPT_HEADER, 1); + curl_setopt($ch, CURLOPT_NOBODY, 1); curl_setopt($ch, CURLOPT_TIMEOUT, 10); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); @@ -1183,16 +1178,33 @@ function original_url($url, $depth=1, $fetchbody = false) { return(original_url($curl_info['location'], ++$depth, $fetchbody)); } - $pos = strpos($header, "\r\n\r\n"); + // Check for redirects in the meta elements of the body if there are no redirects in the header. + if (!$fetchbody) + return(original_url($url, ++$depth, true)); - if ($pos) - $body = trim(substr($header, $pos)); - else - $body = $header; + // if the file is too large then exit + if ($curl_info["download_content_length"] > 1000000) + return($url); + + // if it isn't a HTML file then exit + if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html")) + return($url); + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_NOBODY, 0); + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + + $body = curl_exec($ch); + curl_close($ch); if (trim($body) == "") - return(original_url($url, ++$depth, true)); + return($url); + // Check for redirect in meta elements $doc = new DOMDocument(); @$doc->loadHTML($body); diff --git a/mod/parse_url.php b/mod/parse_url.php index 06e59b8691..9df35c7612 100644 --- a/mod/parse_url.php +++ b/mod/parse_url.php @@ -73,7 +73,7 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_HEADER, 1); - curl_setopt($ch, CURLOPT_NOBODY, 0); + curl_setopt($ch, CURLOPT_NOBODY, 1); curl_setopt($ch, CURLOPT_TIMEOUT, 3); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); //curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); @@ -93,6 +93,28 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co return($siteinfo); } + // if the file is too large then exit + if ($curl_info["download_content_length"] > 1000000) + return($siteinfo); + + // if it isn't a HTML file then exit + if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html")) + return($siteinfo); + + // Now fetch the body as well + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 1); + curl_setopt($ch, CURLOPT_NOBODY, 0); + curl_setopt($ch, CURLOPT_TIMEOUT, 10); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent()); + + $header = curl_exec($ch); + $curl_info = @curl_getinfo($ch); + $http_code = $curl_info['http_code']; + curl_close($ch); + if ($do_oembed) { require_once("include/oembed.php");