Merge pull request #1280 from annando/1501-too-large-file-in-redirection

Memory problems because of too large files
Tobias Diekershoff 2015-01-04 20:56:49 +01:00
commit a774d72d97
2 changed files with 48 additions and 14 deletions
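
Both files get the same guard: the existing request is turned into a header-only probe (CURLOPT_NOBODY), curl_getinfo() is inspected for download_content_length and content_type, and only when the resource is no larger than 1000000 bytes and looks like HTML is a second request made that actually downloads the body. A minimal standalone sketch of that pattern follows; the function name and the bare set of curl options are illustrative assumptions, not code from the diff. Note that curl reports download_content_length as -1 when the server sends no Content-Length header, in which case the size guard simply does not fire.

<?php
// Sketch of the two-step fetch this PR introduces: probe the headers first,
// download the body only if the resource is small enough and is HTML.
function fetch_html_if_small($url, $max_bytes = 1000000) {
	// Step 1: header-only request, cheap even for huge files.
	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_HEADER, 1);
	curl_setopt($ch, CURLOPT_NOBODY, 1);
	curl_setopt($ch, CURLOPT_TIMEOUT, 10);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	curl_exec($ch);
	$info = @curl_getinfo($ch);
	curl_close($ch);

	// Too large: bail out before any memory is spent on the body.
	if ($info["download_content_length"] > $max_bytes)
		return "";

	// Not HTML: nothing to parse, bail out as well.
	if (($info["content_type"] != "") AND !strstr(strtolower($info["content_type"]), "html"))
		return "";

	// Step 2: fetch the body itself.
	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_HEADER, 0);
	curl_setopt($ch, CURLOPT_NOBODY, 0);
	curl_setopt($ch, CURLOPT_TIMEOUT, 10);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	$body = curl_exec($ch);
	curl_close($ch);

	return ($body === false) ? "" : $body;
}

In the diff itself, original_url() uses a 10-second timeout for both requests, while parseurl_getsiteinfo() keeps its short 3-second timeout for the probe and allows 10 seconds for the body fetch.
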


@@ -1159,13 +1159,8 @@ function original_url($url, $depth=1, $fetchbody = false) {
$siteinfo = array();
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
if ($fetchbody)
curl_setopt($ch, CURLOPT_NOBODY, 0);
else
curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
@@ -1183,16 +1178,33 @@ function original_url($url, $depth=1, $fetchbody = false) {
return(original_url($curl_info['location'], ++$depth, $fetchbody));
}
$pos = strpos($header, "\r\n\r\n");
// Check for redirects in the meta elements of the body if there are no redirects in the header.
if (!$fetchbody)
return(original_url($url, ++$depth, true));
if ($pos)
$body = trim(substr($header, $pos));
else
$body = $header;
// if the file is too large then exit
if ($curl_info["download_content_length"] > 1000000)
return($url);
// if it isn't a HTML file then exit
if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html"))
return($url);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
$body = curl_exec($ch);
curl_close($ch);
if (trim($body) == "")
return(original_url($url, ++$depth, true));
return($url);
// Check for redirect in meta elements
$doc = new DOMDocument();
@$doc->loadHTML($body);
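
For context, a meta-refresh redirect of the kind this block goes on to look for can be extracted from the parsed document roughly as in the sketch below; this is an assumed standalone helper, not the literal code that follows in the file.

<?php
// Sketch: find a <meta http-equiv="refresh" content="0; url=..."> redirect
// in an HTML body, for redirects that never appear in the HTTP headers.
function find_meta_refresh_url($body) {
	$doc = new DOMDocument();
	@$doc->loadHTML($body);

	foreach ($doc->getElementsByTagName("meta") as $meta) {
		if (strtolower($meta->getAttribute("http-equiv")) != "refresh")
			continue;

		// The content attribute is typically "<seconds>; url=<target>".
		if (preg_match("/url\s*=\s*(\S+)/i", $meta->getAttribute("content"), $matches))
			return trim($matches[1], "\"' ");
	}
	return "";
}
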


@@ -73,7 +73,7 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
//curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
@@ -93,6 +93,28 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
return($siteinfo);
}
// if the file is too large then exit
if ($curl_info["download_content_length"] > 1000000)
return($siteinfo);
// if it isn't a HTML file then exit
if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html"))
return($siteinfo);
// Now fetch the body as well
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $a->get_useragent());
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info['http_code'];
curl_close($ch);
if ($do_oembed) {
require_once("include/oembed.php");