Merge pull request #10137 from annando/empty-body

Avoid HTML parsing of an empty body
2021-04-10 08:10:23 +02:00 · 2021-04-10 08:10:23 +02:00 · 43c6563123
commit 43c6563123
parent 3e2fa7867c 5a00902e11
7 changed files with 26 additions and 9 deletions
--- a/src/Content/OEmbed.php
+++ b/src/Content/OEmbed.php
@@ -98,7 +98,7 @@ class OEmbed
 			if (!in_array($ext, $noexts)) {
 				// try oembed autodiscovery
 				$html_text = DI::httpRequest()->fetch($embedurl, 15, 'text/*');
-				if ($html_text) {
+				if (!empty($html_text)) {
 					$dom = new DOMDocument();
 					if (@$dom->loadHTML($html_text)) {
 						$xpath = new DOMXPath($dom);
--- a/src/Content/Text/BBCode.php
+++ b/src/Content/Text/BBCode.php
@@ -1178,6 +1178,10 @@ class BBCode

 				// if its not a picture then look if its a page that contains a picture link
 				$body = DI::httpRequest()->fetch($match[1]);
+				if (empty($body)) {
+					DI::cache()->set($cache_key, $text);
+					return $text;
+				}
 		
 				$doc = new DOMDocument();
 				@$doc->loadHTML($body);
@@ -1214,8 +1218,6 @@ class BBCode

 	private static function cleanPictureLinksCallback($match)
 	{
-		$a = DI::app();
-
 		// When the picture link is the own photo path then we can avoid fetching the link
 		$own_photo_url = preg_quote(Strings::normaliseLink(DI::baseUrl()->get()) . '/photos/');
 		if (preg_match('|' . $own_photo_url . '.*?/image/|', Strings::normaliseLink($match[1]))) {
@@ -1257,6 +1259,10 @@ class BBCode

 			// if its not a picture then look if its a page that contains a picture link
 			$body = DI::httpRequest()->fetch($match[1]);
+			if (empty($body)) {
+				DI::cache()->set($cache_key, $text);
+				return $text;
+			}

 			$doc = new DOMDocument();
 			@$doc->loadHTML($body);
--- a/src/Content/Text/HTML.php
+++ b/src/Content/Text/HTML.php
@@ -605,6 +605,10 @@ class HTML
 		// Collecting all links
 		$urls = self::collectURLs($message);

+		if (empty($message)) {
+			return '';
+		}
+
 		@$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);

 		self::tagToBBCode($doc, 'html', [], '', '');
--- a/src/Model/GServer.php
+++ b/src/Model/GServer.php
@@ -1475,6 +1475,10 @@ class GServer
 	 */
 	private static function analyseRootBody($curlResult, array $serverdata, string $url)
 	{
+		if (empty($curlResult->getBody())) {
+			return $serverdata;
+		}
+
 		$doc = new DOMDocument();
 		@$doc->loadHTML($curlResult->getBody());
 		$xpath = new DOMXPath($doc);
--- a/src/Network/Probe.php
+++ b/src/Network/Probe.php
@@ -439,6 +439,9 @@ class Probe
 		}

 		$body = $curlResult->getBody();
+		if (empty($body)) {
+			return false;
+		}

 		$doc = new DOMDocument();
 		@$doc->loadHTML($body);
@@ -1274,7 +1277,7 @@ class Probe
 			return [];
 		}
 		$content = $curlResult->getBody();
-		if (!$content) {
+		if (empty($content)) {
 			return [];
 		}

@@ -1610,7 +1613,7 @@ class Probe
 	private static function pumpioProfileData($profile_link)
 	{
 		$curlResult = DI::httpRequest()->get($profile_link);
-		if (!$curlResult->isSuccess()) {
+		if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
 			return [];
 		}

--- a/src/Protocol/OStatus.php
+++ b/src/Protocol/OStatus.php
@@ -735,7 +735,7 @@ class OStatus

 		$curlResult = DI::httpRequest()->get($conversation, ['accept_content' => 'application/atom+xml, text/html']);

-		if (!$curlResult->isSuccess()) {
+		if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
 			return;
 		}

@@ -928,7 +928,7 @@ class OStatus
 		$stored = false;
 		$curlResult = DI::httpRequest()->get($related, ['accept_content' => 'application/atom+xml, text/html']);

-		if (!$curlResult->isSuccess()) {
+		if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
 			return;
 		}

--- a/src/Util/ParseUrl.php
+++ b/src/Util/ParseUrl.php
@@ -214,7 +214,7 @@ class ParseUrl
 		}

 		$curlResult = DI::httpRequest()->get($url);
-		if (!$curlResult->isSuccess()) {
+		if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
 			return $siteinfo;
 		}