1
1
Fork 0

Avoid HTML parsing of an empty body

This commit is contained in:
Michael 2021-04-10 05:46:19 +00:00
commit 5a00902e11
7 changed files with 26 additions and 9 deletions

View file

@ -98,7 +98,7 @@ class OEmbed
if (!in_array($ext, $noexts)) { if (!in_array($ext, $noexts)) {
// try oembed autodiscovery // try oembed autodiscovery
$html_text = DI::httpRequest()->fetch($embedurl, 15, 'text/*'); $html_text = DI::httpRequest()->fetch($embedurl, 15, 'text/*');
if ($html_text) { if (!empty($html_text)) {
$dom = new DOMDocument(); $dom = new DOMDocument();
if (@$dom->loadHTML($html_text)) { if (@$dom->loadHTML($html_text)) {
$xpath = new DOMXPath($dom); $xpath = new DOMXPath($dom);

View file

@ -1178,7 +1178,11 @@ class BBCode
// if it's not a picture then look if it's a page that contains a picture link // if it's not a picture then look if it's a page that contains a picture link
$body = DI::httpRequest()->fetch($match[1]); $body = DI::httpRequest()->fetch($match[1]);
if (empty($body)) {
DI::cache()->set($cache_key, $text);
return $text;
}
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML($body); @$doc->loadHTML($body);
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
@ -1214,8 +1218,6 @@ class BBCode
private static function cleanPictureLinksCallback($match) private static function cleanPictureLinksCallback($match)
{ {
$a = DI::app();
// When the picture link is the own photo path then we can avoid fetching the link // When the picture link is the own photo path then we can avoid fetching the link
$own_photo_url = preg_quote(Strings::normaliseLink(DI::baseUrl()->get()) . '/photos/'); $own_photo_url = preg_quote(Strings::normaliseLink(DI::baseUrl()->get()) . '/photos/');
if (preg_match('|' . $own_photo_url . '.*?/image/|', Strings::normaliseLink($match[1]))) { if (preg_match('|' . $own_photo_url . '.*?/image/|', Strings::normaliseLink($match[1]))) {
@ -1257,6 +1259,10 @@ class BBCode
// if it's not a picture then look if it's a page that contains a picture link // if it's not a picture then look if it's a page that contains a picture link
$body = DI::httpRequest()->fetch($match[1]); $body = DI::httpRequest()->fetch($match[1]);
if (empty($body)) {
DI::cache()->set($cache_key, $text);
return $text;
}
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML($body); @$doc->loadHTML($body);

View file

@ -605,6 +605,10 @@ class HTML
// Collecting all links // Collecting all links
$urls = self::collectURLs($message); $urls = self::collectURLs($message);
if (empty($message)) {
return '';
}
@$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);
self::tagToBBCode($doc, 'html', [], '', ''); self::tagToBBCode($doc, 'html', [], '', '');

View file

@ -1475,6 +1475,10 @@ class GServer
*/ */
private static function analyseRootBody($curlResult, array $serverdata, string $url) private static function analyseRootBody($curlResult, array $serverdata, string $url)
{ {
if (empty($curlResult->getBody())) {
return $serverdata;
}
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML($curlResult->getBody()); @$doc->loadHTML($curlResult->getBody());
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);

View file

@ -439,6 +439,9 @@ class Probe
} }
$body = $curlResult->getBody(); $body = $curlResult->getBody();
if (empty($body)) {
return false;
}
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML($body); @$doc->loadHTML($body);
@ -1274,7 +1277,7 @@ class Probe
return []; return [];
} }
$content = $curlResult->getBody(); $content = $curlResult->getBody();
if (!$content) { if (empty($content)) {
return []; return [];
} }
@ -1610,7 +1613,7 @@ class Probe
private static function pumpioProfileData($profile_link) private static function pumpioProfileData($profile_link)
{ {
$curlResult = DI::httpRequest()->get($profile_link); $curlResult = DI::httpRequest()->get($profile_link);
if (!$curlResult->isSuccess()) { if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return []; return [];
} }

View file

@ -735,7 +735,7 @@ class OStatus
$curlResult = DI::httpRequest()->get($conversation, ['accept_content' => 'application/atom+xml, text/html']); $curlResult = DI::httpRequest()->get($conversation, ['accept_content' => 'application/atom+xml, text/html']);
if (!$curlResult->isSuccess()) { if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return; return;
} }
@ -928,7 +928,7 @@ class OStatus
$stored = false; $stored = false;
$curlResult = DI::httpRequest()->get($related, ['accept_content' => 'application/atom+xml, text/html']); $curlResult = DI::httpRequest()->get($related, ['accept_content' => 'application/atom+xml, text/html']);
if (!$curlResult->isSuccess()) { if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return; return;
} }

View file

@ -214,7 +214,7 @@ class ParseUrl
} }
$curlResult = DI::httpRequest()->get($url); $curlResult = DI::httpRequest()->get($url);
if (!$curlResult->isSuccess()) { if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return $siteinfo; return $siteinfo;
} }