Avoid HTML parsing of an empty body

This commit is contained in:
Michael 2021-04-10 05:46:19 +00:00
parent 3e2fa7867c
commit 5a00902e11
7 changed files with 26 additions and 9 deletions

View file

@ -98,7 +98,7 @@ class OEmbed
if (!in_array($ext, $noexts)) {
// try oembed autodiscovery
$html_text = DI::httpRequest()->fetch($embedurl, 15, 'text/*');
if ($html_text) {
if (!empty($html_text)) {
$dom = new DOMDocument();
if (@$dom->loadHTML($html_text)) {
$xpath = new DOMXPath($dom);

View file

@ -1178,6 +1178,10 @@ class BBCode
// if its not a picture then look if its a page that contains a picture link
$body = DI::httpRequest()->fetch($match[1]);
if (empty($body)) {
DI::cache()->set($cache_key, $text);
return $text;
}
$doc = new DOMDocument();
@$doc->loadHTML($body);
@ -1214,8 +1218,6 @@ class BBCode
private static function cleanPictureLinksCallback($match)
{
$a = DI::app();
// When the picture link is the own photo path then we can avoid fetching the link
$own_photo_url = preg_quote(Strings::normaliseLink(DI::baseUrl()->get()) . '/photos/');
if (preg_match('|' . $own_photo_url . '.*?/image/|', Strings::normaliseLink($match[1]))) {
@ -1257,6 +1259,10 @@ class BBCode
// if its not a picture then look if its a page that contains a picture link
$body = DI::httpRequest()->fetch($match[1]);
if (empty($body)) {
DI::cache()->set($cache_key, $text);
return $text;
}
$doc = new DOMDocument();
@$doc->loadHTML($body);

View file

@ -605,6 +605,10 @@ class HTML
// Collecting all links
$urls = self::collectURLs($message);
if (empty($message)) {
return '';
}
@$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);
self::tagToBBCode($doc, 'html', [], '', '');

View file

@ -1475,6 +1475,10 @@ class GServer
*/
private static function analyseRootBody($curlResult, array $serverdata, string $url)
{
if (empty($curlResult->getBody())) {
return $serverdata;
}
$doc = new DOMDocument();
@$doc->loadHTML($curlResult->getBody());
$xpath = new DOMXPath($doc);

View file

@ -439,6 +439,9 @@ class Probe
}
$body = $curlResult->getBody();
if (empty($body)) {
return false;
}
$doc = new DOMDocument();
@$doc->loadHTML($body);
@ -1274,7 +1277,7 @@ class Probe
return [];
}
$content = $curlResult->getBody();
if (!$content) {
if (empty($content)) {
return [];
}
@ -1610,7 +1613,7 @@ class Probe
private static function pumpioProfileData($profile_link)
{
$curlResult = DI::httpRequest()->get($profile_link);
if (!$curlResult->isSuccess()) {
if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return [];
}

View file

@ -735,7 +735,7 @@ class OStatus
$curlResult = DI::httpRequest()->get($conversation, ['accept_content' => 'application/atom+xml, text/html']);
if (!$curlResult->isSuccess()) {
if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return;
}
@ -928,7 +928,7 @@ class OStatus
$stored = false;
$curlResult = DI::httpRequest()->get($related, ['accept_content' => 'application/atom+xml, text/html']);
if (!$curlResult->isSuccess()) {
if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return;
}

View file

@ -214,7 +214,7 @@ class ParseUrl
}
$curlResult = DI::httpRequest()->get($url);
if (!$curlResult->isSuccess()) {
if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
return $siteinfo;
}