Add new xpathQuote and checkRelMeLink methods to Content\Text\HTML class

- Add tests for both methods
This commit is contained in:
Hypolite Petovan 2022-12-23 02:41:22 -05:00
parent b898dd1c00
commit 91c4ab2359
9 changed files with 270 additions and 0 deletions

View file

@ -33,6 +33,7 @@ use Friendica\Util\Network;
use Friendica\Util\Strings;
use Friendica\Util\XML;
use League\HTMLToMarkdown\HtmlConverter;
use Psr\Http\Message\UriInterface;
class HTML
{
@ -1007,4 +1008,51 @@ class HTML
return $text;
}
/**
 * Quotes an arbitrary string so it can be embedded in an XPath expression.
 *
 * XPath 1.0 string literals have no escape sequence for their own delimiter,
 * so the quoting style depends on which quote characters the value contains:
 * - no double quote: the value is wrapped in double quotes
 * - double quotes but no single quote: the value is wrapped in single quotes
 * - both: a concat() call is produced that joins the double-quoted pieces
 *   with a single-quoted double quote character
 *
 * @see https://stackoverflow.com/a/45228168
 * @param string $value Raw string to quote
 * @return string XPath expression evaluating to the provided string
 */
public static function xpathQuote(string $value): string
{
    if (false === strpos($value, '"')) {
        return '"' . $value . '"';
    }

    if (false === strpos($value, "'")) {
        return "'" . $value . "'";
    }

    // The value contains both single and double quotes: construct an
    // expression that concatenates all non-double-quote substrings with
    // the quotes, e.g.:
    //
    //     concat("'foo", '"', "bar")
    //
    // A piece produced by explode() cannot contain a double quote, so wrapping
    // it in double quotes is always safe. A closure is used instead of the
    // ['self', 'xpathQuote'] callable, which is deprecated since PHP 8.2.
    $quotedPieces = array_map(
        static function (string $piece): string {
            return '"' . $piece . '"';
        },
        explode('"', $value)
    );

    return 'concat(' . implode(', \'"\', ', $quotedPieces) . ')';
}
/**
 * Checks if the provided URL is present in the DOM document in an element with the rel="me" attribute
 *
 * Any element is considered, not only <a> or <link>. The href attribute is
 * compared as an exact string against the string form of the provided URL.
 *
 * XHTML Friends Network http://gmpg.org/xfn/
 *
 * @param DOMDocument  $doc   Document to search
 * @param UriInterface $meUrl URL expected in the href attribute of a rel="me" element
 * @return bool True if at least one matching element exists, false otherwise or if the query fails
 */
public static function checkRelMeLink(DOMDocument $doc, UriInterface $meUrl): bool
{
    $xpath = new \DOMXPath($doc);

    // This expression checks that "me" is among the space-delimited values of the "rel" attribute.
    // And that the href attribute contains exactly the provided URL.
    // The URL is cast explicitly: xpathQuote() takes a string, and relying on implicit
    // __toString() coercion throws a TypeError when strict_types is enabled.
    $expression = "//*[contains(concat(' ', normalize-space(@rel), ' '), ' me ')][@href = " . self::xpathQuote((string)$meUrl) . "]";

    $result = $xpath->query($expression);

    // DOMXPath::query() returns false on a malformed expression
    return $result !== false && $result->length > 0;
}
}

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<!-- Fixture: one anchor to https://example.com/profile/me whose rel attribute lists "me" as the last of two space-separated tokens. -->
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Remote page</title>
</head>
<body>
<a href="https://example.com/profile/me" rel="nofollow me">My Profile</a>
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<!-- Fixture: one anchor to https://example.com/profile/me whose rel attribute lists "me" between two other space-separated tokens. -->
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Remote page</title>
</head>
<body>
<a href="https://example.com/profile/me" rel="noreferrer me nofollow">My Profile</a>
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<!-- Fixture: one anchor to https://example.com/profile/me whose rel attribute lists "me" as the first of two space-separated tokens. -->
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Remote page</title>
</head>
<body>
<a href="https://example.com/profile/me" rel="me nofollow">My Profile</a>
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<!-- Fixture: one rel="me" anchor whose href is https://example.com/profile/me2, i.e. a URL that merely starts with https://example.com/profile/me. -->
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Remote page</title>
</head>
<body>
<a href="https://example.com/profile/me2" rel="me">My Profile</a>
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<!-- Fixture: one anchor to https://example.com/profile/me whose rel attribute is exactly "me". -->
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Remote page</title>
</head>
<body>
<a href="https://example.com/profile/me" rel="me">My Profile</a>
</body>
</html>

View file

@ -0,0 +1,11 @@
<!DOCTYPE html>
<!-- Fixture: one head link to https://example.com/profile/me whose rel attribute is "media", a token that starts with "me" but is not "me". -->
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Remote page</title>
<link href="https://example.com/profile/me" rel="media"/>
</head>
<body>
</body>
</html>

View file

@ -0,0 +1,11 @@
<!DOCTYPE html>
<!-- Fixture: one head link to https://example.com/profile/me whose rel attribute is exactly "me"; the body is empty. -->
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Remote page</title>
<link href="https://example.com/profile/me" rel="me"/>
</head>
<body>
</body>
</html>

View file

@ -25,6 +25,8 @@ use Exception;
use Friendica\Content\Text\HTML;
use Friendica\Network\HTTPException\InternalServerErrorException;
use Friendica\Test\FixtureTest;
use GuzzleHttp\Psr7\Uri;
use Psr\Http\Message\UriInterface;
class HTMLTest extends FixtureTest
{
@ -105,4 +107,152 @@ its surprisingly good",
self::assertEquals($expectedBBCode, $actual);
}
/**
 * Data provider for testXpathQuote.
 *
 * Each data set carries a single "value" entry: a string mixing single and
 * double quotes at various positions.
 *
 * @return array
 */
public function dataXpathQuote(): array
{
    // Data set label => raw string to be quoted
    $valuesByLabel = [
        'no quotes'                               => "foo",
        'double quotes only'                      => "\"foo",
        'single quotes only'                      => "'foo",
        'both; double quotes in mid-string'       => "'foo\"bar",
        'multiple double quotes in mid-string'    => "'foo\"bar\"baz",
        'string ends with double quotes'          => "'foo\"",
        'string ends with run of double quotes'   => "'foo\"\"",
        'string begins with double quotes'        => "\"'foo",
        'string begins with run of double quotes' => "\"\"'foo",
        'run of double quotes in mid-string'      => "'foo\"\"bar",
    ];

    // array_map() keeps the string keys when given a single array
    return array_map(
        static function (string $rawValue): array {
            return ['value' => $rawValue];
        },
        $valuesByLabel
    );
}
/**
 * Builds a one-element document carrying the raw value in an attribute, then
 * checks that an XPath query using the quoted value finds exactly that element.
 *
 * @dataProvider dataXpathQuote
 * @param string $value
 * @return void
 * @throws \DOMException
 */
public function testXpathQuote(string $value)
{
    $document = new \DOMDocument();

    $valueAttribute = $document->createAttribute('value');
    $valueAttribute->value = $value;

    $testNode = $document->createElement('test');
    $testNode->appendChild($valueAttribute);
    $document->appendChild($testNode);

    $query   = '//test[@value = ' . HTML::xpathQuote($value) . ']';
    $matches = (new \DOMXPath($document))->query($query);

    // A malformed expression would make query() return false instead of a node list
    self::assertInstanceOf(\DOMNodeList::class, $matches);
    self::assertEquals(1, $matches->length);
}
/**
 * Data provider for testCheckRelMeLink.
 *
 * Every data set pairs a document loaded from a fixture file with the same
 * profile URL.
 *
 * @return array
 */
public function dataCheckRelMeLink(): array
{
    // Data set label => fixture path relative to this file's directory
    $fixturePaths = [
        'a-single-rel-value'          => '/../../../datasets/dom/relme/a-single-rel-value.html',
        'a-multiple-rel-value-start'  => '/../../../datasets/dom/relme/a-multiple-rel-value-start.html',
        'a-multiple-rel-value-middle' => '/../../../datasets/dom/relme/a-multiple-rel-value-middle.html',
        'a-multiple-rel-value-end'    => '/../../../datasets/dom/relme/a-multiple-rel-value-end.html',
        'link-single-rel-value'       => '/../../../datasets/dom/relme/link-single-rel-value.html',
    ];

    $meUrl = new Uri('https://example.com/profile/me');

    $dataSets = [];
    foreach ($fixturePaths as $label => $relativePath) {
        $document = new \DOMDocument();
        $document->load(__DIR__ . $relativePath);

        $dataSets[$label] = [
            'doc'   => $document,
            'meUrl' => $meUrl,
        ];
    }

    return $dataSets;
}
/**
 * Asserts that the rel="me" check succeeds for every provided document.
 *
 * @dataProvider dataCheckRelMeLink
 * @param \DOMDocument $doc
 * @param UriInterface $meUrl
 * @return void
 */
public function testCheckRelMeLink(\DOMDocument $doc, UriInterface $meUrl)
{
    $isLinked = HTML::checkRelMeLink($doc, $meUrl);

    self::assertTrue($isLinked);
}
/**
 * Data provider for testCheckRelMeLinkFail.
 *
 * Every data set pairs a document loaded from a fixture file with the same
 * profile URL.
 *
 * @return array
 */
public function dataCheckRelMeLinkFail(): array
{
    // Data set label => fixture path relative to this file's directory
    $fixturePaths = [
        'a-single-rel-value-fail'    => '/../../../datasets/dom/relme/a-single-rel-value-fail.html',
        'link-single-rel-value-fail' => '/../../../datasets/dom/relme/link-single-rel-value-fail.html',
    ];

    $meUrl = new Uri('https://example.com/profile/me');

    $dataSets = [];
    foreach ($fixturePaths as $label => $relativePath) {
        $document = new \DOMDocument();
        $document->load(__DIR__ . $relativePath);

        $dataSets[$label] = [
            'doc'   => $document,
            'meUrl' => $meUrl,
        ];
    }

    return $dataSets;
}
/**
 * Asserts that the rel="me" check fails for every provided document.
 *
 * @dataProvider dataCheckRelMeLinkFail
 * @param \DOMDocument $doc
 * @param UriInterface $meUrl
 * @return void
 */
public function testCheckRelMeLinkFail(\DOMDocument $doc, UriInterface $meUrl)
{
    $isLinked = HTML::checkRelMeLink($doc, $meUrl);

    self::assertFalse($isLinked);
}
}