Merge pull request #12513 from MrPetovan/bug/12507-rel-meeee
Add call to HTML::checkRelMeLink in Worker\CheckRelMeProfileLink
This commit is contained in:
commit
8fd0d4cdc0
10 changed files with 303 additions and 39 deletions
|
@ -33,6 +33,7 @@ use Friendica\Util\Network;
|
||||||
use Friendica\Util\Strings;
|
use Friendica\Util\Strings;
|
||||||
use Friendica\Util\XML;
|
use Friendica\Util\XML;
|
||||||
use League\HTMLToMarkdown\HtmlConverter;
|
use League\HTMLToMarkdown\HtmlConverter;
|
||||||
|
use Psr\Http\Message\UriInterface;
|
||||||
|
|
||||||
class HTML
|
class HTML
|
||||||
{
|
{
|
||||||
|
@ -1007,4 +1008,51 @@ class HTML
|
||||||
|
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Quotes an arbitrary string so it can be embedded as a string literal in an XPath expression
 *
 * A value without double quotes is wrapped in double quotes, a value without single quotes
 * is wrapped in single quotes. A value containing both is turned into a concat() call whose
 * arguments are the individually quoted fragments found between the double quotes.
 *
 * @see https://stackoverflow.com/a/45228168
 * @param string $value Raw value to quote
 * @return string XPath string literal or concat() expression representing $value
 */
public static function xpathQuote(string $value): string
{
    // No double quote inside: double quotes can delimit the literal as-is
    if (false === strpos($value, '"')) {
        return '"' . $value . '"';
    }

    // No single quote inside: single quotes can delimit the literal as-is
    if (false === strpos($value, "'")) {
        return "'" . $value . "'";
    }

    // The value contains both single and double quotes: split it on the double
    // quotes, quote every fragment on its own (fragments cannot contain a double
    // quote anymore, so the recursion stops at the first branch) and glue them
    // back together with a literal '"' between them, e.g.:
    //
    //    concat("'foo'", '"', "bar")
    //
    // [self::class, ...] is used instead of ['self', ...] because the latter
    // callable form is deprecated since PHP 8.2.
    return 'concat(' . implode(', \'"\', ', array_map([self::class, 'xpathQuote'], explode('"', $value))) . ')';
}
|
||||||
|
|
||||||
|
/**
 * Checks if the provided URL is present in the DOM document in an element with the rel="me" attribute
 *
 * Any element type is considered (the XPath expression starts with //*), as long as "me" is one
 * of the space-delimited values of its rel attribute and its href attribute is string-equal to
 * the provided URL.
 *
 * XHTML Friends Network http://gmpg.org/xfn/
 *
 * @param DOMDocument  $doc   Parsed document to search
 * @param UriInterface $meUrl URL expected in the href attribute
 * @return bool True if at least one matching element was found
 */
public static function checkRelMeLink(DOMDocument $doc, UriInterface $meUrl): bool
{
    $xpath = new \DOMXPath($doc);

    // This expression checks that "me" is among the space-delimited values of the "rel" attribute.
    // And that the href attribute contains exactly the provided URL.
    // The URI object is cast explicitly since xpathQuote() only accepts strings.
    $expression = "//*[contains(concat(' ', normalize-space(@rel), ' '), ' me ')][@href = " . self::xpathQuote((string)$meUrl) . "]";

    $result = $xpath->query($expression);

    // query() yields false on failure, which counts as "no link found"
    return $result !== false && $result->length > 0;
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,14 +22,14 @@
|
||||||
namespace Friendica\Worker;
|
namespace Friendica\Worker;
|
||||||
|
|
||||||
use DOMDocument;
|
use DOMDocument;
|
||||||
use Friendica\DI;
|
use Friendica\Content\Text\HTML;
|
||||||
use Friendica\Core\Logger;
|
use Friendica\Core\Logger;
|
||||||
|
use Friendica\DI;
|
||||||
use Friendica\Model\Profile;
|
use Friendica\Model\Profile;
|
||||||
use Friendica\Model\User;
|
use Friendica\Model\User;
|
||||||
use Friendica\Network\HTTPClient\Client\HttpClientAccept;
|
use Friendica\Network\HTTPClient\Client\HttpClientAccept;
|
||||||
use Friendica\Network\HTTPClient\Client\HttpClientOptions;
|
use Friendica\Network\HTTPClient\Client\HttpClientOptions;
|
||||||
use Friendica\Util\Network;
|
use GuzzleHttp\Psr7\Uri;
|
||||||
use Friendica\Util\Strings;
|
|
||||||
|
|
||||||
/* This class is used to verify the homepage link of a user profile.
|
/* This class is used to verify the homepage link of a user profile.
|
||||||
* To do so, we look for rel="me" links in the given homepage, if one
|
* To do so, we look for rel="me" links in the given homepage, if one
|
||||||
|
@ -56,43 +56,37 @@ class CheckRelMeProfileLink
|
||||||
{
|
{
|
||||||
Logger::notice('Verifying the homepage', ['uid' => $uid]);
|
Logger::notice('Verifying the homepage', ['uid' => $uid]);
|
||||||
Profile::update(['homepage_verified' => false], $uid);
|
Profile::update(['homepage_verified' => false], $uid);
|
||||||
$homepageUrlVerified = false;
|
|
||||||
$owner = User::getOwnerDataById($uid);
|
$owner = User::getOwnerDataById($uid);
|
||||||
if (!empty($owner['homepage'])) {
|
if (empty($owner['homepage'])) {
|
||||||
|
Logger::notice('The user has no homepage link.', ['uid' => $uid]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
$xrd_timeout = DI::config()->get('system', 'xrd_timeout');
|
$xrd_timeout = DI::config()->get('system', 'xrd_timeout');
|
||||||
$curlResult = DI::httpClient()->get($owner['homepage'], $accept_content = HttpClientAccept::HTML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
|
$curlResult = DI::httpClient()->get($owner['homepage'], HttpClientAccept::HTML, [HttpClientOptions::TIMEOUT => $xrd_timeout]);
|
||||||
if ($curlResult->isSuccess()) {
|
if (!$curlResult->isSuccess()) {
|
||||||
|
Logger::notice('Could not cURL the homepage URL', ['owner homepage' => $owner['homepage']]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
$content = $curlResult->getBody();
|
$content = $curlResult->getBody();
|
||||||
if (!$content) {
|
if (!$content) {
|
||||||
Logger::notice('Empty body of the fetched homepage link). Cannot verify the relation to profile of UID %s.', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
|
Logger::notice('Empty body of the fetched homepage link). Cannot verify the relation to profile of UID %s.', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
|
||||||
} else {
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
$doc = new DOMDocument();
|
$doc = new DOMDocument();
|
||||||
@$doc->loadHTML($content);
|
if (!@$doc->loadHTML($content)) {
|
||||||
if (!$doc) {
|
|
||||||
Logger::notice('Could not parse the content');
|
Logger::notice('Could not parse the content');
|
||||||
} else {
|
return;
|
||||||
foreach ($doc->getElementsByTagName('a') as $link) {
|
|
||||||
$rel = $link->getAttribute('rel');
|
|
||||||
if ($rel == 'me') {
|
|
||||||
$href = $link->getAttribute('href');
|
|
||||||
if (!$homepageUrlVerified && Network::isValidHttpUrl($href)) {
|
|
||||||
$homepageUrlVerified = Strings::compareLink($owner['url'], $href);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
if (HTML::checkRelMeLink($doc, new Uri($owner['url']))) {
|
||||||
}
|
|
||||||
if ($homepageUrlVerified) {
|
|
||||||
Profile::update(['homepage_verified' => true], $uid);
|
Profile::update(['homepage_verified' => true], $uid);
|
||||||
Logger::notice('Homepage URL verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
|
Logger::notice('Homepage URL verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
|
||||||
} else {
|
} else {
|
||||||
Logger::notice('Homepage URL could not be verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
|
Logger::notice('Homepage URL could not be verified', ['uid' => $uid, 'owner homepage' => $owner['homepage']]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
Logger::notice('Could not cURL the homepage URL', ['owner homepage' => $owner['homepage']]);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Logger::notice('The user has no homepage link.', ['uid' => $uid]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
10
tests/datasets/dom/relme/a-multiple-rel-value-end.html
Normal file
10
tests/datasets/dom/relme/a-multiple-rel-value-end.html
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Remote page</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<a href="https://example.com/profile/me" rel="nofollow me">My Profile</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
10
tests/datasets/dom/relme/a-multiple-rel-value-middle.html
Normal file
10
tests/datasets/dom/relme/a-multiple-rel-value-middle.html
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Remote page</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<a href="https://example.com/profile/me" rel="noreferrer me nofollow">My Profile</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
10
tests/datasets/dom/relme/a-multiple-rel-value-start.html
Normal file
10
tests/datasets/dom/relme/a-multiple-rel-value-start.html
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Remote page</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<a href="https://example.com/profile/me" rel="me nofollow">My Profile</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
10
tests/datasets/dom/relme/a-single-rel-value-fail.html
Normal file
10
tests/datasets/dom/relme/a-single-rel-value-fail.html
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Remote page</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<a href="https://example.com/profile/me2" rel="me">My Profile</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
10
tests/datasets/dom/relme/a-single-rel-value.html
Normal file
10
tests/datasets/dom/relme/a-single-rel-value.html
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Remote page</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<a href="https://example.com/profile/me" rel="me">My Profile</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
11
tests/datasets/dom/relme/link-single-rel-value-fail.html
Normal file
11
tests/datasets/dom/relme/link-single-rel-value-fail.html
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Remote page</title>
|
||||||
|
<link href="https://example.com/profile/me" rel="media"/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
11
tests/datasets/dom/relme/link-single-rel-value.html
Normal file
11
tests/datasets/dom/relme/link-single-rel-value.html
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Remote page</title>
|
||||||
|
<link href="https://example.com/profile/me" rel="me"/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -25,6 +25,8 @@ use Exception;
|
||||||
use Friendica\Content\Text\HTML;
|
use Friendica\Content\Text\HTML;
|
||||||
use Friendica\Network\HTTPException\InternalServerErrorException;
|
use Friendica\Network\HTTPException\InternalServerErrorException;
|
||||||
use Friendica\Test\FixtureTest;
|
use Friendica\Test\FixtureTest;
|
||||||
|
use GuzzleHttp\Psr7\Uri;
|
||||||
|
use Psr\Http\Message\UriInterface;
|
||||||
|
|
||||||
class HTMLTest extends FixtureTest
|
class HTMLTest extends FixtureTest
|
||||||
{
|
{
|
||||||
|
@ -105,4 +107,152 @@ its surprisingly good",
|
||||||
|
|
||||||
self::assertEquals($expectedBBCode, $actual);
|
self::assertEquals($expectedBBCode, $actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Data provider for testXpathQuote
 *
 * Each dataset carries a single "value" entry mixing single and double quotes at various positions.
 *
 * @return array
 */
public function dataXpathQuote(): array
{
    $valuesByLabel = [
        'no quotes'                               => "foo",
        'double quotes only'                      => "\"foo",
        'single quotes only'                      => "'foo",
        'both; double quotes in mid-string'       => "'foo\"bar",
        'multiple double quotes in mid-string'    => "'foo\"bar\"baz",
        'string ends with double quotes'          => "'foo\"",
        'string ends with run of double quotes'   => "'foo\"\"",
        'string begins with double quotes'        => "\"'foo",
        'string begins with run of double quotes' => "\"\"'foo",
        'run of double quotes in mid-string'      => "'foo\"\"bar",
    ];

    // array_map() keeps the string keys when given a single array,
    // so the dataset labels are preserved
    return array_map(function (string $value): array {
        return ['value' => $value];
    }, $valuesByLabel);
}
|
||||||
|
|
||||||
|
/**
 * Ensures a value quoted by HTML::xpathQuote matches an attribute holding that same value
 *
 * @dataProvider dataXpathQuote
 * @param string $value
 * @return void
 * @throws \DOMException
 */
public function testXpathQuote(string $value)
{
    // Build a minimal document: <test value="$value"/>
    $document = new \DOMDocument();
    $node     = $document->createElement('test');
    $attr     = $document->createAttribute('value');

    $attr->value = $value;
    $node->appendChild($attr);
    $document->appendChild($node);

    $query   = '//test[@value = ' . HTML::xpathQuote($value) . ']';
    $matches = (new \DOMXPath($document))->query($query);

    $this->assertInstanceOf(\DOMNodeList::class, $matches);
    $this->assertEquals(1, $matches->length);
}
|
||||||
|
|
||||||
|
/**
 * Data provider for testCheckRelMeLink
 *
 * Every dataset pairs a fixture document from datasets/dom/relme with the profile URL
 * that is expected to be found in it. Dataset labels are the fixture file names
 * without their extension.
 *
 * @return array
 */
public function dataCheckRelMeLink(): array
{
    $meUrl = new Uri('https://example.com/profile/me');

    $fixtures = [
        'a-single-rel-value',
        'a-multiple-rel-value-start',
        'a-multiple-rel-value-middle',
        'a-multiple-rel-value-end',
        'link-single-rel-value',
    ];

    $datasets = [];
    foreach ($fixtures as $fixture) {
        $doc = new \DOMDocument();
        $doc->load(__DIR__ . '/../../../datasets/dom/relme/' . $fixture . '.html');

        $datasets[$fixture] = [
            'doc'   => $doc,
            'meUrl' => $meUrl
        ];
    }

    return $datasets;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Documents containing a matching rel="me" link must be accepted
 *
 * @dataProvider dataCheckRelMeLink
 * @param \DOMDocument $doc
 * @param UriInterface $meUrl
 * @return void
 */
public function testCheckRelMeLink(\DOMDocument $doc, UriInterface $meUrl)
{
    $found = HTML::checkRelMeLink($doc, $meUrl);

    $this->assertTrue($found);
}
|
||||||
|
|
||||||
|
/**
 * Data provider for testCheckRelMeLinkFail
 *
 * Every dataset pairs a fixture document from datasets/dom/relme with a profile URL
 * that must not be reported as found in it. Dataset labels are the fixture file names
 * without their extension.
 *
 * @return array
 */
public function dataCheckRelMeLinkFail(): array
{
    $meUrl = new Uri('https://example.com/profile/me');

    $fixtures = [
        'a-single-rel-value-fail',
        'link-single-rel-value-fail',
    ];

    $datasets = [];
    foreach ($fixtures as $fixture) {
        $doc = new \DOMDocument();
        $doc->load(__DIR__ . '/../../../datasets/dom/relme/' . $fixture . '.html');

        $datasets[$fixture] = [
            'doc'   => $doc,
            'meUrl' => $meUrl
        ];
    }

    return $datasets;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Documents without a matching rel="me" link must be rejected
 *
 * @dataProvider dataCheckRelMeLinkFail
 * @param \DOMDocument $doc
 * @param UriInterface $meUrl
 * @return void
 */
public function testCheckRelMeLinkFail(\DOMDocument $doc, UriInterface $meUrl)
{
    $found = HTML::checkRelMeLink($doc, $meUrl);

    $this->assertFalse($found);
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue