From f08b08e0d821b2ed7cb65d9d32a8b1dfc9df6d58 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 19 Sep 2022 12:36:12 +0200 Subject: [PATCH 1/4] Allow to reject specific languages on receiving posts via the relay --- src/Model/Item.php | 27 +++++++++++++++----------- src/Protocol/ActivityPub/Processor.php | 2 +- src/Protocol/Relay.php | 22 +++++++++++++++++++++ src/Util/Profiler.php | 8 ++++---- static/defaults.config.php | 8 ++++++++ 5 files changed, 51 insertions(+), 16 deletions(-) diff --git a/src/Model/Item.php b/src/Model/Item.php index 79fe4c02e..3fb8e52a5 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -1870,23 +1870,33 @@ class Item return ''; } - // Convert attachments to links - $naked_body = BBCode::removeAttachment($item['body']); - if (empty($naked_body)) { + $languages = self::getLanguageArray(trim($item['title'] . "\n" . $item['body'])); + if (empty($languages)) { return ''; } + return json_encode($languages); + } + + public static function getLanguageArray(string $body): array + { + // Convert attachments to links + $naked_body = BBCode::removeAttachment($body); + if (empty($naked_body)) { + return []; + } + // Remove links and pictures $naked_body = BBCode::removeLinks($naked_body); // Convert the title and the body to plain text - $naked_body = trim($item['title'] . "\n" . 
BBCode::toPlaintext($naked_body)); + $naked_body = BBCode::toPlaintext($naked_body); // Remove possibly remaining links $naked_body = preg_replace(Strings::autoLinkRegEx(), '', $naked_body); if (empty($naked_body)) { - return ''; + return []; } $naked_body = self::getDominantLanguage($naked_body); @@ -1898,12 +1908,7 @@ class Item $availableLanguages['fa'] = 'fa'; $ld = new Language(array_keys($availableLanguages)); - $languages = $ld->detect($naked_body)->limit(0, 3)->close(); - if (is_array($languages)) { - return json_encode($languages); - } - - return ''; + return $ld->detect($naked_body)->limit(0, 3)->close() ?: []; } /** diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php index e16e2e953..7988d97f6 100644 --- a/src/Protocol/ActivityPub/Processor.php +++ b/src/Protocol/ActivityPub/Processor.php @@ -91,7 +91,7 @@ class Processor * @param string $body * @return string */ - protected static function normalizeMentionLinks(string $body): string + public static function normalizeMentionLinks(string $body): string { return preg_replace('%\[url=([^\[\]]*)]([#@!])(.*?)\[/url]%ism', '$2[url=$1]$3[/url]', $body); } diff --git a/src/Protocol/Relay.php b/src/Protocol/Relay.php index 7414c2931..f427d85b6 100644 --- a/src/Protocol/Relay.php +++ b/src/Protocol/Relay.php @@ -29,6 +29,7 @@ use Friendica\DI; use Friendica\Model\APContact; use Friendica\Model\Contact; use Friendica\Model\GServer; +use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Model\Search; use Friendica\Model\Tag; @@ -76,6 +77,8 @@ class Relay return false; } + $body = ActivityPub\Processor::normalizeMentionLinks($body); + $systemTags = []; $userTags = []; $denyTags = []; @@ -125,6 +128,25 @@ class Relay } } + $languages = []; + foreach (Item::getLanguageArray($body) as $language => $reliability) { + if ($reliability > 0) { + $languages[] = $language; + } + } + + Logger::debug('Got languages', ['languages' => $languages, 'body' => $body]); + + if 
(!empty($languages)) { + if (in_array($languages[0], $config->get('system', 'relay_deny_languages'))) { + Logger::info('Unwanted language found - rejected', ['language' => $languages[0], 'network' => $network, 'url' => $url]); + return false; + } + } elseif ($config->get('system', 'relay_deny_undetected_language')) { + Logger::info('Undetected language found - rejected', ['body' => $body, 'network' => $network, 'url' => $url]); + return false; + } + if ($scope == self::SCOPE_ALL) { Logger::info('Server accept all posts - accepted', ['network' => $network, 'url' => $url]); return true; diff --git a/src/Util/Profiler.php b/src/Util/Profiler.php index f4e485048..a98e72892 100644 --- a/src/Util/Profiler.php +++ b/src/Util/Profiler.php @@ -152,13 +152,13 @@ class Profiler implements ContainerInterface * Saves a timestamp for a value - f.e. a call * Necessary for profiling Friendica * - * @param int $timestamp the Timestamp + * @param float $timestamp the Timestamp * @param string $value A value to profile * @param string $callstack A callstack string, generated if absent * * @return void */ - public function saveTimestamp(int $timestamp, string $value, string $callstack = '') + public function saveTimestamp(float $timestamp, string $value, string $callstack = '') { if (!$this->enabled) { return; @@ -358,9 +358,9 @@ class Profiler implements ContainerInterface * @throws NotFoundExceptionInterface No entry was found for **this** identifier. * @throws ContainerExceptionInterface Error while retrieving the entry. * - * @return int Entry. + * @return float Entry. 
*/ - public function get(string $id): int + public function get(string $id): float { if (!$this->has($id)) { return 0; diff --git a/static/defaults.config.php b/static/defaults.config.php index adffad896..285e544c8 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -525,6 +525,14 @@ return [ // The authentication password for the redis database 'redis_password' => null, + // relay_deny_languages (Array) + // Array of languages that are rejected. + 'relay_deny_languages' => [], + + // relay_deny_undetected_language (Boolean) + // Deny undetected languages + 'relay_deny_undetected_language' => false, + // session_handler (database|cache|native) // Whether to use Cache to store session data or to use PHP native session storage. 'session_handler' => 'database', From 3650feb2567ff5285db4fd987c3bb56d9d16602c Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 19 Sep 2022 12:46:28 +0200 Subject: [PATCH 2/4] "Count" parameter added --- src/Model/Item.php | 6 +++--- src/Protocol/Relay.php | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Model/Item.php b/src/Model/Item.php index 3fb8e52a5..89c972ea5 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -1870,7 +1870,7 @@ class Item return ''; } - $languages = self::getLanguageArray(trim($item['title'] . "\n" . $item['body'])); + $languages = self::getLanguageArray(trim($item['title'] . "\n" . 
$item['body']), 3); if (empty($languages)) { return ''; } @@ -1878,7 +1878,7 @@ class Item return json_encode($languages); } - public static function getLanguageArray(string $body): array + public static function getLanguageArray(string $body, int $count): array { // Convert attachments to links $naked_body = BBCode::removeAttachment($body); @@ -1908,7 +1908,7 @@ class Item $availableLanguages['fa'] = 'fa'; $ld = new Language(array_keys($availableLanguages)); - return $ld->detect($naked_body)->limit(0, 3)->close() ?: []; + return $ld->detect($naked_body)->limit(0, $count)->close() ?: []; } /** diff --git a/src/Protocol/Relay.php b/src/Protocol/Relay.php index f427d85b6..ff8276250 100644 --- a/src/Protocol/Relay.php +++ b/src/Protocol/Relay.php @@ -129,7 +129,7 @@ class Relay } $languages = []; - foreach (Item::getLanguageArray($body) as $language => $reliability) { + foreach (Item::getLanguageArray($body, 10) as $language => $reliability) { if ($reliability > 0) { $languages[] = $language; } From 6ba2c4bb01b594b21819a1d85cbcb3cd531beca7 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Mon, 19 Sep 2022 12:49:37 +0200 Subject: [PATCH 3/4] Added documentation --- src/Model/Item.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Model/Item.php b/src/Model/Item.php index 89c972ea5..94702e1cc 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -1878,6 +1878,13 @@ class Item return json_encode($languages); } + /** + * Get a language array from a given text + * + * @param string $body + * @param integer $count + * @return array + */ public static function getLanguageArray(string $body, int $count): array { // Convert attachments to links From d55a5cd17b20851e7ef7258826e9ef2f3a899a82 Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Tue, 20 Sep 2022 06:09:17 +0200 Subject: [PATCH 4/4] Changed documentation --- static/defaults.config.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/defaults.config.php 
b/static/defaults.config.php index 285e544c8..fb6727491 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -526,7 +526,7 @@ return [ 'redis_password' => null, // relay_deny_languages (Array) - // Array of languages that are rejected. + // Array of languages (two-letter codes, e.g. 'fa') that are rejected. 'relay_deny_languages' => [], // relay_deny_undetected_language (Boolean)