Merge pull request #13519 from annando/language

Check more fields for the language
This commit is contained in:
Tobias Diekershoff 2023-10-08 11:38:48 +02:00 committed by GitHub
commit 2c297da755
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 49 additions and 46 deletions

View file

@ -1212,8 +1212,6 @@ class Item
// Check for hashtags in the body and repair or add hashtag links
$item['body'] = self::setHashtags($item['body']);
$item['language'] = self::getLanguage($item);
$notify_type = Delivery::POST;
// Filling item related side tables
@ -1262,6 +1260,8 @@ class Item
}
}
$item['language'] = self::getLanguage($item);
$inserted = Post::insert($item['uri-id'], $item);
if ($item['gravity'] == self::GRAVITY_PARENT) {
@ -1991,7 +1991,7 @@ class Item
return '';
}
$languages = self::getLanguageArray(trim($item['title'] . "\n" . $item['body']), 3, $item['uri-id'], $item['author-id']);
$languages = self::getLanguageArray($item['title'] . ' ' . ($item['content-warning'] ?? '') . ' ' . $item['body'], 3, $item['uri-id'], $item['author-id']);
if (empty($languages)) {
return '';
}

View file

@ -158,18 +158,7 @@ class Engagement
$body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body'];
$body = preg_replace("~\[url\=.*\]https?:.*\[\/url\]~", '', $body);
$body = Post\Media::addAttachmentsToBody($item['uri-id'], $body, [Post\Media::IMAGE]);
$text = BBCode::toPlaintext($body, false);
$text = preg_replace(Strings::autoLinkRegEx(), '', $text);
do {
$oldtext = $text;
$text = str_replace([' ', "\n", "\r"], ' ', $text);
} while ($oldtext != $text);
return $text;
return BBCode::toSearchText($body, $item['uri-id']);
}
private static function getMediaType(int $uri_id): int

View file

@ -1652,7 +1652,19 @@ class Processor
$attributed_to = JsonLD::fetchElement($activity['as:object'], 'as:attributedTo', '@id');
$authorid = Contact::getIdForURL($attributed_to);
$body = HTML::toBBCode(JsonLD::fetchElement($activity['as:object'], 'as:content', '@value') ?? '');
$content = JsonLD::fetchElement($activity['as:object'], 'as:name', '@value') ?? '';
$content .= ' ' . JsonLD::fetchElement($activity['as:object'], 'as:summary', '@value') ?? '';
$content .= ' ' . HTML::toBBCode(JsonLD::fetchElement($activity['as:object'], 'as:content', '@value') ?? '');
$attachments = JsonLD::fetchElementArray($activity['as:object'], 'as:attachment') ?? [];
foreach ($attachments as $media) {
if (!empty($media['as:summary'])) {
$content .= ' ' . JsonLD::fetchElement($media, 'as:summary', '@value');
}
if (!empty($media['as:name'])) {
$content .= ' ' . JsonLD::fetchElement($media, 'as:name', '@value');
}
}
$messageTags = [];
$tags = Receiver::processTags(JsonLD::fetchElementArray($activity['as:object'], 'as:tag') ?? []);
@ -1665,7 +1677,7 @@ class Processor
}
}
return Relay::isSolicitedPost($messageTags, $body, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0);
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0);
}
/**

View file

@ -125,7 +125,7 @@ class Relay
}
}
if (!self::isWantedLanguage($body)) {
if (!self::isWantedLanguage($body, 0, $authorid)) {
Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 'tags' => $tags]);
return false;
}
@ -166,12 +166,14 @@ class Relay
* Detect the language of a post and decide if the post should be accepted
*
* @param string $body
* @param int $uri_id
* @param int $author_id
* @return boolean
*/
public static function isWantedLanguage(string $body)
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0)
{
$languages = [];
foreach (Item::getLanguageArray($body, 10) as $language => $reliability) {
foreach (Item::getLanguageArray($body, 10, $uri_id, $author_id) as $language => $reliability) {
if ($reliability > 0) {
$languages[] = $language;
}

View file

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: 2023.09-rc\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-10-07 19:00+0200\n"
"POT-Creation-Date: 2023-10-08 07:41+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@ -2188,8 +2188,8 @@ msgid ""
"<a href=\"%1$s\" target=\"_blank\" rel=\"noopener noreferrer\">%2$s</a> %3$s"
msgstr ""
#: src/Content/Text/BBCode.php:994 src/Model/Item.php:3760
#: src/Model/Item.php:3766 src/Model/Item.php:3767
#: src/Content/Text/BBCode.php:994 src/Model/Item.php:3761
#: src/Model/Item.php:3767 src/Model/Item.php:3768
msgid "Link to source"
msgstr ""
@ -2724,8 +2724,8 @@ msgstr ""
#: src/Core/Installer.php:511
msgid ""
"The web installer needs to be able to create a file called \"local.config."
"php\" in the \"config\" folder of your web server and it is unable to do so."
"The web installer needs to be able to create a file called \"local.config.php"
"\" in the \"config\" folder of your web server and it is unable to do so."
msgstr ""
#: src/Core/Installer.php:512
@ -3429,44 +3429,44 @@ msgstr ""
msgid "Content warning: %s"
msgstr ""
#: src/Model/Item.php:3667
#: src/Model/Item.php:3668
msgid "bytes"
msgstr ""
#: src/Model/Item.php:3698
#: src/Model/Item.php:3699
#, php-format
msgid "%2$s (%3$d%%, %1$d vote)"
msgid_plural "%2$s (%3$d%%, %1$d votes)"
msgstr[0] ""
msgstr[1] ""
#: src/Model/Item.php:3700
#: src/Model/Item.php:3701
#, php-format
msgid "%2$s (%1$d vote)"
msgid_plural "%2$s (%1$d votes)"
msgstr[0] ""
msgstr[1] ""
#: src/Model/Item.php:3705
#: src/Model/Item.php:3706
#, php-format
msgid "%d voter. Poll end: %s"
msgid_plural "%d voters. Poll end: %s"
msgstr[0] ""
msgstr[1] ""
#: src/Model/Item.php:3707
#: src/Model/Item.php:3708
#, php-format
msgid "%d voter."
msgid_plural "%d voters."
msgstr[0] ""
msgstr[1] ""
#: src/Model/Item.php:3709
#: src/Model/Item.php:3710
#, php-format
msgid "Poll end: %s"
msgstr ""
#: src/Model/Item.php:3743 src/Model/Item.php:3744
#: src/Model/Item.php:3744 src/Model/Item.php:3745
msgid "View on separate page"
msgstr ""
@ -5351,9 +5351,9 @@ msgstr ""
#: src/Module/Admin/Summary.php:98
msgid ""
"The last update failed. Please run \"php bin/console.php dbstructure "
"update\" from the command line and have a look at the errors that might "
"appear. (Some of the errors are possibly inside the logfile.)"
"The last update failed. Please run \"php bin/console.php dbstructure update"
"\" from the command line and have a look at the errors that might appear. "
"(Some of the errors are possibly inside the logfile.)"
msgstr ""
#: src/Module/Admin/Summary.php:102
@ -5504,8 +5504,8 @@ msgstr ""
#, php-format
msgid ""
"Show some informations regarding the needed information to operate the node "
"according e.g. to <a href=\"%s\" target=\"_blank\" rel=\"noopener "
"noreferrer\">EU-GDPR</a>."
"according e.g. to <a href=\"%s\" target=\"_blank\" rel=\"noopener noreferrer"
"\">EU-GDPR</a>."
msgstr ""
#: src/Module/Admin/Tos.php:81
@ -8856,8 +8856,8 @@ msgstr ""
#: src/Module/Profile/Profile.php:158
#, php-format
msgid ""
"You're currently viewing your profile as <b>%s</b> <a href=\"%s\" "
"class=\"btn btn-sm pull-right\">Cancel</a>"
"You're currently viewing your profile as <b>%s</b> <a href=\"%s\" class="
"\"btn btn-sm pull-right\">Cancel</a>"
msgstr ""
#: src/Module/Profile/Profile.php:167
@ -9405,8 +9405,8 @@ msgstr ""
#: src/Module/Security/TwoFactor/Verify.php:100
#, php-format
msgid ""
"If you do not have access to your authentication code you can use a <a "
"href=\"%s\">two-factor recovery code</a>."
"If you do not have access to your authentication code you can use a <a href="
"\"%s\">two-factor recovery code</a>."
msgstr ""
#: src/Module/Security/TwoFactor/Verify.php:101
@ -11047,8 +11047,8 @@ msgstr ""
#: src/Module/Settings/TwoFactor/Verify.php:149
#, php-format
msgid ""
"<p>Or you can open the following URL in your mobile device:</p><p><a "
"href=\"%s\">%s</a></p>"
"<p>Or you can open the following URL in your mobile device:</p><p><a href="
"\"%s\">%s</a></p>"
msgstr ""
#: src/Module/Settings/TwoFactor/Verify.php:156
@ -11157,9 +11157,9 @@ msgstr ""
msgid ""
"At any point in time a logged in user can export their account data from the "
"<a href=\"%1$s/settings/userexport\">account settings</a>. If the user wants "
"to delete their account they can do so at <a href=\"%1$s/settings/"
"removeme\">%1$s/settings/removeme</a>. The deletion of the account will be "
"permanent. Deletion of the data will also be requested from the nodes of the "
"to delete their account they can do so at <a href=\"%1$s/settings/removeme\">"
"%1$s/settings/removeme</a>. The deletion of the account will be permanent. "
"Deletion of the data will also be requested from the nodes of the "
"communication partners."
msgstr ""