Merge pull request #13611 from annando/languages

Use the post language for the language detection / config for quality
This commit is contained in:
Hypolite Petovan 2023-11-05 16:23:57 -08:00 committed by GitHub
commit 58e5f0d9c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 101 additions and 25 deletions

View file

@ -117,7 +117,7 @@ class Item
const DELIVER_FIELDLIST = [
'uid', 'id', 'parent', 'uri-id', 'uri', 'thr-parent', 'parent-uri', 'guid',
'parent-guid', 'conversation', 'received', 'created', 'edited', 'verb', 'object-type', 'object', 'target',
'private', 'title', 'body', 'raw-body', 'location', 'coord', 'app',
'private', 'title', 'body', 'raw-body', 'language', 'location', 'coord', 'app',
'inform', 'deleted', 'extid', 'post-type', 'post-reason', 'gravity',
'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid',
'author-id', 'author-addr', 'author-link', 'author-name', 'author-avatar', 'owner-id', 'owner-link', 'contact-uid',
@ -1484,6 +1484,10 @@ class Item
*/
private static function setOwnerforResharedItem(array $item)
{
if ($item['uid'] == 0) {
return;
}
$parent = Post::selectFirst(
['id', 'causer-id', 'owner-id', 'author-id', 'author-link', 'origin', 'post-reason'],
['uri-id' => $item['thr-parent-id'], 'uid' => $item['uid']]

View file

@ -586,7 +586,14 @@ class User
$languages = [];
$uids = [];
$users = DBA::select('user', ['uid', 'language'], ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `uid` > ?", 0]);
$condition = ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `uid` > ?", 0];
$abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
if (!empty($abandon_days)) {
$condition = DBA::mergeConditions($condition, ["`last-activity` > ?", DateTimeFormat::utc('now - ' . $abandon_days . ' days')]);
}
$users = DBA::select('user', ['uid', 'language'], $condition);
while ($user = DBA::fetch($users)) {
$uids[] = $user['uid'];
$code = DI::l10n()->toISO6391($user['language']);
@ -612,6 +619,7 @@ class User
}
DBA::close($channels);
ksort($languages);
return array_keys($languages);
}

View file

@ -1673,7 +1673,39 @@ class Processor
}
}
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0);
$languages = self::getPostLanguages($activity);
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0, $languages);
}
/**
 * Fetch the languages that the sender declared for the post content
 *
 * Collects the "@language" values of the "as:content" entries of the
 * activity object, converts each one via DI::l10n()->toISO6391() and keeps
 * only the codes that appear in the first column of the ISO639 language table.
 *
 * @param array $activity Activity array, the object is read from the "as:object" key
 * @return array List of language codes, empty when no language is declared
 */
private static function getPostLanguages(array $activity): array
{
	$content   = JsonLD::fetchElement($activity['as:object'], 'as:content') ?? '';
	$languages = JsonLD::fetchElementArray($activity['as:object'], 'as:content', '@language') ?? [];
	if (empty($languages)) {
		return [];
	}

	// The list of known codes does not change while iterating, so it is built only once.
	// NOTE(review): column 0 of allLanguages() is presumably the ISO 639-1 code - confirm against the library.
	$iso639      = new \Matriphe\ISO639\ISO639;
	$known_codes = array_column($iso639->allLanguages(), 0);

	$result = [];
	foreach ($languages as $language) {
		// Skip entries that are identical to the content itself.
		// NOTE(review): presumably the helper returns the raw entry when it has no "@language" key - confirm.
		if ($language === $content) {
			continue;
		}

		$language = DI::l10n()->toISO6391($language);
		if (!in_array($language, $known_codes, true)) {
			continue;
		}

		$result[] = $language;
	}

	return $result;
}
/**

View file

@ -895,6 +895,19 @@ class Transmitter
*/
public static function getReceiversForUriId(int $uri_id, bool $blindcopy)
{
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
if (empty($tags)) {
Logger::debug('No receivers found', ['uri-id' => $uri_id]);
$post = Post::selectFirst([Item::DELIVER_FIELDLIST], ['uri-id' => $uri_id, 'origin' => true]);
if (!empty($post)) {
ActivityPub\Transmitter::storeReceiversForItem($post);
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
Logger::debug('Receivers are created', ['uri-id' => $uri_id, 'receivers' => count($tags)]);
} else {
Logger::debug('Origin item not found', ['uri-id' => $uri_id]);
}
}
$receivers = [
'to' => [],
'cc' => [],
@ -902,7 +915,7 @@ class Transmitter
'audience' => [],
];
foreach (Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]) as $receiver) {
foreach ($tags as $receiver) {
switch ($receiver['type']) {
case Tag::TO:
$receivers['to'][] = $receiver['url'];
@ -1884,7 +1897,7 @@ class Transmitter
if (!empty($item['language'])) {
$languages = array_keys(json_decode($item['language'], true));
if (!empty($languages[0])) {
return $languages[0];
return DI::l10n()->toISO6391($languages[0]);
}
}
@ -1892,12 +1905,12 @@ class Transmitter
if (!empty($item['uid'])) {
$user = DBA::selectFirst('user', ['language'], ['uid' => $item['uid']]);
if (!empty($user['language'])) {
return $user['language'];
return DI::l10n()->toISO6391($user['language']);
}
}
// And finally just use the system language
return DI::config()->get('system', 'language');
return DI::l10n()->toISO6391(DI::config()->get('system', 'language'));
}
/**

View file

@ -57,9 +57,12 @@ class Relay
* @param string $body
* @param int $authorid
* @param string $url
* @param string $network
* @param int $causerid
* @param array $languages
* @return boolean "true" if the post is wanted by the system
*/
public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0): bool
public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0, array $languages = []): bool
{
$config = DI::config();
@ -128,7 +131,7 @@ class Relay
}
}
if (!self::isWantedLanguage($body, 0, $authorid)) {
if (!self::isWantedLanguage($body, 0, $authorid, $languages)) {
Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 'tags' => $tags]);
return false;
}
@ -171,37 +174,45 @@ class Relay
* @param string $body
* @param int $uri_id
* @param int $author_id
* @param array $languages
* @return boolean
*/
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0)
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0, array $languages = [])
{
if (empty($body) || Smilies::isEmojiPost($body)) {
$detected = [];
$quality = DI::config()->get('system', 'relay_language_quality');
foreach (Item::getLanguageArray($body, DI::config()->get('system', 'relay_languages'), $uri_id, $author_id) as $language => $reliability) {
if (($reliability >= $quality) && ($quality > 0)) {
$detected[] = $language;
}
}
if (empty($languages) && empty($detected) && (empty($body) || Smilies::isEmojiPost($body))) {
Logger::debug('Empty body or only emojis', ['body' => $body]);
return true;
}
$languages = [];
foreach (Item::getLanguageArray($body, 10, $uri_id, $author_id) as $language => $reliability) {
if ($reliability > 0) {
$languages[] = $language;
}
}
if (!empty($languages)) {
if (!empty($languages) || !empty($detected)) {
$cachekey = 'relay:isWantedLanguage';
$user_languages = DI::cache()->get($cachekey);
if (is_null($user_languages)) {
$user_languages = User::getLanguages();
DI::cache()->set($cachekey, $user_languages, Duration::HALF_HOUR);
DI::cache()->set($cachekey, $user_languages);
}
foreach ($languages as $language) {
foreach ($detected as $language) {
if (in_array($language, $user_languages)) {
Logger::debug('Wanted language found', ['language' => $language, 'languages' => $languages, 'userlang' => $user_languages, 'body' => $body]);
Logger::debug('Wanted language found in detected languages', ['language' => $language, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
return true;
}
}
Logger::debug('No wanted language found', ['languages' => $languages, 'userlang' => $user_languages, 'body' => $body]);
foreach ($languages as $language) {
if (in_array($language, $user_languages)) {
Logger::debug('Wanted language found in defined languages', ['language' => $language, 'languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
return true;
}
}
Logger::debug('No wanted language found', ['languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
return false;
} elseif (DI::config()->get('system', 'relay_deny_undetected_language')) {
Logger::info('Undetected language found', ['body' => $body]);

View file

@ -562,6 +562,14 @@ return [
// Deny undetected languages
'relay_deny_undetected_language' => false,
// relay_language_quality (Float)
// Minimum value for the language detection quality for relay posts. The value must be between 0 and 1.
'relay_language_quality' => 0,
// relay_languages (Integer)
// Number of languages that are used per post to check for acceptable posts.
'relay_languages' => 10,
// session_handler (database|cache|native)
// Whether to use Cache to store session data or to use PHP native session storage.
'session_handler' => 'database',