Compare commits

...

25 Commits

Author SHA1 Message Date
Hypolite Petovan 8d3d0f267b Merge pull request 'Bluesky: Support for transmitted languages' (#1435) from heluecht/friendica-addons:languages into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1435
2023-11-13 01:15:45 +01:00
Michael 66fdd31915 Bluesky: Support for transmitted languages 2023-11-11 05:30:07 +00:00
Hypolite Petovan 607cc9238c Merge pull request 'CLD2: Use ISO-639-1 for the language detection' (#1433) from heluecht/friendica-addons:ISO-639-1 into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1433
2023-11-03 19:07:04 +01:00
Michael 2a782b512e CLD2: Use ISO-639-1 for the language detection 2023-11-02 22:54:19 +00:00
Hypolite Petovan a75c9ba373 Merge pull request 'Bluesky: Fix warnings' (#1432) from heluecht/friendica-addons:warnings into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1432
2023-10-29 16:11:19 +01:00
Michael 77765ff6ed Bluesky: Fix warnings 2023-10-29 16:11:19 +01:00
heluecht 9c53c0c8d1 Merge pull request '[smileybutton] Add explicit conversion from float to int' (#1431) from warnings into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1431
2023-10-29 12:40:31 +01:00
Hypolite Petovan 43c46ae6d9 [smileybutton] Add explicit conversion from float to int
Address part of https://github.com/friendica/friendica/issues/13157#issuecomment-1771572442
2023-10-29 12:40:31 +01:00
Hypolite Petovan c7e06bfa53 Merge pull request 'Langfilter: Use two letter code for the language / Bluesky: Remove callstack' (#1430) from heluecht/friendica-addons:callstack-language into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1430
2023-10-18 22:30:52 +02:00
Michael 6948a15f1c Langfilter: Use two letter code for the language / Bluesky: Remove callstack 2023-10-18 22:30:52 +02:00
Hypolite Petovan 74c56c32b0 Merge pull request 'Upgrade PHP version in CI' (#1429) from nupplaPhil/friendica-addons:feat/phpunit_upgrade into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1429
2023-10-13 15:21:46 +02:00
Philipp Holzer 9bdaa8092e
Upgrade phpunit version in PHP-CI 2023-10-12 21:33:03 +02:00
Hypolite Petovan b11538d195 Merge pull request 'CLD: Keep the original detected language array' (#1428) from heluecht/friendica-addons:cld into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1428
2023-10-12 13:36:58 +02:00
Michael 73c6a0ff0c CLD: Keep the original detected language array 2023-10-11 18:57:04 +00:00
Hypolite Petovan fbafa80815 Merge pull request 'CLD: New plugin for language detection via CLD2' (#1425) from heluecht/friendica-addons:cld2 into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1425
2023-10-07 07:07:00 +02:00
Michael 92251f4a6c Updated CLD installation description 2023-10-07 07:07:00 +02:00
Michael 18266ea6ef Changed hook parameter / more languages added 2023-10-07 07:07:00 +02:00
Michael 80ce855189 Renamed hook 2023-10-07 07:07:00 +02:00
Michael 0eda161e04 Cleaned up code 2023-10-07 07:07:00 +02:00
Michael 981e6821d0 CLD: New plugin for language detection via CLD2 2023-10-07 07:07:00 +02:00
Philipp Holzer a5ed02ed23 Merge pull request '[CI/CD] Kick CI again' (#1427) from nupplaPhil/friendica-addons:2023.09-rc into 2023.09-rc
Reviewed-on: friendica/friendica-addons#1427
2023-10-05 22:10:33 +02:00
Philipp Holzer 7a8f8fcbd2
[CI/CD] Kick CI again 2023-10-05 21:58:35 +02:00
Philipp Holzer 30b9f73f5e
[CI/CD] Kick CI again 2023-10-05 21:57:17 +02:00
Hypolite Petovan be8d8b9c10 Merge pull request 'Bluesky: Fix some issues when fetching posts' (#1424) from heluecht/friendica-addons:bluesky-fixes into develop
Reviewed-on: friendica/friendica-addons#1424
2023-10-03 03:54:41 +02:00
Michael 16d99dbdfc Bluesky: Fix some issues when fetching posts 2023-10-01 04:37:11 +00:00
6 changed files with 202 additions and 22 deletions

View File

@ -5,11 +5,11 @@ matrix:
- PHP_MAJOR_VERSION: 7.4
PHP_VERSION: 7.4.33
- PHP_MAJOR_VERSION: 8.0
PHP_VERSION: 8.0.29
PHP_VERSION: 8.0.30
- PHP_MAJOR_VERSION: 8.1
PHP_VERSION: 8.1.21
PHP_VERSION: 8.1.23
- PHP_MAJOR_VERSION: 8.2
PHP_VERSION: 8.2.8
PHP_VERSION: 8.2.11
# This forces PHP Unit executions at the "opensocial" labeled location (because of much more power...)
labels:

View File

@ -32,7 +32,6 @@ use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\Renderer;
use Friendica\Core\System;
use Friendica\Core\Worker;
use Friendica\Database\DBA;
use Friendica\DI;
@ -611,6 +610,13 @@ function bluesky_create_post(array $item, stdClass $root = null, stdClass $paren
return;
}
// Try to fetch the language from the post itself
if (!empty($item['language'])) {
$language = array_key_first(json_decode($item['language'], true));
} else {
$language = '';
}
$did = DI::pConfig()->get($uid, 'bluesky', 'did');
$urls = bluesky_get_urls(Post\Media::removeFromBody($item['body']));
$item['body'] = $urls['body'];
@ -622,10 +628,14 @@ function bluesky_create_post(array $item, stdClass $root = null, stdClass $paren
$record = [
'text' => $facets['body'],
'$type' => 'app.bsky.feed.post',
'createdAt' => DateTimeFormat::utcNow(DateTimeFormat::ATOM),
'$type' => 'app.bsky.feed.post'
];
if (!empty($language)) {
$record['langs'] = [$language];
}
if (!empty($facets['facets'])) {
$record['facets'] = $facets['facets'];
}
@ -983,15 +993,22 @@ function bluesky_fetch_feed(int $uid, string $feed)
}
foreach (array_reverse($data->feed) as $entry) {
if (!Relay::isWantedLanguage($entry->post->record->text)) {
$contact = bluesky_get_contact($entry->post->author, 0, $uid);
$languages = $entry->post->record->langs ?? [];
if (!Relay::isWantedLanguage($entry->post->record->text, 0, $contact['id'] ?? 0, $languages)) {
Logger::debug('Unwanted language detected', ['text' => $entry->post->record->text]);
continue;
}
$id = bluesky_process_post($entry->post, $uid, Item::PR_TAG, 0);
if (!empty($id)) {
$post = Post::selectFirst(['uri-id'], ['id' => $id]);
$stored = Post\Category::storeFileByURIId($post['uri-id'], $uid, Post\Category::SUBCRIPTION, $feedname, $feedurl);
Logger::debug('Stored tag subscription for user', ['uri-id' => $post['uri-id'], 'uid' => $uid, 'name' => $feedname, 'url' => $feedurl, 'stored' => $stored]);
if (!empty($post['uri-id'])) {
$stored = Post\Category::storeFileByURIId($post['uri-id'], $uid, Post\Category::SUBCRIPTION, $feedname, $feedurl);
Logger::debug('Stored tag subscription for user', ['uri-id' => $post['uri-id'], 'uid' => $uid, 'name' => $feedname, 'url' => $feedurl, 'stored' => $stored]);
} else {
Logger::notice('Post not found', ['id' => $id, 'entry' => $entry]);
}
}
if (!empty($entry->reason)) {
bluesky_process_reason($entry->reason, bluesky_get_uri($entry->post), $uid);
@ -1003,8 +1020,12 @@ function bluesky_process_post(stdClass $post, int $uid, int $post_reason, $level
{
$uri = bluesky_get_uri($post);
if ($id = Post::selectFirst(['id'], ['uri' => $uri, 'uid' => $uid]) || $id = Post::selectFirst(['id'], ['extid' => $uri, 'uid' => $uid])) {
return $id;
if ($id = Post::selectFirst(['id'], ['uri' => $uri, 'uid' => $uid])) {
return $id['id'];
}
if ($id = Post::selectFirst(['id'], ['extid' => $uri, 'uid' => $uid])) {
return $id['id'];
}
Logger::debug('Importing post', ['uid' => $uid, 'indexedAt' => $post->indexedAt, 'uri' => $post->uri, 'cid' => $post->cid, 'root' => $post->record->reply->root ?? '']);
@ -1087,12 +1108,13 @@ function bluesky_get_content(array $item, stdClass $record, string $uri, int $ui
$item['body'] = bluesky_get_text($record);
$item['created'] = DateTimeFormat::utc($record->createdAt, DateTimeFormat::MYSQL);
$item['transmitted-languages'] = $record->langs ?? [];
return $item;
}
function bluesky_get_text(stdClass $record): string
{
$text = $record->text;
$text = $record->text ?? '';
if (empty($record->facets)) {
return $text;
@ -1185,11 +1207,10 @@ function bluesky_add_media(stdClass $embed, array $item, int $fetch_uid, int $le
$shared = bluesky_add_media($single, $shared, $fetch_uid, $level);
}
}
$id = Item::insert($shared);
$shared = Post::selectFirst(['uri-id'], ['id' => $id]);
Item::insert($shared);
}
}
if (!empty($shared)) {
if (!empty($shared['uri-id'])) {
$item['quote-uri-id'] = $shared['uri-id'];
}
break;
@ -1206,12 +1227,10 @@ function bluesky_add_media(stdClass $embed, array $item, int $fetch_uid, int $le
$shared = bluesky_add_media($single, $shared, $fetch_uid, $level);
}
}
$id = Item::insert($shared);
$shared = Post::selectFirst(['uri-id'], ['id' => $id]);
Item::insert($shared);
}
}
if (!empty($shared)) {
if (!empty($shared['uri-id'])) {
$item['quote-uri-id'] = $shared['uri-id'];
}
@ -1230,7 +1249,7 @@ function bluesky_add_media(stdClass $embed, array $item, int $fetch_uid, int $le
function bluesky_get_uri(stdClass $post): string
{
if (empty($post->cid)) {
Logger::info('Invalid URI', ['post' => $post, 'callstack' => System::callstack(10, 0, true)]);
Logger::info('Invalid URI', ['post' => $post]);
return '';
}
return $post->uri . ':' . $post->cid;
@ -1303,7 +1322,7 @@ function bluesky_fetch_missing_post(string $uri, int $uid, int $causer, int $lev
return $fallback;
}
Logger::debug('Reply count', ['replies' => $data->thread->post->replyCount, 'level' => $level, 'uid' => $uid, 'uri' => $uri]);
Logger::debug('Reply count', ['level' => $level, 'uid' => $uid, 'uri' => $uri]);
if ($causer != 0) {
$cdata = Contact::getPublicAndUserContactID($causer, $uid);
@ -1331,7 +1350,12 @@ function bluesky_fetch_post(string $uri, int $uid): string
function bluesky_process_thread(stdClass $thread, int $uid, array $cdata, int $level): string
{
if (empty($thread->post)) {
Logger::info('Invalid post', ['post' => $thread]);
return '';
}
$uri = bluesky_get_uri($thread->post);
$fetched_uri = bluesky_fetch_post($uri, $uid);
if (empty($fetched_uri)) {
Logger::debug('Process missing post', ['uri' => $uri]);

85
cld/README.md Normal file
View File

@ -0,0 +1,85 @@
Compact Language Detector
===
CLD2 is an advanced language detection library with high reliability.
This addon depends on the CLD PHP module which is not included in any Linux distribution.
It needs to be built and installed by hand, which is not totally straightforward.
Prerequisite
---
To be able to build the extension, you need the CLD module and the files for the PHP module development.
On Debian you install the packages php-dev, libcld2-dev and libcld2-0.
Make sure to have installed the correct PHP version.
That is, when you have both PHP 8.0 and 8.2 on your system, you have to install php8.0-dev as well.
Installation
---
The original PHP extension is https://github.com/fntlnz/cld2-php-ext.
However, it doesn't support PHP8.
So https://github.com/hiteule/cld2-php-ext/tree/support-php8 has to be used.
Download the source code:
```
wget https://github.com/hiteule/cld2-php-ext/archive/refs/heads/support-php8.zip
```
Unzip it:
```
unzip support-php8.zip
```
Change into the folder:
```
cd cld2-php-ext-support-php8/
```
Configure for the PHP Api version:
```
phpize
```
(if you have got several PHP versions on your system, execute the command with the version that you run Friendica with, e.g. `phpize8.0`)
Create the Makefile:
```
./configure --with-cld2=/usr/include/cld2
```
Have a look at the line `checking for PHP includes`.
When the output (for example `/usr/include/php/20220829`) doesn't match the API version that you got from `phpize`, you have to change all the version codes in your `Makefile` afterwards.
Create the module:
```
make -j
```
Install it:
```
sudo make install
```
Change to the folder with the available modules. When you use PHP 8.2 on Debian it is:
```
cd /etc/php/8.2/mods-available
```
Create the file `cld2.ini` with this content:
```
; configuration for php cld2 module
; priority=20
extension=cld2.so
```
Enable the module for all versions and all sapi:
```
phpenmod -v ALL -s ALL cld2
```
Then restart Apache or PHP-FPM (or whatever you use) to load the changed configuration.
Call `/admin/phpinfo` on your webserver.
You then see the PHP Info.
Search for "cld2".
The module is installed, when you find it here.
**Only proceed when the module is installed**
Now you can enable the addon.

71
cld/cld.php Normal file
View File

@ -0,0 +1,71 @@
<?php
/**
* Name: Compact Language Detector
* Description: Improved language detection
* Version: 0.1
* Author: Michael Vogel <heluecht@pirati.ca>
*/
use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\DI;
/**
 * Addon installation callback.
 *
 * Registers cld_detect_languages() from this file as the handler for the
 * "detect_languages" hook.
 */
function cld_install()
{
	$hook    = 'detect_languages';
	$handler = 'cld_detect_languages';

	Hook::register($hook, __FILE__, $handler);
}
/**
 * Handler for the "detect_languages" hook: refines the language guess via CLD2.
 *
 * Reads $data['text'] and $data['detected'] (language code => probability; the
 * first entry is treated as the language detected so far). When CLD2 yields a
 * usable result, $data['detected'] is updated in place:
 *  - unreliable result for the same language as the first entry: its
 *    probability is raised if CLD2 reports a higher one;
 *  - reliable result for a supported language: that language is moved to the
 *    front while the list keeps its previous length.
 * $data['uri-id'] is only used as logging context.
 *
 * @param array $data Hook payload, modified by reference
 * @return void
 */
function cld_detect_languages(array &$data)
{
	// \CLD2Detector is provided by the optional "cld2" PHP extension.
	if (!extension_loaded('cld2')) {
		Logger::warning('CLD2 is not installed.');
		return;
	}

	// Tolerate an incomplete payload instead of raising warnings/TypeErrors.
	$text  = (string)($data['text'] ?? '');
	$known = is_array($data['detected'] ?? null) ? $data['detected'] : [];

	$cld2 = new \CLD2Detector();
	$cld2->setEncodingHint(\CLD2Encoding::UTF8); // optional, hints about text encoding
	$cld2->setPlainText(true);
	$result = $cld2->detect($text);

	$original = $known ? array_key_first($known) : '';
	$detected = DI::l10n()->toISO6391($result['language_code']);

	// languages that aren't supported via the base language detection or tend to false detections
	$ignored = ['ht', 'kk', 'ku', 'ky', 'lg', 'mg', 'mk', 'mt', 'ny', 'rw', 'st', 'su', 'tg', 'ts', 'xx'];
	if ((strlen($detected) === 3) || in_array($detected, $ignored, true)) {
		return;
	}

	// CLD2 reports the probability as a percentage; the list stores fractions.
	$probability = $result['language_probability'] / 100;

	$context = [
		'uri-id'      => $data['uri-id'] ?? null,
		'original'    => $original,
		'detected'    => $detected,
		'name'        => $result['language_name'],
		'probability' => $result['language_probability'],
		'text'        => $text,
	];

	if (!$result['is_reliable']) {
		Logger::debug('Unreliable detection', $context);
		// Only ever raise the probability of an entry that really exists.
		if (($original === $detected) && array_key_exists($original, $known) && ($known[$original] < $probability)) {
			$data['detected'][$original] = $probability;
		}
		return;
	}

	$available = array_keys(DI::l10n()->getLanguageCodes());
	if (!in_array($detected, $available, true)) {
		Logger::debug('Unsupported language', $context);
		return;
	}

	if ($original !== $detected) {
		Logger::debug('Detected different language', $context);
	}

	// Put the CLD2 result first and keep the list at its previous size: if the
	// language was already listed its old entry is dropped, otherwise the last
	// entry is dropped. An empty list simply becomes the single CLD2 result.
	$length = count($known);
	unset($known[$detected]);
	$data['detected'] = array_merge(
		[$detected => $probability],
		array_slice($known, 0, max($length - 1, 0))
	);
}

View File

@ -163,7 +163,7 @@ function langfilter_prepare_body_content_filter(&$hook_data)
return;
}
$lang = $iso639->languageByCode1($iso2);
$lang = $iso639->languageByCode1(substr($iso2, 0, 2));
} else {
$opts = $hook_data['item']['postopts'];
if (!$opts) {

View File

@ -90,7 +90,7 @@ function smileybutton_jot_tool(string &$body)
for ($x = 0; $x < count($params['texts']); $x++) {
$icon = $params['icons'][$x];
$s .= '<td onclick="smileybutton_addsmiley(\'' . $params['texts'][$x] . '\')">' . $icon . '</td>';
if (($x + 1) % (sqrt(count($params['texts'])) + 1) == 0) {
if (($x + 1) % (floor(sqrt(count($params['texts']))) + 1) == 0) {
$s .= '</tr><tr>';
}
}