Require whitespace around smilies and normalize federating text

This commit is contained in:
gudzpoz 2023-11-15 23:53:38 +08:00
parent 2cb0027f56
commit d45e9d6af2
9 changed files with 458 additions and 57 deletions

View File

@ -21,6 +21,7 @@
namespace Friendica\Content;
use Friendica\Content\Text\BBCode;
use Friendica\Core\Hook;
use Friendica\DI;
use Friendica\Util\Strings;
@ -67,7 +68,7 @@ class Smilies
*/
public static function getList(): array
{
$texts = [
$texts = [
'<3',
'</3',
'<\\3',
@ -153,34 +154,129 @@ class Smilies
}
/**
* Finds all used smilies (like :heart: or :p) in the provided text.
* Normalizes smiley shortcodes into texts with no special symbols.
*
* @param string $text that might contain smilie usages (denoted by a starting colon)
* @param bool $extract_url whether to further extract image urls
* @return array with smilie codes (colon included) as the keys, the smilie images as values
* @return array
* 'texts' => smilie shortcut
* 'icons' => icon HTML markup, unchanged from getList() (the extracted url is only used to derive 'norms')
* 'norms' => normalized shortcut
*/
public static function extractUsedSmilies(string $text, bool $extract_url = false): array
public static function getNormalizedList(): array
{
    $list  = self::getList();
    $names = [];

    foreach ($list['texts'] as $index => $text) {
        // Reduce the icon markup to the value of its src attribute ('' when there is none).
        // NOTE(review): this URL is only consulted for naming below; the 'icons' entry of the
        // returned array keeps the value provided by getList().
        $url = preg_match('/src="(.+?)"/', $list['icons'][$index], $found) ? $found[1] : '';

        // Strip whitespace and the characters - : # ~ from the shortcode
        $stripped = preg_replace('/[\s\-:#~]/', '', $text);

        if (ctype_alnum($stripped)) {
            // A purely alphanumeric remainder is used as the normalized name
            $names[] = $stripped;
            continue;
        }

        if (preg_match('#/smiley-(\w+)\.gif#', $url, $found)) {
            // Otherwise take the name from a ".../smiley-<name>.gif" icon file
            $names[] = $found[1];
            continue;
        }

        // Last resort: a name built from the position in the list
        $names[] = 'smiley' . $index;
    }

    $list['norms'] = $names;

    return $list;
}
/**
* Finds all used smilies (denoted by quoting colons like :heart:) in the provided text and normalizes their usages.
*
* @param string $text that might contain smiley usages
* @return array with smilie codes (colon included) as the keys, their image urls as values;
* the normalized string is put under the '' (empty string) key
*/
public static function extractUsedSmilies(string $text): array
{
$emojis = [];
$smilies = self::getList();
$icons = $smilies['icons'];
foreach ($smilies['texts'] as $i => $name) {
if (strstr($text, $name)) {
$image = $icons[$i];
if ($extract_url) {
if (preg_match('/src="(.+?)"/', $image, $match)) {
$image = $match[1];
} else {
continue;
}
$emojis[''] = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) {
return BBCode::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use (&$emojis) {
if (strpos($text, '[nosmile]') !== false || self::noSmilies()) {
return $text;
}
$emojis[$name] = $image;
}
}
$smilies = self::getNormalizedList();
$normalized = array_combine($smilies['texts'], $smilies['norms']);
return self::performForEachWordMatch(
array_combine($smilies['texts'], $smilies['icons']),
$text,
function (string $name, string $image) use($normalized, &$emojis) {
$name = $normalized[$name];
if (preg_match('/src="(.+?)"/', $image, $match)) {
$image = $match[1];
$emojis[$name] = $image;
}
return ':' . $name . ':';
},
);
});
});
return $emojis;
}
/**
* Similar to strtr but matches only whole words and replaces texts with $callback.
*
* @param array $words
* @param string $subject
* @param callable $callback ($word, $value): receives the matched word and its mapped value, returns the replacement text
* @return string
*/
private static function performForEachWordMatch(array $words, string $subject, callable $callback): string
{
    // A "word" is a maximal run of non-whitespace bytes. Each word that is a key of $words is
    // replaced by $callback($word, $words[$word]); everything else, including the whitespace
    // between words, is copied through unchanged.
    //
    // The hash lookup below is the only membership test: a separate length/first-byte
    // pre-filter cannot reject anything the lookup would accept, and it would have to run
    // strlen()/ord() on array keys that PHP may have cast to integers.
    $length    = strlen($subject);
    $offset    = 0;  // byte offset where the word currently examined starts
    $processed = 0;  // every byte before this offset has already been appended to $result
    $result    = '';

    // Find the next whitespace run that is followed by a non-whitespace byte or the subject end
    while ($offset < $length && preg_match('/\s+?(?=\S|$)/', $subject, $matches, PREG_OFFSET_CAPTURE, $offset)) {
        [$whitespaces, $next] = $matches[0];

        // The word is whatever lies between the current offset and the whitespace run
        $word = substr($subject, $offset, $next - $offset);
        if (array_key_exists($word, $words)) {
            // Flush the untouched text preceding the word, then the replacement itself
            $result   .= substr($subject, $processed, $offset - $processed);
            $result   .= $callback($word, $words[$word]);
            $processed = $next;
        }

        // Continue right after the whitespace run
        $offset = $next + strlen($whitespaces);
    }

    // The remainder after the last whitespace run is the final word (possibly empty)
    $word = substr($subject, $offset);
    if (array_key_exists($word, $words)) {
        return $result . substr($subject, $processed, $offset - $processed) . $callback($word, $words[$word]);
    }

    return $result . substr($subject, $processed);
}
/**
* Copied from http://php.net/manual/en/function.str-replace.php#88569
* Modified for camel caps: renamed stro_replace -> strOrigReplace
@ -198,7 +294,13 @@ class Smilies
*/
private static function strOrigReplace(array $search, array $replace, string $subject): string
{
return strtr($subject, array_combine($search, $replace));
return self::performForEachWordMatch(
array_combine($search, $replace),
$subject,
function (string $_, string $value) {
return $value;
}
);
}
/**
@ -227,6 +329,12 @@ class Smilies
return $s;
}
/**
 * Tells whether smiley handling is switched off, either by the 'system' / 'no_smilies'
 * config value or by the same-named pconfig value of the local user.
 *
 * @return bool
 */
private static function noSmilies(): bool
{
    // The config value wins regardless of who is logged in
    if (intval(DI::config()->get('system', 'no_smilies'))) {
        return true;
    }

    // Without a local user id there is no pconfig value to look at
    if (!DI::userSession()->getLocalUserId()) {
        return false;
    }

    return (bool) intval(DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'system', 'no_smilies'));
}
/**
* Replaces emoji shortcodes in a string from a structured array of searches and replaces.
*
@ -240,9 +348,7 @@ class Smilies
*/
public static function replaceFromArray(string $text, array $smilies, bool $no_images = false): string
{
if (intval(DI::config()->get('system', 'no_smilies'))
|| (DI::userSession()->getLocalUserId() && intval(DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'system', 'no_smilies')))
) {
if (self::noSmilies()) {
return $text;
}
@ -261,7 +367,7 @@ class Smilies
$smilies = $cleaned;
}
$text = preg_replace_callback('/&lt;(3+)/', [self::class, 'heartReplaceCallback'], $text);
$text = preg_replace_callback('/\B&lt;3+?\b/', [self::class, 'heartReplaceCallback'], $text);
$text = self::strOrigReplace($smilies['texts'], $smilies['icons'], $text);
$text = preg_replace_callback('/<(code)>(.*?)<\/code>/ism', [self::class, 'decode'], $text);
@ -302,16 +408,7 @@ class Smilies
*/
private static function heartReplaceCallback(array $matches): string
{
if (strlen($matches[1]) == 1) {
return $matches[0];
}
$t = '';
for ($cnt = 0; $cnt < strlen($matches[1]); $cnt ++) {
$t .= '❤';
}
return str_replace($matches[0], $t, $matches[0]);
return str_repeat('❤', strlen($matches[0]) - 4);
}
/**

View File

@ -34,28 +34,18 @@ class Emoji extends BaseFactory
/**
* Creates an emoji collection from shortcode => image mappings.
*
* Only emojis with shortcodes of the form of ':shortcode:' are passed in the collection.
*
* @param array $smilies
* @param bool $extract_url
*
* @return Emojis
*/
public function createCollectionFromArray(array $smilies, bool $extract_url = true): Emojis
public function createCollectionFromArray(array $smilies): Emojis
{
$prototype = null;
$emojis = [];
foreach ($smilies as $shortcode => $url) {
if (substr($shortcode, 0, 1) == ':' && substr($shortcode, -1) == ':') {
if ($extract_url) {
if (preg_match('/src="(.+?)"/', $url, $matches)) {
$url = $matches[1];
} else {
continue;
}
}
if ($shortcode !== '' && $url !== '') {
$shortcode = trim($shortcode, ':');
if ($prototype === null) {
@ -71,12 +61,20 @@ class Emoji extends BaseFactory
}
/**
* @param array $smilies
* @param array $smilies as is returned by Smilies::getList()
*
* @return Emojis
*/
public function createCollectionFromSmilies(array $smilies): Emojis
{
return self::createCollectionFromArray(array_combine($smilies['texts'], $smilies['icons']));
$emojis = [];
$icons = $smilies['icons'];
foreach ($smilies['texts'] as $i => $name) {
$url = $icons[$i];
if (preg_match('/src="(.+?)"/', $url, $matches)) {
$emojis[$name] = $matches[1];
}
}
return self::createCollectionFromArray($emojis);
}
}

View File

@ -290,11 +290,18 @@ class Status extends BaseFactory
$emojis = null;
if (DI::baseUrl()->isLocalUrl($item['uri'])) {
$used_smilies = Smilies::extractUsedSmilies($item['body'] ?: $item['raw-body']);
$used_smilies = Smilies::extractUsedSmilies($item['raw-body'] ?: $item['body']);
// $used_smilies contains normalized texts
if ($item['raw-body']) {
$item['raw-body'] = $used_smilies[''];
} elseif ($item['body']) {
$item['body'] = $used_smilies[''];
}
unset($used_smilies['']);
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray($used_smilies)->getArrayCopy(true);
} else {
if (preg_match_all("(\[emoji=(.*?)](.*?)\[/emoji])ism", $item['body'] ?: $item['raw-body'], $matches)) {
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray(array_combine($matches[2], $matches[1]), false)->getArrayCopy(true);
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray(array_combine($matches[2], $matches[1]))->getArrayCopy(true);
}
}

View File

@ -899,7 +899,7 @@ class Transmitter
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
if (empty($tags)) {
Logger::debug('No receivers found', ['uri-id' => $uri_id]);
$post = Post::selectFirst([Item::DELIVER_FIELDLIST], ['uri-id' => $uri_id, 'origin' => true]);
$post = Post::selectFirst(Item::DELIVER_FIELDLIST, ['uri-id' => $uri_id, 'origin' => true]);
if (!empty($post)) {
ActivityPub\Transmitter::storeReceiversForItem($post);
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
@ -1512,10 +1512,14 @@ class Transmitter
*
* @param array $tags Tag array
* @param string $text Text containing tags like :tag:
* @return string normalized text
*/
private static function addEmojiTags(array &$tags, string $text)
{
foreach (Smilies::extractUsedSmilies($text, true) as $name => $url) {
$emojis = Smilies::extractUsedSmilies($text);
$normalized = $emojis[''];
unset($emojis['']);
foreach ($emojis as $name => $url) {
$tags[] = [
'type' => 'Emoji',
'name' => $name,
@ -1525,6 +1529,7 @@ class Transmitter
],
];
}
return $normalized;
}
/**
@ -1559,8 +1564,6 @@ class Transmitter
}
}
self::addEmojiTags($tags, $item['body']);
$announce = self::getAnnounceArray($item);
// Mention the original author upon commented reshares
if (!empty($announce['comment'])) {
@ -1808,10 +1811,11 @@ class Transmitter
$item = Post\Media::addHTMLAttachmentToItem($item);
$body = $item['body'];
$emojis = [];
if ($type == 'Note') {
$body = $item['raw-body'] ?? self::removePictures($body);
}
$body = self::addEmojiTags($emojis, $body);
/**
* @todo Improve the automated summary
@ -1893,7 +1897,7 @@ class Transmitter
}
$data['attachment'] = self::createAttachmentList($item);
$data['tag'] = self::createTagList($item, $data['quoteUrl'] ?? '');
$data['tag'] = array_merge(self::createTagList($item, $data['quoteUrl'] ?? ''), $emojis);
if (empty($data['location']) && (!empty($item['coord']) || !empty($item['location']))) {
$data['location'] = self::createLocation($item);

View File

@ -112,6 +112,11 @@ return [
'uri' => 'http://localhost/profile/mutualcontact',
'guid' => '46',
],
[
'id' => 100,
'uri' => 'https://friendica.local/posts/100',
'guid' => '100',
],
],
'contact' => [
[
@ -363,6 +368,12 @@ return [
'et sed beatae nihil ullam temporibus corporis ratione blanditiis',
'plink' => 'http://localhost/display/6',
],
[
'uri-id' => 100,
'title' => 'item_title',
'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[',
'plink' => 'https://friendica.local/post/100',
],
],
'post' => [
[
@ -744,6 +755,28 @@ return [
'deleted' => 0,
'wall' => 0,
],
// An emoji post
[
'id' => 14,
'uri-id' => 100,
'visible' => 1,
'contact-id' => 44,
'author-id' => 44,
'owner-id' => 42,
'causer-id' => 44,
'uid' => 0,
'vid' => 8,
'unseen' => 0,
'parent-uri-id' => 7,
'thr-parent-id' => 7,
'private' => Item::PUBLIC,
'global' => true,
'gravity' => Item::GRAVITY_PARENT,
'network' => Protocol::DFRN,
'origin' => 0,
'deleted' => 0,
'wall' => 0,
],
],
'post-thread' => [
[

View File

@ -143,4 +143,107 @@ class SmiliesTest extends FixtureTest
{
$this->assertEquals($expected, Smilies::isEmojiPost($body));
}
/**
 * Data provider for testReplace.
 *
 * Every case holds a fragment that must be contained in the output of Smilies::replace()
 * ('expected') and the text that is passed in ('body').
 *
 * @return array
 */
public function dataReplace(): array
{
    return [
        // Smiley as the whole text
        'simple-1' => [
            'expected' => 'alt=":-p"',
            'body'     => ':-p',
        ],
        // Smiley surrounded by whitespace; needs its own key, a repeated key would
        // overwrite the previous case and leave it untested
        'simple-2' => [
            'expected' => 'alt=":-p"',
            'body'     => ' :-p ',
        ],
        'word-boundary-1' => [
            'expected' => ':-pppp',
            'body'     => ':-pppp',
        ],
        'word-boundary-2' => [
            'expected' => '~friendicaca',
            'body'     => '~friendicaca',
        ],
        'symbol-boundary-1' => [
            'expected' => '(:-p)',
            'body'     => '(:-p)',
        ],
        'hearts-1' => [
            'expected' => '❤ (❤) ❤',
            'body'     => '&lt;3 (&lt;3) &lt;3',
        ],
        'hearts-8' => [
            'expected' => '(❤❤❤❤❤❤❤❤)',
            'body'     => '(&lt;33333333)',
        ],
        'no-hearts-1' => [
            'expected' => '(&lt;30)',
            'body'     => '(&lt;30)',
        ],
        'no-hearts-2' => [
            'expected' => '(3&lt;33)',
            'body'     => '(3&lt;33)',
        ],
    ];
}
/**
* @dataProvider dataReplace
*
* @param string $expected
* @param string $body
*/
public function testReplace(string $expected, string $body)
{
    // The provider only supplies a fragment, so a containment check is used instead of equality
    $this->assertStringContainsString($expected, Smilies::replace($body));
}
/**
 * Data provider for testExtractUsedSmilies.
 *
 * Every case lists the normalized shortcodes expected as keys of the result ('expected'),
 * the text that is passed in ('body') and the text expected under the '' key ('normalized').
 *
 * @return array
 */
public function dataExtractUsedSmilies(): array
{
    $cases = [];

    $cases['single-smiley'] = [
        'expected'   => ['like'],
        'body'       => ':like',
        'normalized' => ':like:',
    ];

    $cases['multiple-smilies'] = [
        'expected'   => ['like', 'dislike'],
        'body'       => ':like :dislike',
        'normalized' => ':like: :dislike:',
    ];

    // A [nosmile] marker leaves the text as it is
    $cases['nosmile'] = [
        'expected'   => [],
        'body'       => '[nosmile] :like :like',
        'normalized' => '[nosmile] :like :like',
    ];

    // Shortcodes inside [code] are left as they are
    $cases['in-code'] = [
        'expected'   => [],
        'body'       => '[code]:like :like :like[/code]',
        'normalized' => '[code]:like :like :like[/code]',
    ];

    $cases['~friendica'] = [
        'expected'   => ['friendica'],
        'body'       => '~friendica',
        'normalized' => ':friendica:',
    ];

    return $cases;
}
/**
* @dataProvider dataExtractUsedSmilies
*
* @param array $expected
* @param string $body
* @param string $normalized
*/
public function testExtractUsedSmilies(array $expected, string $body, string $normalized)
{
    $result = Smilies::extractUsedSmilies($body);

    // The normalized text is returned under the empty-string key
    $this->assertEquals($normalized, $result['']);

    foreach ($expected as $name) {
        $this->assertArrayHasKey($name, $result);
    }

    // Apart from the '' entry there must be exactly one entry per expected shortcode
    $this->assertEquals(count($expected), count($result) - 1);
}
}

View File

@ -0,0 +1,45 @@
<?php
/**
* @copyright Copyright (C) 2010-2023, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Test\src\Factory\Api\Mastodon;
use Friendica\Content\Smilies;
use Friendica\DI;
use Friendica\Test\FixtureTest;
class EmojiTest extends FixtureTest
{
    protected function setUp(): void
    {
        parent::setUp();

        // Make sure the 'system' / 'no_smilies' config value is off for this test
        DI::config()->set('system', 'no_smilies', false);
    }

    /**
     * Every entry of the collection built from Smilies::getList() must carry
     * a url that contains "/images/".
     */
    public function testBuiltInCollection()
    {
        $entries = DI::mstdnEmoji()
            ->createCollectionFromSmilies(Smilies::getList())
            ->getArrayCopy(true);

        foreach ($entries as $entry) {
            $matched = preg_match('(/images/.*)', $entry['url']);
            // The url doubles as failure message to show the offending entry
            $this->assertTrue($matched === 1, $entry['url']);
        }
    }
}

View File

@ -0,0 +1,61 @@
<?php
/**
* @copyright Copyright (C) 2010-2023, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Test\src\Factory\Api\Mastodon;
use Friendica\Model\Post;
use Friendica\DI;
use Friendica\Test\FixtureTest;
class StatusTest extends FixtureTest
{
    // Object returned by DI::mstdnStatus(), assigned in setUp()
    protected $status;

    protected function setUp(): void
    {
        parent::setUp();

        // Make sure the 'system' / 'no_smilies' config value is off for these tests
        DI::config()->set('system', 'no_smilies', false);

        $this->status = DI::mstdnStatus();
    }

    /**
     * The fixture post with id 13 can be turned into a status.
     */
    public function testSimpleStatus()
    {
        $post = Post::selectFirst([], ['id' => 13]);
        $this->assertNotNull($post);

        $result = $this->status->createFromUriId($post['uri-id']);
        $this->assertNotNull($result);
    }

    /**
     * The fixture post with id 14 yields normalized shortcodes in the content
     * and one emoji entry per smiley used outside of the code block.
     */
    public function testSimpleEmojiStatus()
    {
        $post = Post::selectFirst([], ['id' => 14]);
        $this->assertNotNull($post);

        $result = $this->status->createFromUriId($post['uri-id'])->toArray();
        $this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed:', $result['content']);

        $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
        $this->assertCount(count($emojis), $result['emojis']);

        foreach ($result['emojis'] as $emoji) {
            $this->assertArrayHasKey($emoji['shortcode'], $emojis);
            // A loose comparison of strpos() with 0 also holds for false ("not found"),
            // so the prefix is asserted directly
            $this->assertStringStartsWith('http', $emoji['url']);
        }
    }
}

View File

@ -0,0 +1,53 @@
<?php
/**
* @copyright Copyright (C) 2010-2023, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Test\src\Protocol\ActivityPub;
use Friendica\DI;
use Friendica\Model\Post;
use Friendica\Protocol\ActivityPub\Transmitter;
use Friendica\Test\FixtureTest;
class TransmitterTest extends FixtureTest
{
    protected function setUp(): void
    {
        parent::setUp();

        // Make sure the 'system' / 'no_smilies' config value is off for this test
        DI::config()->set('system', 'no_smilies', false);
    }

    /**
     * The note created from the fixture post with id 14 carries normalized shortcodes
     * in its content and one 'Emoji' tag per smiley used outside of the code block.
     */
    public function testEmojiPost()
    {
        $item = Post::selectFirst([], ['id' => 14]);
        $this->assertNotNull($item);

        $activity = Transmitter::createNote($item);
        $this->assertNotNull($activity);

        $this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed:', $activity['content']);

        $expectedNames = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
        $this->assertEquals(count($expectedNames), count($activity['tag']));

        foreach ($activity['tag'] as $tag) {
            $this->assertTrue(array_key_exists($tag['name'], $expectedNames));
            $this->assertEquals('Emoji', $tag['type']);
        }
    }
}