diff --git a/mod/item.php b/mod/item.php
index bf8c5f7217..193fa9ed9f 100644
--- a/mod/item.php
+++ b/mod/item.php
@@ -348,7 +348,7 @@ function item_post(App $a) {
 	$str_tags = '';
 	$inform = '';
 
-	$tags = Strings::getTags($body);
+	$tags = BBCode::getTags($body);
 
 	// Add a tag if the parent contact is from ActivityPub or OStatus (This will notify them)
 	if ($parent && in_array($thr_parent_contact['network'], [Protocol::OSTATUS, Protocol::ACTIVITYPUB])) {
diff --git a/mod/photos.php b/mod/photos.php
index b5c648d3bd..8b11e72421 100644
--- a/mod/photos.php
+++ b/mod/photos.php
@@ -525,7 +525,7 @@ function photos_post(App $a)
 			}
 
 			$taginfo = [];
-			$tags = Strings::getTags($rawtags);
+			$tags = BBCode::getTags($rawtags);
 
 			if (count($tags)) {
 				foreach ($tags as $tag) {
diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php
index 778d95cedb..9c9adec0fa 100644
--- a/src/Content/Text/BBCode.php
+++ b/src/Content/Text/BBCode.php
@@ -1911,4 +1911,78 @@ class BBCode extends BaseObject
 
 		return $text;
 	}
+
+	/**
+	 * @brief Pull out all #hashtags, @person tags and !-prefixed tags from $string.
+	 *
+	 * We also get @person@domain.com - which would make
+	 * the regex quite complicated as tags can also
+	 * end a sentence. So we'll run through our results
+	 * and strip the period from any tags which end with one.
+	 * Returns array of tags found, or empty array.
+	 *
+	 * @param string $string Post content
+	 *
+	 * @return array List of tag and person names
+	 */
+	public static function getTags($string)
+	{
+		$ret = [];
+
+		// Convert hashtag links to hashtags
+		$string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);
+
+		// ignore anything in a code block
+		$string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string);
+
+		// Force line feeds at bbtags
+		$string = str_replace(['[', ']'], ["\n[", "]\n"], $string);
+
+		// ignore anything in a bbtag
+		$string = preg_replace('/\[(.*?)\]/sm', '', $string);
+
+		// Match full names against @tags including the space between first and last
+		// We will look these up afterward to see if they are full names or not recognisable.
+
+		if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) {
+			foreach ($matches[1] as $match) {
+				if (strstr($match, ']')) {
+					// we might be inside a bbcode color tag - leave it alone
+					continue;
+				}
+
+				if (substr($match, -1, 1) === '.') {
+					$ret[] = substr($match, 0, -1);
+				} else {
+					$ret[] = $match;
+				}
+			}
+		}
+
+		// Otherwise pull out single word tags. These can be @nickname, @first_last
+		// and #hash tags.
+
+		if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) {
+			foreach ($matches[1] as $match) {
+				if (strstr($match, ']')) {
+					// we might be inside a bbcode color tag - leave it alone
+					continue;
+				}
+				if (substr($match, -1, 1) === '.') {
+					$match = substr($match, 0, -1);
+				}
+				// ignore strictly numeric tags like #1
+				if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
+					continue;
+				}
+				// try not to catch url fragments (tag directly preceded by a letter, digit, underscore or slash)
+				if (strpos($string, $match) && preg_match('/[a-zA-Z0-9_\/]/', substr($string, strpos($string, $match) - 1, 1))) {
+					continue;
+				}
+				$ret[] = $match;
+			}
+		}
+
+		return $ret;
+	}
 }
diff --git a/src/Model/Item.php b/src/Model/Item.php
index a96c77921d..e12f6b6eed 100644
--- a/src/Model/Item.php
+++ b/src/Model/Item.php
@@ -2403,7 +2403,7 @@ class Item extends BaseObject
 
 	public static function setHashtags(&$item)
 	{
-		$tags = Strings::getTags($item["body"]);
+		$tags = BBCode::getTags($item["body"]);
 
 		// No hashtags?
 		if (!count($tags)) {
diff --git a/src/Util/Strings.php b/src/Util/Strings.php
index 30f8b7329a..5cb1fd6b3a 100644
--- a/src/Util/Strings.php
+++ b/src/Util/Strings.php
@@ -158,7 +158,7 @@ class Strings
 	{
 		if ($network != "") {
 			if ($url != "") {
-				$network_name = ''.ContactSelector::networkToName($network, $url)."";
+				$network_name = '' . ContactSelector::networkToName($network, $url) . "";
 			} else {
 				$network_name = ContactSelector::networkToName($network);
 			}
@@ -279,80 +279,6 @@ class Strings
 		return base64_decode(strtr($s, '-_', '+/'));
 	}
 
-	/**
-	 * @brief Pull out all #hashtags and @person tags from $string.
-	 *
-	 * We also get @person@domain.com - which would make
-	 * the regex quite complicated as tags can also
-	 * end a sentence. So we'll run through our results
-	 * and strip the period from any tags which end with one.
-	 * Returns array of tags found, or empty array.
-	 *
-	 * @param string $string Post content
-	 *
-	 * @return array List of tag and person names
-	 */
-	public static function getTags($string)
-	{
-		$ret = [];
-
-		// Convert hashtag links to hashtags
-		$string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);
-
-		// ignore anything in a code block
-		$string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string);
-
-		// Force line feeds at bbtags
-		$string = str_replace(['[', ']'], ["\n[", "]\n"], $string);
-
-		// ignore anything in a bbtag
-		$string = preg_replace('/\[(.*?)\]/sm', '', $string);
-
-		// Match full names against @tags including the space between first and last
-		// We will look these up afterward to see if they are full names or not recognisable.
-
-		if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) {
-			foreach ($matches[1] as $match) {
-				if (strstr($match, ']')) {
-					// we might be inside a bbcode color tag - leave it alone
-					continue;
-				}
-
-				if (substr($match, -1, 1) === '.') {
-					$ret[] = substr($match, 0, -1);
-				} else {
-					$ret[] = $match;
-				}
-			}
-		}
-
-		// Otherwise pull out single word tags. These can be @nickname, @first_last
-		// and #hash tags.
-
-		if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) {
-			foreach ($matches[1] as $match) {
-				if (strstr($match, ']')) {
-					// we might be inside a bbcode color tag - leave it alone
-					continue;
-				}
-				if (substr($match, -1, 1) === '.') {
-					$match = substr($match,0,-1);
-				}
-				// ignore strictly numeric tags like #1
-				if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
-					continue;
-				}
-				// try not to catch url fragments
-				if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) {
-					continue;
-				}
-				$ret[] = $match;
-			}
-		}
-
-		return $ret;
-	}
-
 	/**
 	 * @brief Normalize url
 	 *