Merge pull request #10196 from annando/link-detection
Improved link detection
This commit is contained in:
commit
4395f73d1e
|
@ -253,10 +253,15 @@ class PageInfo
|
||||||
// Fix for Mastodon where the mentions are in a different format
|
// Fix for Mastodon where the mentions are in a different format
|
||||||
$body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body);
|
$body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body);
|
||||||
|
|
||||||
preg_match("~(?<![!#@])\[url]($URLSearchString)\[/url]$~is", $body, $matches);
|
// Remove all hashtags and mentions
|
||||||
|
$body = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $body);
|
||||||
|
|
||||||
|
// Search for pure links
|
||||||
|
preg_match("/\[url\](https?:.*?)\[\/url\]/ism", $body, $matches);
|
||||||
|
|
||||||
if (!$matches) {
|
if (!$matches) {
|
||||||
preg_match("~(?<![!#@])\[url=($URLSearchString)].*\[/url]$~is", $body, $matches);
|
// Search for links with descriptions
|
||||||
|
preg_match("/\[url\=(https?:.*?)\].*?\[\/url\]/ism", $body, $matches);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!$matches && $searchNakedUrls) {
|
if (!$matches && $searchNakedUrls) {
|
||||||
|
|
|
@ -971,6 +971,8 @@ class Item
|
||||||
$item['raw-body'] = Post\Media::insertFromBody($item['uri-id'], $item['raw-body']);
|
$item['raw-body'] = Post\Media::insertFromBody($item['uri-id'], $item['raw-body']);
|
||||||
$item['raw-body'] = self::setHashtags($item['raw-body']);
|
$item['raw-body'] = self::setHashtags($item['raw-body']);
|
||||||
|
|
||||||
|
Post\Media::insertFromAttachmentData($item['uri-id'], $item['body']);
|
||||||
|
|
||||||
// Check for hashtags in the body and repair or add hashtag links
|
// Check for hashtags in the body and repair or add hashtag links
|
||||||
$item['body'] = self::setHashtags($item['body']);
|
$item['body'] = self::setHashtags($item['body']);
|
||||||
|
|
||||||
|
@ -2646,7 +2648,7 @@ class Item
|
||||||
}
|
}
|
||||||
|
|
||||||
$body = $item['body'] ?? '';
|
$body = $item['body'] ?? '';
|
||||||
$item['body'] = preg_replace("/\s*\[attachment .*?\].*?\[\/attachment\]\s*/ism", '', $item['body']);
|
$item['body'] = preg_replace("/\s*\[attachment .*?\].*?\[\/attachment\]\s*/ism", "\n", $item['body']);
|
||||||
self::putInCache($item);
|
self::putInCache($item);
|
||||||
$item['body'] = $body;
|
$item['body'] = $body;
|
||||||
$s = $item["rendered-html"];
|
$s = $item["rendered-html"];
|
||||||
|
@ -2722,6 +2724,12 @@ class Item
|
||||||
*/
|
*/
|
||||||
public static function containsLink(string $body, string $url)
|
public static function containsLink(string $body, string $url)
|
||||||
{
|
{
|
||||||
|
// Make sure that for example site parameters aren't used when testing if the link is contained in the body
|
||||||
|
$urlparts = parse_url($url);
|
||||||
|
unset($urlparts['query']);
|
||||||
|
unset($urlparts['fragment']);
|
||||||
|
$url = Network::unparseURL($urlparts);
|
||||||
|
|
||||||
if (strpos($body, $url)) {
|
if (strpos($body, $url)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -286,6 +286,8 @@ class Media
|
||||||
// Simplify image codes
|
// Simplify image codes
|
||||||
$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
|
$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
|
||||||
|
|
||||||
|
$unshared_body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
|
||||||
|
|
||||||
$attachments = [];
|
$attachments = [];
|
||||||
if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
|
if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
|
||||||
foreach ($pictures as $picture) {
|
foreach ($pictures as $picture) {
|
||||||
|
@ -346,7 +348,10 @@ class Media
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($attachments as $attachment) {
|
foreach ($attachments as $attachment) {
|
||||||
self::insert($attachment);
|
// Only store attachments that are part of the unshared body
|
||||||
|
if (strpos($unshared_body, $attachment['url']) !== false) {
|
||||||
|
self::insert($attachment);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return trim($body);
|
return trim($body);
|
||||||
|
@ -360,6 +365,9 @@ class Media
|
||||||
*/
|
*/
|
||||||
public static function insertFromAttachmentData(int $uriid, string $body)
|
public static function insertFromAttachmentData(int $uriid, string $body)
|
||||||
{
|
{
|
||||||
|
// Don't look at the shared content
|
||||||
|
$body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
|
||||||
|
|
||||||
$data = BBCode::getAttachmentData($body);
|
$data = BBCode::getAttachmentData($body);
|
||||||
if (empty($data)) {
|
if (empty($data)) {
|
||||||
return;
|
return;
|
||||||
|
@ -548,10 +556,18 @@ class Media
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($media['type'] == self::IMAGE) {
|
if ($media['type'] == self::IMAGE) {
|
||||||
if (!empty($media['description'])) {
|
if (!empty($media['preview'])) {
|
||||||
$body .= "\n[img=" . $media['url'] . ']' . $media['description'] .'[/img]';
|
if (!empty($media['description'])) {
|
||||||
|
$body .= "\n[url=" . $media['url'] . "][img=" . $media['preview'] . ']' . $media['description'] .'[/img][/url]';
|
||||||
|
} else {
|
||||||
|
$body .= "\n[url=" . $media['url'] . "][img]" . $media['preview'] .'[/img][/url]';
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
$body .= "\n[img]" . $media['url'] .'[/img]';
|
if (!empty($media['description'])) {
|
||||||
|
$body .= "\n[img=" . $media['url'] . ']' . $media['description'] .'[/img]';
|
||||||
|
} else {
|
||||||
|
$body .= "\n[img]" . $media['url'] .'[/img]';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} elseif ($media['type'] == self::AUDIO) {
|
} elseif ($media['type'] == self::AUDIO) {
|
||||||
$body .= "\n[audio]" . $media['url'] . "[/audio]\n";
|
$body .= "\n[audio]" . $media['url'] . "[/audio]\n";
|
||||||
|
|
Loading…
Reference in a new issue