Improve plaintext generation for language detection
This commit is contained in:
parent
cc42c0ba27
commit
16224a7001
|
@ -1220,6 +1220,19 @@ class BBCode
|
||||||
return $return;
|
return $return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Strips image and link BBCode elements from a text
 *
 * - [img=<width>x<height>]...[/img] and [img]...[/img] are replaced by a blank
 * - [img=...]...[/img] is replaced by the attribute value, padded with blanks
 * - [url=...]...[/url] directly preceded by "@", "!" or "#" is dropped
 *   together with that prefix character
 * - [url]...[/url] is replaced by a blank
 * - [url=...]...[/url] is replaced by the enclosed text, padded with blanks
 *
 * @param string $bbcode BBCode text
 * @return string The text with the image and link elements removed
 */
public static function removeLinks(string $bbcode)
{
    // Every rule is a [pattern, replacement] pair. The order matters, since
    // each rule works on the output of the previous one.
    $rules = [
        // Pictures
        ["/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", ' '],
        ["/\[img\=(.*?)\](.*?)\[\/img\]/ism", ' $1 '],
        ["/\[img\](.*?)\[\/img\]/ism", ' '],

        // Links prefixed with "@", "!" or "#"
        ['/([@!#])\[url\=(.*?)\](.*?)\[\/url\]/ism', ''],

        // Remaining links
        ["/\[url\](.*?)\[\/url\]/ism", ' '],
        ["/\[url=[^\[\]]*\](.*)\[\/url\]/Usi", ' $1 '],

        // Second pass for bare links that are still present after the previous rule
        ["/\[url\](.*?)\[\/url\]/ism", ' '],
    ];

    foreach ($rules as $rule) {
        $bbcode = preg_replace($rule[0], $rule[1], $bbcode);
    }

    return $bbcode;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a BBCode message to HTML message
|
* Converts a BBCode message to HTML message
|
||||||
*
|
*
|
||||||
|
|
|
@ -2476,7 +2476,17 @@ class Item
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$naked_body = BBCode::toPlaintext($item['body'], false);
|
// Convert attachments to links
|
||||||
|
$naked_body = BBCode::removeAttachment($item['body']);
|
||||||
|
|
||||||
|
// Remove links and pictures
|
||||||
|
$naked_body = BBCode::removeLinks($naked_body);
|
||||||
|
|
||||||
|
// Convert the title and the body to plain text
|
||||||
|
$naked_body = trim($item['title'] . "\n" . BBCode::toPlaintext($naked_body));
|
||||||
|
|
||||||
|
// Remove possibly remaining links
|
||||||
|
$naked_body = preg_replace(Strings::autoLinkRegEx(), '', $naked_body);
|
||||||
|
|
||||||
$ld = new Language();
|
$ld = new Language();
|
||||||
$languages = $ld->detect($naked_body)->limit(0, 3)->close();
|
$languages = $ld->detect($naked_body)->limit(0, 3)->close();
|
||||||
|
|
Loading…
Reference in a new issue