Merge pull request #7167 from MrPetovan/bug/7150-unescape-xml-entities

Add Unicode support to autolink regular expression
This commit is contained in:
Michael Vogel 2019-05-20 06:36:59 +02:00 committed by GitHub
commit cbca26d185
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 11 deletions

View file

@ -7,6 +7,7 @@ use Friendica\Content\Text;
use Friendica\Core\L10n;
use Friendica\Core\Renderer;
use Friendica\Model\Item;
use Friendica\Util\XML;
/**
* Translates input text into different formats (HTML, BBCode, Markdown)
@ -98,10 +99,10 @@ class Babel extends BaseModule
$markdown = trim($_REQUEST['text']);
$results[] = [
'title' => L10n::t('Source input (Diaspora format)'),
'content' => '<pre>' . $markdown . '</pre>'
'content' => '<pre>' . htmlspecialchars($markdown) . '</pre>'
];
$html = Text\Markdown::convert($markdown);
$html = Text\Markdown::convert(html_entity_decode($markdown,ENT_COMPAT, 'UTF-8'));
$results[] = [
'title' => L10n::t('Markdown::convert (raw HTML)'),
'content' => visible_whitespace(htmlspecialchars($html))
@ -112,7 +113,7 @@ class Babel extends BaseModule
'content' => $html
];
$bbcode = Text\Markdown::toBBCode($markdown);
$bbcode = Text\Markdown::toBBCode(XML::unescape($markdown));
$results[] = [
'title' => L10n::t('Markdown::toBBCode'),
'content' => '<pre>' . $bbcode . '</pre>'

View file

@ -355,25 +355,25 @@ class Strings
*/
public static function autoLinkRegEx()
{
	// Builds the PCRE pattern used to find bare http(s) URLs in text.
	// Capture group 1 holds the entire matched URL.
	//
	// The pattern uses "@" as delimiter and the modifiers:
	//   x - extended syntax, so the pattern can be laid out over several
	//       lines with "#" comments,
	//   i - case-insensitive matching,
	//   u - pattern and subject are treated as UTF-8, so the typographic
	//       quote characters and \xA0 inside the character classes denote
	//       whole code points rather than single bytes.
	//
	// \xA0 (no-break space) is excluded explicitly next to \s in every
	// "non-space" character class, including both nesting levels of the
	// balanced-parentheses alternative, so a URL followed by a no-break
	// space ends before it.
	return '@
(?<![=\'\]"/])			# Not preceded by [, =, \', ], ", /
\b
(							# Capture 1: entire matched URL
  https?://					# http or https protocol
  (?:
    [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’.]	# Domain can\'t start with a .
    [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’]+	# Domain can\'t end with a .
    \.
    [^/\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]+/?	# Followed by a slash
  )
  (?:						# One or more:
    [^\s\xA0()<>]+			# Run of non-space, non-()<>
    |						# or
    \(([^\s\xA0()<>]+|(\([^\s\xA0()<>]+\)))*\)	# balanced parens, up to 2 levels
    |						# or
    [^\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]	# not a space or one of these punct chars
  )*
)@xiu';
}
/**

View file

@ -109,6 +109,10 @@ class BBCodeTest extends MockedTest
'data' => "http://example.com<ul>",
'assertHTML' => false
],
'bug-7150' => [
'data' => html_entity_decode('http://example.com&nbsp;', ENT_QUOTES, 'UTF-8'),
'assertHTML' => false
],
];
}