Add Unicode support to autolink regular expression

- Explicitly exclude non-breaking spaces from URLs as \s doesn't include them
This commit is contained in:
Hypolite Petovan 2019-05-19 08:45:29 -04:00
parent 6f0c6e8926
commit 3395f56036

View file

@@ -355,25 +355,25 @@ class Strings
*/ */
public static function autoLinkRegEx()
{
	// Pattern flags:
	//   x - extended syntax: whitespace and "# ..." comments inside the pattern are ignored
	//   i - case-insensitive matching
	//   u - pattern and subject are treated as UTF-8
	// \s does not cover the non-breaking space here, so \xA0 is excluded
	// explicitly in every character class that must stop at whitespace,
	// including the innermost level of the balanced-parentheses alternative.
	return '@
		(?<![=\'\]"/])                               # Not preceded by =, \', ], " or /
		\b
		(                                            # Capture 1: entire matched URL
			https?://                                # http or https protocol
			(?:
				[^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’.]     # First host character, can\'t be a .
				[^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’]+     # Rest of the host name, dots allowed
				\.                                       # Last dot of the host name
				[^/\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]+/?  # Final dot-free label, optional slash
			)
			(?:                                      # Zero or more of:
				[^\s\xA0()<>]+                           # Run of non-space, non-()<>
				|                                        # or
				\(([^\s\xA0()<>]+|(\([^\s\xA0()<>]+\)))*\)  # balanced parens, up to 2 levels
				|                                        # or
				[^\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]      # not a space or one of these punct chars
			)*
		)@xiu';
}
/** /**