Merge pull request #6845 from MrPetovan/bug/6633-add-markdown-autolinker

Add autolinker to Markdown parser
This commit is contained in:
Tobias Diekershoff 2019-03-10 07:02:09 +01:00 committed by GitHub
commit 81e1e859a3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 130 additions and 26 deletions

View file

@ -24,7 +24,8 @@ use Friendica\Util\ParseUrl;
use Friendica\Util\Strings; use Friendica\Util\Strings;
use Friendica\Util\Temporal; use Friendica\Util\Temporal;
require_once 'mod/share.php'; require_once __DIR__ . '/../mod/share.php';
function add_page_info_data(array $data, $no_photos = false) function add_page_info_data(array $data, $no_photos = false)
{ {
Hook::callAll('page_info_data', $data); Hook::callAll('page_info_data', $data);

View file

@ -4,7 +4,7 @@
*/ */
namespace Friendica; namespace Friendica;
require_once 'boot.php'; require_once __DIR__ . '/../boot.php';
use Friendica\Network\HTTPException\InternalServerErrorException; use Friendica\Network\HTTPException\InternalServerErrorException;

View file

@ -1268,24 +1268,7 @@ class BBCode extends BaseObject
// if the HTML is used to generate plain text, then don't do this search, but replace all URL of that kind to text // if the HTML is used to generate plain text, then don't do this search, but replace all URL of that kind to text
if (!$for_plaintext) { if (!$for_plaintext) {
// Autolink feature (thanks to https://daringfireball.net/2010/07/improved_regex_for_matching_urls) $text = preg_replace(Strings::autoLinkRegEx(), '[url]$1[/url]', $text);
$autolink_regex = '@(?xi)
(?<![=\'\]"/]) # Not preceded by =, \', ], ", /
\b
( # Capture 1: entire matched URL
https?:// # http or https protocol
(?:
[^/.][^/]+[.][^/]+/? # looks like domain name followed by a slash
)
(?: # One or more:
[^\s()<>]+ # Run of non-space, non-()<>
| # or
\(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
| # or
[^\s`!()\[\]{};:\'".,<>?«»“”‘’] # not a space or one of these punct chars
)*
)@';
$text = preg_replace($autolink_regex, '[url]$1[/url]', $text);
if ($simple_html == 7) { if ($simple_html == 7) {
$text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text); $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
$text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text); $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);

View file

@ -9,7 +9,6 @@ namespace Friendica\Content\Text;
use Friendica\BaseObject; use Friendica\BaseObject;
use Friendica\Core\System; use Friendica\Core\System;
use Friendica\Model\Contact; use Friendica\Model\Contact;
use Michelf\MarkdownExtra;
/** /**
* Friendica-specific usage of Markdown * Friendica-specific usage of Markdown
@ -31,11 +30,18 @@ class Markdown extends BaseObject
public static function convert($text, $hardwrap = true) { public static function convert($text, $hardwrap = true) {
$stamp1 = microtime(true); $stamp1 = microtime(true);
$MarkdownParser = new MarkdownExtra(); $MarkdownParser = new MarkdownParser();
$MarkdownParser->hard_wrap = $hardwrap; $MarkdownParser->code_class_prefix = 'language-';
$MarkdownParser->code_class_prefix = 'language-'; $MarkdownParser->hard_wrap = $hardwrap;
$MarkdownParser->hashtag_protection = true;
$MarkdownParser->url_filter_func = function ($url) {
if (strpos($url, '#') === 0) {
$url = ltrim($_SERVER['REQUEST_URI'], '/') . $url;
}
return $url;
};
$html = $MarkdownParser->transform($text); $html = $MarkdownParser->transform($text);
$html = preg_replace('/<a(.*?)href="#/is', '<a$1href="' . ltrim($_SERVER['REQUEST_URI'], '/') . '#', $html);
self::getApp()->getProfiler()->saveTimestamp($stamp1, "parser", System::callstack()); self::getApp()->getProfiler()->saveTimestamp($stamp1, "parser", System::callstack());

View file

@ -0,0 +1,18 @@
<?php
namespace Friendica\Content\Text;
use Friendica\Util\Strings;
use Michelf\MarkdownExtra;
/**
 * Markdown parser that additionally auto-links bare URLs.
 *
 * Extends the MarkdownExtra parser and adds a second auto-link pass driven by
 * Strings::autoLinkRegEx() on top of the parent's own auto-link handling.
 */
class MarkdownParser extends MarkdownExtra
{
    /**
     * Runs the parent auto-link pass, then links every URL matched by
     * Strings::autoLinkRegEx().
     *
     * Matches are rendered by the inherited _doAutoLinks_url_callback()
     * (defined in the parent library; presumably the same callback the parent
     * uses for its own URL auto-links - confirm against the library version in use).
     *
     * @param string $text Text being transformed
     * @return string Text with the additional URLs converted
     */
    protected function doAutoLinks($text)
    {
        $text = parent::doAutoLinks($text);

        $linked = preg_replace_callback(
            Strings::autoLinkRegEx(),
            [$this, '_doAutoLinks_url_callback'],
            $text
        );

        // preg_replace_callback() returns null on a PCRE error (e.g. backtrack
        // limit reached); keep the text from the parent pass instead of
        // discarding the whole document.
        return $linked === null ? $text : $linked;
    }
}

View file

@ -17,7 +17,7 @@ class FriendicaSmartyEngine implements ITemplateEngine
public function __construct() public function __construct()
{ {
if (!is_writable('view/smarty3/')) { if (!is_writable(__DIR__ . '/../../view/smarty3/')) {
echo "<b>ERROR:</b> folder <tt>view/smarty3/</tt> must be writable by webserver."; echo "<b>ERROR:</b> folder <tt>view/smarty3/</tt> must be writable by webserver.";
exit(); exit();
} }

View file

@ -346,4 +346,30 @@ class Strings
return $return; return $return;
} }
/**
 * Returns the regular expression string to match URLs in a given text
 *
 * The pattern uses "@" as delimiter and the inline (?xi) flags (extended,
 * case-insensitive). Capture group 1 holds the entire matched URL. URLs
 * directly preceded by =, ', ], " or / are not matched.
 *
 * The host part must not contain whitespace, so that a URL followed by
 * ordinary prose ("http://example.com for info.") stops at the end of the
 * host instead of swallowing the rest of the sentence.
 *
 * NOTE: the pattern carries no "u" modifier, so the multi-byte quote
 * characters in the last character class are treated byte-wise.
 *
 * @return string
 * @see https://daringfireball.net/2010/07/improved_regex_for_matching_urls
 */
public static function autoLinkRegEx()
{
    return '@(?xi)
(?<![=\'\]"/]) # Not preceded by =, \', ], ", /
\b
( # Capture 1: entire matched URL
https?:// # http or https protocol
(?:
[^/.\s][^/\s]+[.][^/\s]+/? # looks like domain name without whitespace, optionally followed by a slash
)
(?: # Zero or more:
[^\s()<>]+ # Run of non-space, non-()<>
| # or
\(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
| # or
[^\s`!()\[\]{};:\'".,<>?«»“”‘’] # not a space or one of these punct chars
)*
)@';
}
} }

View file

@ -0,0 +1,9 @@
<h1>Produção de cebola em sistema orgânico com uso de homeopatia</h1>
<p><img src="https://mapadaagroecologia.org/system/midias/imagems/000/000/097/original/Cebola_em_sistema_org%C3%A2nico.jpg?1549640469" alt="Bulbos de cebola em sistema orgânico na fase de colheita
" title="Bulbos de cebola em sistema orgânico na fase de colheita
" /></p>
<h2><a href="https://mapadaagroecologia.org/locais/epagri-estacao-experimental-de-ituporanga-sc?locale=pt-BR">https://mapadaagroecologia.org/locais/epagri-estacao-experimental-de-ituporanga-sc?locale=pt-BR</a></h2>
<p>#agroecologia #ecologia #orgânico #agroecology #brazil</p>

View file

@ -0,0 +1,9 @@
# Produção de cebola em sistema orgânico com uso de homeopatia
![Bulbos de cebola em sistema orgânico na fase de colheita
](https://mapadaagroecologia.org/system/midias/imagems/000/000/097/original/Cebola_em_sistema_org%C3%A2nico.jpg?1549640469 "Bulbos de cebola em sistema orgânico na fase de colheita
")
## https://mapadaagroecologia.org/locais/epagri-estacao-experimental-de-ituporanga-sc?locale=pt-BR
#agroecologia #ecologia #orgânico #agroecology #brazil

View file

@ -0,0 +1,52 @@
<?php
namespace Friendica\Test\src\Content\Text;
use Friendica\Content\Text\Markdown;
use Friendica\Test\MockedTest;
use Friendica\Test\Util\AppMockTrait;
use Friendica\Test\Util\VFSTrait;
/**
 * Fixture-driven tests for Markdown::convert().
 *
 * Every "*.md" file in the markdown dataset directory is paired with a
 * sibling ".html" file holding the expected conversion result.
 */
class MarkdownTest extends MockedTest
{
    use VFSTrait;
    use AppMockTrait;

    /**
     * Prepares the test environment before each test: calls setUpVfsDir() and
     * mockApp() (presumably provided by the VFSTrait / AppMockTrait helpers).
     */
    protected function setUp()
    {
        parent::setUp();
        $this->setUpVfsDir();
        $this->mockApp($this->root);
    }

    /**
     * Data provider: builds one dataset per Markdown fixture file.
     *
     * The dataset key is the fixture path without its ".md" extension.
     *
     * @return array Map of dataset name => ['input' => ..., 'expected' => ...]
     */
    public function dataMarkdown()
    {
        $inputFiles = glob(__DIR__ . '/../../../datasets/content/text/markdown/*.md');

        $data = [];

        // glob() returns false on error; treat that as "no fixtures".
        foreach ($inputFiles ?: [] as $file) {
            // Strip only the trailing extension. A plain str_replace() would
            // also rewrite any ".md" occurring elsewhere in the absolute path.
            $basePath = substr($file, 0, -strlen('.md'));

            $data[$basePath] = [
                'input' => file_get_contents($file),
                'expected' => file_get_contents($basePath . '.html')
            ];
        }

        return $data;
    }

    /**
     * Test convert different input Markdown text into HTML
     * @dataProvider dataMarkdown
     *
     * @param string $input The Markdown text to test
     * @param string $expected The expected HTML output
     * @throws \Exception
     */
    public function testConvert($input, $expected)
    {
        $output = Markdown::convert($input);

        $this->assertEquals($expected, $output);
    }
}