Browse Source

Merge pull request #3295 from Hypolite/issue/#3274

Friendica code blocks aren't rendered in Diaspora
pull/3305/head
Michael Vogel 5 years ago
committed by GitHub
parent
commit
56c0675fba
  1. 1
      composer.json
  2. 66
      composer.lock
  3. 83
      include/bb2diaspora.php
  4. 360
      include/bbcode.php
  5. 3
      library/html-to-markdown/.gitignore
  6. 6
      library/html-to-markdown/.travis.yml
  7. 598
      library/html-to-markdown/HTML_To_Markdown.php
  8. 138
      library/html-to-markdown/README.md
  9. 4
      library/html-to-markdown/circle.yml
  10. 25
      library/html-to-markdown/composer.json
  11. 17
      vendor/bin/html-to-markdown
  12. 4
      vendor/bin/html-to-markdown.bat
  13. 23
      vendor/composer/autoload_classmap.php
  14. 1
      vendor/composer/autoload_psr4.php
  15. 31
      vendor/composer/autoload_static.php
  16. 66
      vendor/composer/installed.json
  17. 214
      vendor/league/html-to-markdown/CHANGELOG.md
  18. 22
      vendor/league/html-to-markdown/CONDUCT.md
  19. 4
      vendor/league/html-to-markdown/LICENSE
  20. 196
      vendor/league/html-to-markdown/README.md
  21. 108
      vendor/league/html-to-markdown/bin/html-to-markdown
  22. 48
      vendor/league/html-to-markdown/composer.json
  23. 60
      vendor/league/html-to-markdown/src/Configuration.php
  24. 11
      vendor/league/html-to-markdown/src/ConfigurationAwareInterface.php
  25. 44
      vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php
  26. 62
      vendor/league/html-to-markdown/src/Converter/CodeConverter.php
  27. 26
      vendor/league/html-to-markdown/src/Converter/CommentConverter.php
  28. 20
      vendor/league/html-to-markdown/src/Converter/ConverterInterface.php
  29. 50
      vendor/league/html-to-markdown/src/Converter/DefaultConverter.php
  30. 45
      vendor/league/html-to-markdown/src/Converter/DivConverter.php
  31. 57
      vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php
  32. 41
      vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php
  33. 78
      vendor/league/html-to-markdown/src/Converter/HeaderConverter.php
  34. 26
      vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php
  35. 35
      vendor/league/html-to-markdown/src/Converter/ImageConverter.php
  36. 52
      vendor/league/html-to-markdown/src/Converter/LinkConverter.php
  37. 26
      vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php
  38. 47
      vendor/league/html-to-markdown/src/Converter/ListItemConverter.php
  39. 124
      vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php
  40. 59
      vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php
  41. 46
      vendor/league/html-to-markdown/src/Converter/TextConverter.php
  42. 257
      vendor/league/html-to-markdown/src/Element.php
  43. 80
      vendor/league/html-to-markdown/src/ElementInterface.php
  44. 104
      vendor/league/html-to-markdown/src/Environment.php
  45. 231
      vendor/league/html-to-markdown/src/HtmlConverter.php

1
composer.json

@ -15,6 +15,7 @@
"require": {
"ezyang/htmlpurifier": "~4.7.0",
"mobiledetect/mobiledetectlib": "2.8.*",
"league/html-to-markdown": "~4.4.1",
"pear-pear.php.net/Text_Highlighter": "*"
},
"repositories": [

66
composer.lock

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"content-hash": "4d3a9e742e7ad746fb7206f3b5aff5af",
"content-hash": "802372ddf124ef949e80dd8dc1d38797",
"packages": [
{
"name": "ezyang/htmlpurifier",
@ -50,6 +50,70 @@
],
"time": "2015-08-05T01:03:42+00:00"
},
{
"name": "league/html-to-markdown",
"version": "4.4.1",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/html-to-markdown.git",
"reference": "82ea375b5b2b1da1da222644c0565c695bf88186"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/html-to-markdown/zipball/82ea375b5b2b1da1da222644c0565c695bf88186",
"reference": "82ea375b5b2b1da1da222644c0565c695bf88186",
"shasum": ""
},
"require": {
"ext-dom": "*",
"ext-xml": "*",
"php": ">=5.3.3"
},
"require-dev": {
"mikehaertl/php-shellcommand": "~1.1.0",
"phpunit/phpunit": "4.*",
"scrutinizer/ocular": "~1.1"
},
"bin": [
"bin/html-to-markdown"
],
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "4.5-dev"
}
},
"autoload": {
"psr-4": {
"League\\HTMLToMarkdown\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Colin O'Dell",
"email": "colinodell@gmail.com",
"homepage": "http://www.colinodell.com",
"role": "Lead Developer"
},
{
"name": "Nick Cernis",
"email": "nick@cern.is",
"homepage": "http://modernnerd.net",
"role": "Original Author"
}
],
"description": "An HTML-to-markdown conversion helper for PHP",
"homepage": "https://github.com/thephpleague/html-to-markdown",
"keywords": [
"html",
"markdown"
],
"time": "2017-03-16T00:45:59+00:00"
},
{
"name": "mobiledetect/mobiledetectlib",
"version": "2.8.25",

83
include/bb2diaspora.php

@ -1,11 +1,12 @@
<?php
require_once("include/oembed.php");
require_once("include/event.php");
require_once("library/markdown.php");
require_once("include/html2bbcode.php");
require_once("include/bbcode.php");
require_once("library/html-to-markdown/HTML_To_Markdown.php");
use League\HTMLToMarkdown\HtmlConverter;
require_once "include/oembed.php";
require_once "include/event.php";
require_once "library/markdown.php";
require_once "include/html2bbcode.php";
require_once "include/bbcode.php";
/**
* @brief Callback function to replace a Diaspora style mention in a mention for Friendica
@ -104,47 +105,73 @@ function diaspora_mentions($match) {
return $mention;
}
function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
/**
* @brief Converts a BBCode text into Markdown
*
* This function converts a BBCode item body to be sent to Markdown-enabled
* systems like Diaspora and Libertree
*
* @param string $Text
* @param bool $preserve_nl Effects unclear, unused in Friendica
* @param bool $fordiaspora Diaspora requires more changes than Libertree
* @return string
*/
function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
$a = get_app();
$OriginalText = $Text;
// Since Diaspora is creating a summary for links, this function removes them before posting
if ($fordiaspora)
if ($fordiaspora) {
$Text = bb_remove_share_information($Text);
}
/**
* Transform #tags, strip off the [url] and replace spaces with underscore
*/
$URLSearchString = "^\[\]";
$Text = preg_replace_callback("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/i", create_function('$match',
'return \'#\'. str_replace(\' \', \'_\', $match[2]);'
), $Text);
$Text = preg_replace_callback("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/i",
function ($matches) {
return '#' . str_replace(' ', '_', $matches[2]);
}
, $Text);
// Converting images with size parameters to simple images. Markdown doesn't know it.
$Text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $Text);
// Extracting multi-line code blocks before the whitespace processing/code highlighter in bbcode()
$codeblocks = [];
$Text = preg_replace_callback('#\[code(?:=([^\]]*))?\](?=\n)(.*?)\[\/code\]#is',
function ($matches) use (&$codeblocks) {
$return = '#codeblock-' . count($codeblocks) . '#';
$prefix = '````' . $matches[1] . PHP_EOL;
$codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
return $return;
}
, $Text);
// Convert it to HTML - don't try oembed
if ($fordiaspora) {
$Text = bbcode($Text, $preserve_nl, false, 3);
// Add all tags that maybe were removed
if (preg_match_all("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",$OriginalText, $tags)) {
if (preg_match_all("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", $OriginalText, $tags)) {
$tagline = "";
foreach($tags[2] as $tag) {
foreach ($tags[2] as $tag) {
$tag = html_entity_decode($tag, ENT_QUOTES, 'UTF-8');
if (!strpos(html_entity_decode($Text, ENT_QUOTES, 'UTF-8'), "#".$tag))
$tagline .= "#".$tag." ";
if (!strpos(html_entity_decode($Text, ENT_QUOTES, 'UTF-8'), '#' . $tag)) {
$tagline .= '#' . $tag . ' ';
}
}
$Text = $Text." ".$tagline;
}
} else
} else {
$Text = bbcode($Text, $preserve_nl, false, 4);
}
// mask some special HTML chars from conversation to markdown
$Text = str_replace(array('&lt;','&gt;','&amp;'),array('&_lt_;','&_gt_;','&_amp_;'),$Text);
$Text = str_replace(array('&lt;', '&gt;', '&amp;'), array('&_lt_;', '&_gt_;', '&_amp_;'), $Text);
// If a link is followed by a quote then there should be a newline before it
// Maybe we should make this newline at every time before a quote.
@ -153,10 +180,11 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
$stamp1 = microtime(true);
// Now convert HTML to Markdown
$Text = new HTML_To_Markdown($Text);
$converter = new HtmlConverter();
$Text = $converter->convert($Text);
// unmask the special chars back to HTML
$Text = str_replace(array('&_lt_;','&_gt_;','&_amp_;'),array('&lt;','&gt;','&amp;'),$Text);
$Text = str_replace(array('&_lt_;', '&_gt_;', '&_amp_;'), array('&lt;', '&gt;', '&amp;'), $Text);
$a->save_timestamp($stamp1, "parser");
@ -172,13 +200,24 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
$Text = preg_replace_callback("/([@]\[(.*?)\])\(([$URLSearchString]*?)\)/ism", 'diaspora_mentions', $Text);
}
// Restore code blocks
$Text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
function ($matches) use ($codeblocks) {
$return = '';
if (isset($codeblocks[intval($matches[1])])) {
$return = $codeblocks[$matches[1]];
}
return $return;
}
, $Text);
call_hooks('bb2diaspora',$Text);
return $Text;
}
function unescape_underscores_in_links($m) {
$y = str_replace('\\_','_', $m[2]);
$y = str_replace('\\_', '_', $m[2]);
return('[' . $m[1] . '](' . $y . ')');
}

360
include/bbcode.php

@ -1,12 +1,12 @@
<?php
use \Friendica\Core\Config;
require_once("include/oembed.php");
require_once('include/event.php');
require_once('include/map.php');
require_once('mod/proxy.php');
require_once('include/Contact.php');
require_once('include/plaintext.php');
require_once 'include/oembed.php';
require_once 'include/event.php';
require_once 'include/map.php';
require_once 'mod/proxy.php';
require_once 'include/Contact.php';
require_once 'include/plaintext.php';
function bb_PictureCacheExt($matches) {
if (strpos($matches[3], "data:image/") === 0)
@ -159,14 +159,7 @@ function stripcode_br_cb($s) {
return '[code]' . str_replace('<br />', '', $s[1]) . '[/code]';
}
function bb_onelinecode_cb($match) {
if (strpos($match[1],"<br>")===false){
return "<key>".$match[1]."</key>";
}
return "<code>".$match[1]."</code>";
}
function tryoembed($match){
function tryoembed($match) {
$url = $match[1];
// Always embed the SSL version
@ -186,7 +179,6 @@ function tryoembed($match){
$html = oembed_format_object($o);
return $html;
}
// [noparse][i]italic[/i][/noparse] turns into
@ -194,11 +186,11 @@ function tryoembed($match){
// to hide them from parser.
function bb_spacefy($st) {
$whole_match = $st[0];
$captured = $st[1];
$spacefied = preg_replace("/\[(.*?)\]/", "[ $1 ]", $captured);
$new_str = str_replace($captured, $spacefied, $whole_match);
return $new_str;
$whole_match = $st[0];
$captured = $st[1];
$spacefied = preg_replace("/\[(.*?)\]/", "[ $1 ]", $captured);
$new_str = str_replace($captured, $spacefied, $whole_match);
return $new_str;
}
// The previously spacefied [noparse][ i ]italic[ /i ][/noparse],
@ -206,19 +198,19 @@ function bb_spacefy($st) {
// returning [i]italic[/i]
function bb_unspacefy_and_trim($st) {
$whole_match = $st[0];
$captured = $st[1];
$unspacefied = preg_replace("/\[ (.*?)\ ]/", "[$1]", $captured);
return $unspacefied;
$whole_match = $st[0];
$captured = $st[1];
$unspacefied = preg_replace("/\[ (.*?)\ ]/", "[$1]", $captured);
return $unspacefied;
}
function bb_find_open_close($s, $open, $close, $occurance = 1) {
function bb_find_open_close($s, $open, $close, $occurence = 1) {
if($occurance < 1)
$occurance = 1;
if($occurence < 1)
$occurence = 1;
$start_pos = -1;
for($i = 1; $i <= $occurance; $i++) {
for($i = 1; $i <= $occurence; $i++) {
if( $start_pos !== false)
$start_pos = strpos($s, $open, $start_pos + 1);
}
@ -236,13 +228,13 @@ function bb_find_open_close($s, $open, $close, $occurance = 1) {
return $res;
}
function get_bb_tag_pos($s, $name, $occurance = 1) {
function get_bb_tag_pos($s, $name, $occurence = 1) {
if($occurance < 1)
$occurance = 1;
if($occurence < 1)
$occurence = 1;
$start_open = -1;
for($i = 1; $i <= $occurance; $i++) {
for($i = 1; $i <= $occurence; $i++) {
if( $start_open !== false)
$start_open = strpos($s, '[' . $name, $start_open + 1); // allow [name= type tags
}
@ -275,9 +267,9 @@ function bb_tag_preg_replace($pattern, $replace, $name, $s) {
$string = $s;
$occurance = 1;
$pos = get_bb_tag_pos($string, $name, $occurance);
while($pos !== false && $occurance < 1000) {
$occurence = 1;
$pos = get_bb_tag_pos($string, $name, $occurence);
while($pos !== false && $occurence < 1000) {
$start = substr($string, 0, $pos['start']['open']);
$subject = substr($string, $pos['start']['open'], $pos['end']['close'] - $pos['start']['open']);
@ -288,8 +280,8 @@ function bb_tag_preg_replace($pattern, $replace, $name, $s) {
$subject = preg_replace($pattern, $replace, $subject);
$string = $start . $subject . $end;
$occurance++;
$pos = get_bb_tag_pos($string, $name, $occurance);
$occurence++;
$pos = get_bb_tag_pos($string, $name, $occurence);
}
return $string;
@ -730,19 +722,41 @@ function bb_highlight($match) {
return $match[0];
}
// BBcode 2 HTML was written by WAY2WEB.net
// extended to work with Mistpark/Friendica - Mike Macgirvin
function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = false, $forplaintext = false) {
/**
* @brief Converts a BBCode message to HTML message
*
* BBcode 2 HTML was written by WAY2WEB.net
* extended to work with Mistpark/Friendica - Mike Macgirvin
*
* Simple HTML values meaning:
* - 0: Friendica display
* - 1: Unused
* - 2: Used for Facebook, Google+, Windows Phone push, Friendica API
* - 3: Used before converting to Markdown in bb2diaspora.php
* - 4: Used for WordPress, Libertree (before Markdown), pump.io and tumblr
* - 5: Unused
* - 6: Used for Appnet
* - 7: Used for dfrn, OStatus
* - 8: Used for WP backlink text setting
*
* @staticvar array $allowed_src_protocols
* @param string $Text
* @param bool $preserve_nl
* @param bool $tryoembed
* @param int $simplehtml
* @param bool $forplaintext
* @return string
*/
function bbcode($Text, $preserve_nl = false, $tryoembed = true, $simplehtml = false, $forplaintext = false) {
$a = get_app();
// Hide all [noparse] contained bbtags by spacefying them
// POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image?
$Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_spacefy',$Text);
$Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_spacefy',$Text);
$Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_spacefy',$Text);
$Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_spacefy', $Text);
$Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_spacefy', $Text);
$Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_spacefy', $Text);
// Remove the abstract element. It is a non visible element.
$Text = remove_abstract($Text);
@ -772,19 +786,20 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
$Text = str_replace(">", "&gt;", $Text);
// remove some newlines before the general conversion
$Text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism","[share$1]$2[/share]",$Text);
$Text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism","[quote$1]$2[/quote]",$Text);
$Text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "[share$1]$2[/share]", $Text);
$Text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "[quote$1]$2[/quote]", $Text);
$Text = preg_replace("/\n\[code\]/ism", "[code]", $Text);
$Text = preg_replace("/\[\/code\]\n/ism", "[/code]", $Text);
// when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems
if (!$tryoembed)
$Text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism","\n[share$1$2]$3[/share]",$Text);
if (!$tryoembed) {
$Text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $Text);
}
// Check for [code] text here, before the linefeeds are messed with.
// The highlighter will unescape and re-escape the content.
if (strpos($Text,'[code=') !== false) {
if (strpos($Text, '[code=') !== false) {
$Text = preg_replace_callback("/\[code=(.*?)\](.*?)\[\/code\]/ism", 'bb_highlight', $Text);
}
// Convert new line chars to html <br /> tags
@ -795,7 +810,7 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// We'll emulate it.
$Text = trim($Text);
$Text = str_replace("\r\n","\n", $Text);
$Text = str_replace("\r\n", "\n", $Text);
// removing multiplicated newlines
if (get_config("system", "remove_multiplicated_lines")) {
@ -814,8 +829,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
$Text = str_replace(array("\r","\n"), array('<br />','<br />'), $Text);
if($preserve_nl)
$Text = str_replace(array("\n","\r"), array('',''),$Text);
if ($preserve_nl) {
$Text = str_replace(array("\n", "\r"), array('', ''), $Text);
}
// Set up the parameters for a URL search string
$URLSearchString = "^\[\]";
@ -823,20 +839,21 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
$MAILSearchString = $URLSearchString;
// Remove all hashtag addresses
if ((!$tryoembed OR $simplehtml) AND !in_array($simplehtml, array(3, 7)))
if ((!$tryoembed OR $simplehtml) AND !in_array($simplehtml, array(3, 7))) {
$Text = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $Text);
elseif ($simplehtml == 3)
} elseif ($simplehtml == 3) {
$Text = preg_replace("/([@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
'$1<a href="$2">$3</a>',
$Text);
elseif ($simplehtml == 7)
} elseif ($simplehtml == 7) {
$Text = preg_replace("/([@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
'$1<span class="vcard"><a href="$2" class="url" title="$3"><span class="fn nickname mention">$3</span></a></span>',
$Text);
elseif (!$simplehtml)
} elseif (!$simplehtml) {
$Text = preg_replace("/([@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
'$1<a href="$2" class="userinfo mention" title="$3">$3</a>',
$Text);
}
// Bookmarks in red - will be converted to bookmarks in friendica
$Text = preg_replace("/#\^\[url\]([$URLSearchString]*)\[\/url\]/ism", '[bookmark=$1]$1[/bookmark]', $Text);
@ -845,38 +862,42 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
"[bookmark=$1]$2[/bookmark]", $Text);
if (in_array($simplehtml, array(2, 6, 7, 8, 9))) {
$Text = preg_replace_callback("/([^#@])\[url\=([^\]]*)\](.*?)\[\/url\]/ism","bb_expand_links",$Text);
//$Text = preg_replace("/[^#@]\[url\=([^\]]*)\](.*?)\[\/url\]/ism",' $2 [url]$1[/url]',$Text);
$Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",' $2 [url]$1[/url]',$Text);
$Text = preg_replace_callback("/([^#@])\[url\=([^\]]*)\](.*?)\[\/url\]/ism", "bb_expand_links", $Text);
//$Text = preg_replace("/[^#@]\[url\=([^\]]*)\](.*?)\[\/url\]/ism", ' $2 [url]$1[/url]', $Text);
$Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", ' $2 [url]$1[/url]',$Text);
}
if ($simplehtml == 5)
if ($simplehtml == 5) {
$Text = preg_replace("/[^#@]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '[url]$1[/url]', $Text);
}
// Perform URL Search
if ($tryoembed)
$Text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'tryoembed',$Text);
if ($tryoembed) {
$Text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", 'tryoembed', $Text);
}
if ($simplehtml == 5)
$Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url]$1[/url]',$Text);
else
$Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url=$1]$2[/url]',$Text);
if ($simplehtml == 5) {
$Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url]$1[/url]', $Text);
} else {
$Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url=$1]$2[/url]', $Text);
}
// Handle Diaspora posts
$Text = preg_replace_callback("&\[url=/posts/([^\[\]]*)\](.*)\[\/url\]&Usi", 'bb_DiasporaLinks', $Text);
// if the HTML is used to generate plain text, then don't do this search, but replace all URL of that kind to text
// if ($simplehtml != 7) {
if (!$forplaintext)
if (!$forplaintext) {
$Text = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1<a href="$2" target="_blank">$2</a>', $Text);
else {
$Text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism"," $1 ",$Text);
} else {
$Text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism", " $1 ", $Text);
$Text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'bb_RemovePictureLinks', $Text);
}
// }
if ($tryoembed)
$Text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism",'tryoembed',$Text);
if ($tryoembed) {
$Text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'tryoembed', $Text);
}
$Text = preg_replace("/([#])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism",
'$1<a href="$2" class="tag" title="$3">$3</a>', $Text);
@ -892,7 +913,7 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// we may need to restrict this further if it picks up too many strays
// link acct:user@host to a webfinger profile redirector
$Text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', '<a href="' . App::get_baseurl() . '/acctlink?addr=$1@$2" target="extlink">acct:$1@$2</a>',$Text);
$Text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', '<a href="' . App::get_baseurl() . '/acctlink?addr=$1@$2" target="extlink">acct:$1@$2</a>', $Text);
// Perform MAIL Search
$Text = preg_replace("/\[mail\]([$MAILSearchString]*)\[\/mail\]/", '<a href="mailto:$1">$1</a>', $Text);
@ -901,61 +922,61 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// leave open the posibility of [map=something]
// this is replaced in prepare_body() which has knowledge of the item location
if (strpos($Text,'[/map]') !== false) {
if (strpos($Text, '[/map]') !== false) {
$Text = preg_replace_callback("/\[map\](.*?)\[\/map\]/ism", 'bb_map_location', $Text);
}
if (strpos($Text,'[map=') !== false) {
if (strpos($Text, '[map=') !== false) {
$Text = preg_replace_callback("/\[map=(.*?)\]/ism", 'bb_map_coords', $Text);
}
if (strpos($Text,'[map]') !== false) {
if (strpos($Text, '[map]') !== false) {
$Text = preg_replace("/\[map\]/", '<div class="map"></div>', $Text);
}
// Check for headers
$Text = preg_replace("(\[h1\](.*?)\[\/h1\])ism",'<h1>$1</h1>',$Text);
$Text = preg_replace("(\[h2\](.*?)\[\/h2\])ism",'<h2>$1</h2>',$Text);
$Text = preg_replace("(\[h3\](.*?)\[\/h3\])ism",'<h3>$1</h3>',$Text);
$Text = preg_replace("(\[h4\](.*?)\[\/h4\])ism",'<h4>$1</h4>',$Text);
$Text = preg_replace("(\[h5\](.*?)\[\/h5\])ism",'<h5>$1</h5>',$Text);
$Text = preg_replace("(\[h6\](.*?)\[\/h6\])ism",'<h6>$1</h6>',$Text);
$Text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '<h1>$1</h1>', $Text);
$Text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '<h2>$1</h2>', $Text);
$Text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '<h3>$1</h3>', $Text);
$Text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '<h4>$1</h4>', $Text);
$Text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '<h5>$1</h5>', $Text);
$Text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '<h6>$1</h6>', $Text);
// Check for paragraph
$Text = preg_replace("(\[p\](.*?)\[\/p\])ism",'<p>$1</p>',$Text);
$Text = preg_replace("(\[p\](.*?)\[\/p\])ism", '<p>$1</p>', $Text);
// Check for bold text
$Text = preg_replace("(\[b\](.*?)\[\/b\])ism",'<strong>$1</strong>',$Text);
$Text = preg_replace("(\[b\](.*?)\[\/b\])ism", '<strong>$1</strong>', $Text);
// Check for Italics text
$Text = preg_replace("(\[i\](.*?)\[\/i\])ism",'<em>$1</em>',$Text);
$Text = preg_replace("(\[i\](.*?)\[\/i\])ism", '<em>$1</em>', $Text);
// Check for Underline text
$Text = preg_replace("(\[u\](.*?)\[\/u\])ism",'<u>$1</u>',$Text);
$Text = preg_replace("(\[u\](.*?)\[\/u\])ism", '<u>$1</u>', $Text);
// Check for strike-through text
$Text = preg_replace("(\[s\](.*?)\[\/s\])ism",'<strike>$1</strike>',$Text);
$Text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<strike>$1</strike>', $Text);
// Check for over-line text
$Text = preg_replace("(\[o\](.*?)\[\/o\])ism",'<span class="overline">$1</span>',$Text);
$Text = preg_replace("(\[o\](.*?)\[\/o\])ism", '<span class="overline">$1</span>', $Text);
// Check for colored text
$Text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism","<span style=\"color: $1;\">$2</span>",$Text);
$Text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "<span style=\"color: $1;\">$2</span>", $Text);
// Check for sized text
// [size=50] --> font-size: 50px (with the unit).
$Text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism","<span style=\"font-size: $1px; line-height: initial;\">$2</span>",$Text);
$Text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism","<span style=\"font-size: $1; line-height: initial;\">$2</span>",$Text);
$Text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "<span style=\"font-size: $1px; line-height: initial;\">$2</span>", $Text);
$Text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "<span style=\"font-size: $1; line-height: initial;\">$2</span>", $Text);
// Check for centered text
$Text = preg_replace("(\[center\](.*?)\[\/center\])ism","<div style=\"text-align:center;\">$1</div>",$Text);
$Text = preg_replace("(\[center\](.*?)\[\/center\])ism", "<div style=\"text-align:center;\">$1</div>", $Text);
// Check for list text
$Text = str_replace("[*]", "<li>", $Text);
// Check for style sheet commands
$Text = preg_replace_callback("(\[style=(.*?)\](.*?)\[\/style\])ism","bb_cleanstyle",$Text);
$Text = preg_replace_callback("(\[style=(.*?)\](.*?)\[\/style\])ism", "bb_cleanstyle", $Text);
// Check for CSS classes
$Text = preg_replace_callback("(\[class=(.*?)\](.*?)\[\/class\])ism","bb_cleanclass",$Text);
$Text = preg_replace_callback("(\[class=(.*?)\](.*?)\[\/class\])ism", "bb_cleanclass", $Text);
// handle nested lists
$endlessloop = 0;
@ -964,42 +985,42 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
((strpos($Text, "[/ol]") !== false) && (strpos($Text, "[ol]") !== false)) ||
((strpos($Text, "[/ul]") !== false) && (strpos($Text, "[ul]") !== false)) ||
((strpos($Text, "[/li]") !== false) && (strpos($Text, "[li]") !== false))) && (++$endlessloop < 20)) {
$Text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '<ul class="listbullet" style="list-style-type: circle;">$1</ul>' ,$Text);
$Text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '<ul class="listnone" style="list-style-type: none;">$1</ul>' ,$Text);
$Text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '<ul class="listdecimal" style="list-style-type: decimal;">$1</ul>' ,$Text);
$Text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism",'<ul class="listlowerroman" style="list-style-type: lower-roman;">$2</ul>' ,$Text);
$Text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '<ul class="listupperroman" style="list-style-type: upper-roman;">$2</ul>' ,$Text);
$Text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '<ul class="listloweralpha" style="list-style-type: lower-alpha;">$2</ul>' ,$Text);
$Text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '<ul class="listupperalpha" style="list-style-type: upper-alpha;">$2</ul>' ,$Text);
$Text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '<ul class="listbullet" style="list-style-type: circle;">$1</ul>' ,$Text);
$Text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '<ul class="listdecimal" style="list-style-type: decimal;">$1</ul>' ,$Text);
$Text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '<li>$1</li>' ,$Text);
$Text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '<ul class="listbullet" style="list-style-type: circle;">$1</ul>', $Text);
$Text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '<ul class="listnone" style="list-style-type: none;">$1</ul>', $Text);
$Text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '<ul class="listdecimal" style="list-style-type: decimal;">$1</ul>', $Text);
$Text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '<ul class="listlowerroman" style="list-style-type: lower-roman;">$2</ul>', $Text);
$Text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '<ul class="listupperroman" style="list-style-type: upper-roman;">$2</ul>', $Text);
$Text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '<ul class="listloweralpha" style="list-style-type: lower-alpha;">$2</ul>', $Text);
$Text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '<ul class="listupperalpha" style="list-style-type: upper-alpha;">$2</ul>', $Text);
$Text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '<ul class="listbullet" style="list-style-type: circle;">$1</ul>', $Text);
$Text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '<ul class="listdecimal" style="list-style-type: decimal;">$1</ul>', $Text);
$Text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '<li>$1</li>', $Text);
}
$Text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '<th>$1</th>' ,$Text);
$Text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '<td>$1</td>' ,$Text);
$Text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '<tr>$1</tr>' ,$Text);
$Text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '<table>$1</table>' ,$Text);
$Text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '<th>$1</th>', $Text);
$Text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '<td>$1</td>', $Text);
$Text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '<tr>$1</tr>', $Text);
$Text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '<table>$1</table>', $Text);
$Text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '<table border="1" >$1</table>' ,$Text);
$Text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '<table border="0" >$1</table>' ,$Text);
$Text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '<table border="1" >$1</table>', $Text);
$Text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '<table border="0" >$1</table>', $Text);
$Text = str_replace('[hr]','<hr />', $Text);
$Text = str_replace('[hr]', '<hr />', $Text);
// This is actually executed in prepare_body()
$Text = str_replace('[nosmile]','',$Text);
$Text = str_replace('[nosmile]', '', $Text);
// Check for font change text
$Text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm","<span style=\"font-family: $1;\">$2</span>",$Text);
$Text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "<span style=\"font-family: $1;\">$2</span>", $Text);
// Declare the format for [code] layout
// $Text = preg_replace_callback("/\[code\](.*?)\[\/code\]/ism",'stripcode_br_cb',$Text);
// $Text = preg_replace_callback("/\[code\](.*?)\[\/code\]/ism", 'stripcode_br_cb', $Text);
$CodeLayout = '<code>$1</code>';
// Check for [code] text
$Text = preg_replace("/\[code\](.*?)\[\/code\]/ism","$CodeLayout", $Text);
$Text = preg_replace("/\[code\](.*?)\[\/code\]/ism", "$CodeLayout", $Text);
// Declare the format for [spoiler] layout
$SpoilerLayout = '<blockquote class="spoiler">$1</blockquote>';
@ -1007,8 +1028,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// Check for [spoiler] text
// handle nested quotes
$endlessloop = 0;
while ((strpos($Text, "[/spoiler]") !== false) and (strpos($Text, "[spoiler]") !== false) and (++$endlessloop < 20))
$Text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism","$SpoilerLayout", $Text);
while ((strpos($Text, "[/spoiler]") !== false) and (strpos($Text, "[spoiler]") !== false) and (++$endlessloop < 20)) {
$Text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", "$SpoilerLayout", $Text);
}
// Check for [spoiler=Author] text
@ -1016,10 +1038,11 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// handle nested quotes
$endlessloop = 0;
while ((strpos($Text, "[/spoiler]")!== false) and (strpos($Text, "[spoiler=") !== false) and (++$endlessloop < 20))
while ((strpos($Text, "[/spoiler]")!== false) and (strpos($Text, "[spoiler=") !== false) and (++$endlessloop < 20)) {
$Text = preg_replace("/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism",
"<br /><strong class=".'"spoiler"'.">" . $t_wrote . "</strong><blockquote class=".'"spoiler"'.">$2</blockquote>",
$Text);
}
// Declare the format for [quote] layout
$QuoteLayout = '<blockquote>$1</blockquote>';
@ -1027,8 +1050,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// Check for [quote] text
// handle nested quotes
$endlessloop = 0;
while ((strpos($Text, "[/quote]") !== false) and (strpos($Text, "[quote]") !== false) and (++$endlessloop < 20))
$Text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism","$QuoteLayout", $Text);
while ((strpos($Text, "[/quote]") !== false) and (strpos($Text, "[quote]") !== false) and (++$endlessloop < 20)) {
$Text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $Text);
}
// Check for [quote=Author] text
@ -1036,10 +1060,11 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// handle nested quotes
$endlessloop = 0;
while ((strpos($Text, "[/quote]")!== false) and (strpos($Text, "[quote=") !== false) and (++$endlessloop < 20))
while ((strpos($Text, "[/quote]")!== false) and (strpos($Text, "[quote=") !== false) and (++$endlessloop < 20)) {
$Text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism",
"<br /><strong class=".'"author"'.">" . $t_wrote . "</strong><blockquote>$2</blockquote>",
$Text);
}
// [img=widthxheight]image source[/img]
@ -1059,11 +1084,11 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
$Text = preg_replace_callback("/(.*?)\[share(.*?)\](.*?)\[\/share\]/ism",
function ($match) use ($simplehtml){
return(bb_ShareAttributes($match, $simplehtml));
},$Text);
}, $Text);
$Text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism",'<br/><img src="' .App::get_baseurl() . '/images/lock_icon.gif" alt="' . t('Encrypted content') . '" title="' . t('Encrypted content') . '" /><br />', $Text);
$Text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism",'<br/><img src="' .App::get_baseurl() . '/images/lock_icon.gif" alt="' . t('Encrypted content') . '" title="' . '$1' . ' ' . t('Encrypted content') . '" /><br />', $Text);
//$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism",'<br/><img src="' .App::get_baseurl() . '/images/lock_icon.gif" alt="' . t('Encrypted content') . '" title="' . '$1' . ' ' . t('Encrypted content') . '" /><br />', $Text);
$Text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '<br/><img src="' .App::get_baseurl() . '/images/lock_icon.gif" alt="' . t('Encrypted content') . '" title="' . t('Encrypted content') . '" /><br />', $Text);
$Text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '<br/><img src="' .App::get_baseurl() . '/images/lock_icon.gif" alt="' . t('Encrypted content') . '" title="' . '$1' . ' ' . t('Encrypted content') . '" /><br />', $Text);
//$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '<br/><img src="' .App::get_baseurl() . '/images/lock_icon.gif" alt="' . t('Encrypted content') . '" title="' . '$1' . ' ' . t('Encrypted content') . '" /><br />', $Text);
// Try to Oembed
@ -1083,41 +1108,44 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// html5 video and audio
if ($tryoembed)
if ($tryoembed) {
$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<iframe src="$1" width="' . $a->videowidth . '" height="' . $a->videoheight . '"><a href="$1">$1</a></iframe>', $Text);
else
} else {
$Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '<a href="$1">$1</a>', $Text);
}
// Youtube extensions
if ($tryoembed) {
$Text = preg_replace_callback("/\[youtube\](https?:\/\/www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", 'tryoembed', $Text);
$Text = preg_replace_callback("/\[youtube\](www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", 'tryoembed', $Text);
$Text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism",'tryoembed',$Text);
$Text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism", 'tryoembed', $Text);
}
$Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
$Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
$Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text);
$Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $Text);
$Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $Text);
$Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $Text);
if ($tryoembed)
if ($tryoembed) {
$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '<iframe width="' . $a->videowidth . '" height="' . $a->videoheight . '" src="https://www.youtube.com/embed/$1" frameborder="0" ></iframe>', $Text);
else
} else {
$Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism",
'<a href="https://www.youtube.com/watch?v=$1" target="_blank">https://www.youtube.com/watch?v=$1</a>', $Text);
}
if ($tryoembed) {
$Text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism",'tryoembed',$Text);
$Text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism",'tryoembed',$Text);
$Text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism", 'tryoembed', $Text);
$Text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism", 'tryoembed', $Text);
}
$Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text);
$Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text);
$Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $Text);
$Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $Text);
if ($tryoembed)
if ($tryoembed) {
$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '<iframe width="' . $a->videowidth . '" height="' . $a->videoheight . '" src="https://player.vimeo.com/video/$1" frameborder="0" ></iframe>', $Text);
else
} else {
$Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism",
'<a href="https://vimeo.com/$1" target="_blank">https://vimeo.com/$1</a>', $Text);
}
// $Text = preg_replace("/\[youtube\](.*?)\[\/youtube\]/", '<object width="425" height="350" type="application/x-shockwave-flash" data="http://www.youtube.com/v/$1" ><param name="movie" value="http://www.youtube.com/v/$1"></param><!--[if IE]><embed src="http://www.youtube.com/v/$1" type="application/x-shockwave-flash" width="425" height="350" /><![endif]--></object>', $Text);
@ -1132,33 +1160,42 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
// Summary (e.g. title) is required, earlier revisions only required description (in addition to
// start which is always required). Allow desc with a missing summary for compatibility.
if((x($ev,'desc') || x($ev,'summary')) && x($ev,'start')) {
if((x($ev, 'desc') || x($ev, 'summary')) && x($ev, 'start')) {
$sub = format_event_html($ev, $simplehtml);
$Text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism",'',$Text);
$Text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism",'',$Text);
$Text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism",$sub,$Text);
$Text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism",'',$Text);
$Text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism",'',$Text);
$Text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism",'',$Text);
$Text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism",'',$Text);
$Text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $Text);
$Text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $Text);
$Text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $Text);
$Text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $Text);
$Text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $Text);
$Text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism", '', $Text);
$Text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $Text);
}
//replace oneliner <code> with <key>
$Text = preg_replace_callback("|(?!<br[^>]*>)<code>([^<]*)</code>(?!<br[^>]*>)|ism", 'bb_onelinecode_cb', $Text);
// Replace inline code blocks
$Text = preg_replace_callback("|(?!<br[^>]*>)<code>([^<]*)</code>(?!<br[^>]*>)|ism",
function ($match) use ($simplehtml) {
$return = '<key>' . $match[1] . '</key>';
// Use <code> for Diaspora inline code blocks
if ($simplehtml === 3) {
$return = '<code>' . $match[1] . '</code>';
}
return $return;
}
, $Text);
// Unhide all [noparse] contained bbtags unspacefying them
// and triming the [noparse] tag.
$Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_unspacefy_and_trim',$Text);
$Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_unspacefy_and_trim',$Text);
$Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_unspacefy_and_trim',$Text);
$Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_unspacefy_and_trim', $Text);
$Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_unspacefy_and_trim', $Text);
$Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_unspacefy_and_trim', $Text);
$Text = preg_replace('/\[\&amp\;([#a-z0-9]+)\;\]/','&$1;',$Text);
$Text = preg_replace('/\&\#039\;/','\'',$Text);
$Text = preg_replace('/\&quot\;/','"',$Text);
$Text = preg_replace('/\[\&amp\;([#a-z0-9]+)\;\]/', '&$1;', $Text);
$Text = preg_replace('/\&\#039\;/', '\'', $Text);
$Text = preg_replace('/\&quot\;/', '"', $Text);
// fix any escaped ampersands that may have been converted into links
$Text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism', '<$1$2=$3&$4>', $Text);
@ -1198,19 +1235,19 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal
$Text = $doc->saveHTML();
$Text = str_replace(array("<html><body>", "</body></html>", $doctype, $encoding), array("", "", "", ""), $Text);
$Text = str_replace('<br></li>','</li>', $Text);
$Text = str_replace('<br></li>', '</li>', $Text);
//$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES');
}
// Clean up some useless linebreaks in lists
//$Text = str_replace('<br /><ul','<ul ', $Text);
//$Text = str_replace('</ul><br />','</ul>', $Text);
//$Text = str_replace('</li><br />','</li>', $Text);
//$Text = str_replace('<br /><li>','<li>', $Text);
// $Text = str_replace('<br /><ul','<ul ', $Text);
//$Text = str_replace('<br /><ul', '<ul ', $Text);
//$Text = str_replace('</ul><br />', '</ul>', $Text);
//$Text = str_replace('</li><br />', '</li>', $Text);
//$Text = str_replace('<br /><li>', '<li>', $Text);
//$Text = str_replace('<br /><ul', '<ul ', $Text);
call_hooks('bbcode',$Text);
call_hooks('bbcode', $Text);
return trim($Text);
}
@ -1253,4 +1290,3 @@ function fetch_abstract($text, $addon = "") {
return $abstract;
}
?>

3
library/html-to-markdown/.gitignore

@ -1,3 +0,0 @@
~*
vendor
composer.lock

6
library/html-to-markdown/.travis.yml

@ -1,6 +0,0 @@
language: php
php:
- "5.5"
- "5.4"
- "5.3"
script: phpunit --no-configuration HTML_To_MarkdownTest ./tests/HTML_To_MarkdownTest.php

598
library/html-to-markdown/HTML_To_Markdown.php

@ -1,598 +0,0 @@
<?php
/**
* Class HTML_To_Markdown
*
* A helper class to convert HTML to Markdown.
*
* @version 2.2.1
* @author Nick Cernis <nick@cern.is>
* @link https://github.com/nickcernis/html2markdown/ Latest version on GitHub.
* @link http://twitter.com/nickcernis Nick on twitter.
* @license http://www.opensource.org/licenses/mit-license.php MIT
*/
class HTML_To_Markdown
{
/**
* @var DOMDocument The root of the document tree that holds our HTML.
*/
private $document;
/**
* @var string|boolean The Markdown version of the original HTML, or false if conversion failed
*/
private $output;
/**
* @var array Class-wide options users can override.
*/
private $options = array(
'header_style' => 'setext', // Set to "atx" to output H1 and H2 headers as # Header1 and ## Header2
'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML
'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
'bold_style' => '**', // Set to '__' if you prefer the underlined style
'italic_style' => '*', // Set to '_' if you prefer the underlined style
'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: "meta style script"
);
/**
* Constructor
*
* Set up a new DOMDocument from the supplied HTML, convert it to Markdown, and store it in $this->$output.
*
* @param string $html The HTML to convert to Markdown.
* @param array $overrides [optional] List of style and error display overrides.
*/
public function __construct($html = null, $overrides = null)
{
if ($overrides)
$this->options = array_merge($this->options, $overrides);
if ($html)
$this->convert($html);
}
/**
* Setter for conversion options
*
* @param $name
* @param $value
*/
public function set_option($name, $value)
{
$this->options[$name] = $value;
}
/**
* Convert
*
* Loads HTML and passes to get_markdown()
*
* @param $html
* @return string The Markdown version of the html
*/
public function convert($html)
{
$html = preg_replace('~>\s+<~', '><', $html); // Strip white space between tags to prevent creation of empty #text nodes
$this->document = new DOMDocument();
if ($this->options['suppress_errors'])
libxml_use_internal_errors(true); // Suppress conversion errors (from http://bit.ly/pCCRSX )
$this->document->loadHTML('<?xml encoding="UTF-8">' . $html); // Hack to load utf-8 HTML (from http://bit.ly/pVDyCt )
$this->document->encoding = 'UTF-8';
if ($this->options['suppress_errors'])
libxml_clear_errors();
return $this->get_markdown($html);
}
/**
* Is Child Of?
*
* Is the node a child of the given parent tag?
*
* @param $parent_name string|array The name of the parent node(s) to search for e.g. 'code' or array('pre', 'code')
* @param $node
* @return bool
*/
private static function is_child_of($parent_name, $node)
{
for ($p = $node->parentNode; $p != false; $p = $p->parentNode) {
if (is_null($p))
return false;
if ( is_array($parent_name) && in_array($p->nodeName, $parent_name) )
return true;
if ($p->nodeName == $parent_name)
return true;
}
return false;
}
/**
* Convert Children
*
* Recursive function to drill into the DOM and convert each node into Markdown from the inside out.
*
* Finds children of each node and convert those to #text nodes containing their Markdown equivalent,
* starting with the innermost element and working up to the outermost element.
*
* @param $node
*/
private function convert_children($node)
{
// Don't convert HTML code inside <code> and <pre> blocks to Markdown - that should stay as HTML
if (self::is_child_of(array('pre', 'code'), $node))
return;
// If the node has children, convert those to Markdown first
if ($node->hasChildNodes()) {
$length = $node->childNodes->length;
for ($i = 0; $i < $length; $i++) {
$child = $node->childNodes->item($i);
$this->convert_children($child);
}
}
// Now that child nodes have been converted, convert the original node
$markdown = $this->convert_to_markdown($node);
// Create a DOM text node containing the Markdown equivalent of the original node
$markdown_node = $this->document->createTextNode($markdown);
// Replace the old $node e.g. "<h3>Title</h3>" with the new $markdown_node e.g. "### Title"
$node->parentNode->replaceChild($markdown_node, $node);
}
/**
* Get Markdown
*
* Sends the body node to convert_children() to change inner nodes to Markdown #text nodes, then saves and
* returns the resulting converted document as a string in Markdown format.
*
* @return string|boolean The converted HTML as Markdown, or false if conversion failed
*/
private function get_markdown()
{
// Work on the entire DOM tree (including head and body)
$input = $this->document->getElementsByTagName("html")->item(0);
if (!$input)
return false;
// Convert all children of this root element. The DOMDocument stored in $this->doc will
// then consist of #text nodes, each containing a Markdown version of the original node
// that it replaced.
$this->convert_children($input);
// Sanitize and return the body contents as a string.
$markdown = $this->document->saveHTML(); // stores the DOMDocument as a string
$markdown = html_entity_decode($markdown, ENT_QUOTES, 'UTF-8');
$markdown = html_entity_decode($markdown, ENT_QUOTES, 'UTF-8'); // Double decode to cover cases like &amp;nbsp; http://www.php.net/manual/en/function.htmlentities.php#99984
$markdown = preg_replace("/<!DOCTYPE [^>]+>/", "", $markdown); // Strip doctype declaration
$unwanted = array('<html>', '</html>', '<body>', '</body>', '<head>', '</head>', '<?xml encoding="UTF-8">', '&#xD;');
$markdown = str_replace($unwanted, '', $markdown); // Strip unwanted tags
$markdown = trim($markdown, "\n\r\0\x0B");
$this->output = $markdown;
return $markdown;
}
/**
* Convert to Markdown
*
* Converts an individual node into a #text node containing a string of its Markdown equivalent.
*
* Example: An <h3> node with text content of "Title" becomes a text node with content of "### Title"
*
* @param $node
* @return string The converted HTML as Markdown
*/
private function convert_to_markdown($node)
{
$tag = $node->nodeName; // the type of element, e.g. h1
$value = $node->nodeValue; // the value of that element, e.g. The Title
// Strip nodes named in remove_nodes
$tags_to_remove = explode(' ', $this->options['remove_nodes']);
if ( in_array($tag, $tags_to_remove) )
return false;
switch ($tag) {
case "p":
$markdown = (trim($value)) ? rtrim($value) . PHP_EOL . PHP_EOL : '';
break;
case "pre":
$markdown = PHP_EOL . $this->convert_code($node) . PHP_EOL;
break;
case "h1":
case "h2":
$markdown = $this->convert_header($tag, $node);
break;
case "h3":
$markdown = "### " . $value . PHP_EOL . PHP_EOL;
break;
case "h4":
$markdown = "#### " . $value . PHP_EOL . PHP_EOL;
break;
case "h5":
$markdown = "##### " . $value . PHP_EOL . PHP_EOL;
break;
case "h6":
$markdown = "###### " . $value . PHP_EOL . PHP_EOL;
break;
case "em":
case "i":
case "strong":
case "b":
$markdown = $this->convert_emphasis($tag, $value);
break;
case "hr":
$markdown = "- - - - - -" . PHP_EOL . PHP_EOL;
break;
case "br":
$markdown = " " . PHP_EOL;
break;
case "blockquote":
$markdown = $this->convert_blockquote($node);
break;
case "code":
$markdown = $this->convert_code($node);
break;
case "ol":
case "ul":
$markdown = $value . PHP_EOL;
break;
case "li":
$markdown = $this->convert_list($node);
break;
case "img":
$markdown = $this->convert_image($node);
break;
case "a":
$markdown = $this->convert_anchor($node);
break;
case "#text":
$markdown = preg_replace('~\s+~', ' ', $value);
$markdown = preg_replace('~^#~', '\\\\#', $markdown);
break;
case "#comment":
$markdown = '';
break;
case "div":
$markdown = ($this->options['strip_tags']) ? $value . PHP_EOL . PHP_EOL : html_entity_decode($node->C14N());
break;
default:
// If strip_tags is false (the default), preserve tags that don't have Markdown equivalents,
// such as <span> nodes on their own. C14N() canonicalizes the node to a string.
// See: http://www.php.net/manual/en/domnode.c14n.php
$markdown = ($this->options['strip_tags']) ? $value : html_entity_decode($node->C14N());
}
return $markdown;
}
/**
* Convert Header
*
* Converts h1 and h2 headers to Markdown-style headers in setext style,
* matching the number of underscores with the length of the title.
*
* e.g. Header 1 Header Two
* ======== ----------
*
* Returns atx headers instead if $this->options['header_style'] is "atx"
*
* e.g. # Header 1 ## Header Two
*
* @param string $level The header level, including the "h". e.g. h1
* @param string $node The node to convert.
* @return string The Markdown version of the header.
*/
private function convert_header($level, $node)
{
$content = $node->nodeValue;
if (!$this->is_child_of('blockquote', $node) && $this->options['header_style'] == "setext") {
$length = (function_exists('mb_strlen')) ? mb_strlen($content, 'utf-8') : strlen($content);
$underline = ($level == "h1") ? "=" : "-";
$markdown = $content . PHP_EOL . str_repeat($underline, $length) . PHP_EOL . PHP_EOL; // setext style
} else {
$prefix = ($level == "h1") ? "# " : "## ";
$markdown = $prefix . $content . PHP_EOL . PHP_EOL; // atx style
}
return $markdown;
}
/**
* Converts inline styles
* This function is used to render strong and em tags
*
* eg <strong>bold text</strong> becomes **bold text** or __bold text__
*
* @param string $tag
* @param string $value
* @return string
*/
private function convert_emphasis($tag, $value)
{
if ($tag == 'i' || $tag == 'em') {
$markdown = $this->options['italic_style'] . $value . $this->options['italic_style'];
} else {
$markdown = $this->options['bold_style'] . $value . $this->options['bold_style'];
}
return $markdown;
}
/**
* Convert Image
*
* Converts <img /> tags to Markdown.
*
* e.g. <img src="/path/img.jpg" alt="alt text" title="Title" />
* becomes ![alt text](/path/img.jpg "Title")
*
* @param $node
* @return string
*/
private function convert_image($node)
{
$src = $node->getAttribute('src');
$alt = $node->getAttribute('alt');
$title = $node->getAttribute('title');
if ($title != "") {
$markdown = '![' . $alt . '](' . $src . ' "' . $title . '")'; // No newlines added. <img> should be in a block-level element.
} else {
$markdown = '![' . $alt . '](' . $src . ')';
}
return $markdown;
}
/**
* Convert Anchor
*
* Converts <a> tags to Markdown.
*
* e.g. <a href="http://modernnerd.net" title="Title">Modern Nerd</a>
* becomes [Modern Nerd](http://modernnerd.net "Title")
*
* @param $node
* @return string
*/
private function convert_anchor($node)
{
$href = $node->getAttribute('href');
$title = $node->getAttribute('title');
$text = $node->nodeValue;
if ($title != "") {
$markdown = '[' . $text . '](' . $href . ' "' . $title . '")';
} else {
$markdown = '[' . $text . '](' . $href . ')';
}
if (! $href)
$markdown = html_entity_decode($node->C14N());
// Append a space if the node after this one is also an anchor
$next_node_name = $this->get_next_node_name($node);
if ($next_node_name == 'a')
$markdown = $markdown . ' ';
return $markdown;
}
/**
* Convert List
*
* Converts <ul> and <ol> lists to Markdown.
*
* @param $node
* @return string
*/
private function convert_list($node)
{
// If parent is an ol, use numbers, otherwise, use dashes
$list_type = $node->parentNode->nodeName;
$value = $node->nodeValue;
if ($list_type == "ul") {
$markdown = "- " . trim($value) . PHP_EOL;
} else {
$number = $this->get_position($node);
$markdown = $number . ". " . trim($value) . PHP_EOL;
}
return $markdown;
}
/**
* Convert Code
*
* Convert code tags by indenting blocks of code and wrapping single lines in backticks.
*
* @param DOMNode $node
* @return string
*/
private function convert_code($node)
{
// Store the content of the code block in an array, one entry for each line
$markdown = '';
$code_content = html_entity_decode($node->C14N());
$code_content = str_replace(array("<code>", "</code>"), "", $code_content);
$code_content = str_replace(array("<pre>", "</pre>"), "", $code_content);
$lines = preg_split('/\r\n|\r|\n/', $code_content);
$total = count($lines);
// If there's more than one line of code, prepend each line with four spaces and no backticks.
if ($total > 1 || $node->nodeName === 'pre') {
// Remove the first and last line if they're empty
$first_line = trim($lines[0]);
$last_line = trim($lines[$total - 1]);
$first_line = trim($first_line, "&#xD;"); //trim XML style carriage returns too
$last_line = trim($last_line, "&#xD;");
if (empty($first_line))
array_shift($lines);
if (empty($last_line))
array_pop($lines);
$count = 1;
foreach ($lines as $line) {
$line = str_replace('&#xD;', '', $line);
$markdown .= " " . $line;
// Add newlines, except final line of the code
if ($count != $total)
$markdown .= PHP_EOL;
$count++;
}
$markdown .= PHP_EOL;
} else { // There's only one line of code. It's a code span, not a block. Just wrap it with backticks.
$markdown .= "`" . $lines[0] . "`";
}
return $markdown;
}
/**
* Convert blockquote
*
* Prepend blockquotes with > chars.
*
* @param $node
* @return string
*/
private function convert_blockquote($node)
{
// Contents should have already been converted to Markdown by this point,
// so we just need to add ">" symbols to each line.
$markdown = '';
$quote_content = trim($node->nodeValue);
$lines = preg_split('/\r\n|\r|\n/', $quote_content);
$total_lines = count($lines);
foreach ($lines as $i => $line) {
$markdown .= "> " . $line . PHP_EOL;
if ($i + 1 == $total_lines)
$markdown .= PHP_EOL;
}
return $markdown;
}
/**
* Get Position
*
* Returns the numbered position of a node inside its parent
*
* @param $node
* @return int The numbered position of the node, starting at 1.
*/
private function get_position($node)
{
// Get all of the nodes inside the parent
$list_nodes = $node->parentNode->childNodes;
$total_nodes = $list_nodes->length;
$position = 1;
// Loop through all nodes and find the given $node
for ($a = 0; $a < $total_nodes; $a++) {
$current_node = $list_nodes->item($a);
if ($current_node->isSameNode($node))
$position = $a + 1;
}
return $position;
}
/**
* Get Next Node Name
*
* Return the name of the node immediately after the passed one.
*
* @param $node
* @return string|null The node name (e.g. 'h1') or null.
*/
private function get_next_node_name($node)
{
$next_node_name = null;
$current_position = $this->get_position($node);
$next_node = $node->parentNode->childNodes->item($current_position);
if ($next_node)
$next_node_name = $next_node->nodeName;
return $next_node_name;
}
/**
* To String
*
* Magic method to return Markdown output when HTML_To_Markdown instance is treated as a string.
*
* @return string
*/
public function __toString()
{
return $this->output();
}
/**
* Output
*
* Getter for the converted Markdown contents stored in $this->output
*
* @return string
*/
public function output()
{
if (!$this->output) {
return '';
} else {
return $this->output;
}
}
}

138
library/html-to-markdown/README.md

@ -1,138 +0,0 @@
HTML To Markdown for PHP
========================
A helper class that converts HTML to [Markdown](http://daringfireball.net/projects/markdown/) for your sanity and convenience.
[![Build Status](https://travis-ci.org/nickcernis/html-to-markdown.png?branch=master)](https://travis-ci.org/nickcernis/html-to-markdown)
**Version**: 2.2.1
**Requires**: PHP 5.3+
**Author**: [@nickcernis](http://twitter.com/nickcernis)
**License**: [MIT](http://www.opensource.org/licenses/mit-license.php)
### Why convert HTML to Markdown?
*"What alchemy is this?"* you mutter. *"I can see why you'd convert [Markdown to HTML](http://michelf.com/projects/php-markdown/),"* you continue, already labouring the question somewhat, *"but why go the other way?"*
Typically you would convert HTML to Markdown if:
1. You have an existing HTML document that needs to be edited by people with good taste.
2. You want to store new content in HTML format but edit it as Markdown.
3. You want to convert HTML email to plain text email.
4. You know a guy who's been converting HTML to Markdown for years, and now he can speak Elvish. You'd quite like to be able to speak Elvish.
5. You just really like Markdown.
### How to use it
Either include HTML_To_Markdown.php directly:
require_once( dirname( __FILE__) . '/HTML_To_Markdown.php' );
Or, require the library in your composer.json:
{
"require": {
"nickcernis/html-to-markdown": "dev-master"
}
}
Then `composer install` and add `require 'vendor/autoload.php';` to the top of your script.
Next, create a new HTML_To_Markdown instance, passing in your valid HTML code:
$html = "<h3>Quick, to the Batpoles!</h3>";
$markdown = new HTML_To_Markdown($html);
The `$markdown` object now contains the Markdown version of your HTML. Use it like a string:
echo $markdown; // ==> ### Quick, to the Batpoles!
Or access the Markdown output directly:
$string = $markdown->output();
The included `demo` directory contains an HTML->Markdown conversion form to try out.
### Conversion options
By default, HTML To Markdown preserves HTML tags without Markdown equivalents, like `<span>` and `<div>`.
To strip HTML tags that don't have a Markdown equivalent while preserving the content inside them, set `strip_tags` to true, like this:
$html = '<span>Turnips!</span>';
$markdown = new HTML_To_Markdown($html, array('strip_tags' => true)); // $markdown now contains "Turnips!"
Or more explicitly, like this:
$html = '<span>Turnips!</span>';
$markdown = new HTML_To_Markdown();
$markdown->set_option('strip_tags', true);
$markdown->convert($html); // $markdown now contains "Turnips!"
Note that only the tags themselves are stripped, not the content they hold.
To strip tags and their content, pass a space-separated list of tags in `remove_nodes`, like this:
$html = '<span>Turnips!</span><div>Monkeys!</div>';
$markdown = new HTML_To_Markdown($html, array('remove_nodes' => 'span div')); // $markdown now contains ""
### Style options
Bold and italic tags are converted using the asterisk syntax by default. Change this to the underlined syntax using the `bold_style` and `italic_style` options.
$html = '<em>Italic</em> and a <strong>bold</strong>';
$markdown = new HTML_To_Markdown();
$markdown->set_option('italic_style', '_');
$markdown->set_option('bold_style', '__');
$markdown->convert($html); // $markdown now contains "_Italic_ and a __bold__"
### Limitations
- Markdown Extra, MultiMarkdown and other variants aren't supported – just Markdown.
### Known issues
- Nested lists and lists containing multiple paragraphs aren't converted correctly.
- Lists inside blockquotes aren't converted correctly.
- Any reported [open issues here](https://github.com/nickcernis/html-to-markdown/issues?state=open).
[Report your issue or request a feature here.](https://github.com/nickcernis/html2markdown/issues/new) Issues with patches or failing tests are especially welcome.
### Style notes
- Setext (underlined) headers are the default for H1 and H2. If you prefer the ATX style for H1 and H2 (# Header 1 and ## Header 2), set `header_style` to 'atx' in the options array when you instantiate the object:
`$markdown = new HTML_To_Markdown( $html, array('header_style'=>'atx') );`
Headers of H3 priority and lower always use atx style.
- Links and images are referenced inline. Footnote references (where image src and anchor href attributes are listed in the footnotes) are not used.
- Blockquotes aren't line wrapped – it makes the converted Markdown easier to edit.
### Dependencies
HTML To Markdown requires PHP's [xml](http://www.php.net/manual/en/xml.installation.php), [lib-xml](http://www.php.net/manual/en/libxml.installation.php), and [dom](http://www.php.net/manual/en/dom.installation.php) extensions, all of which are enabled by default on most distributions.
Errors such as "Fatal error: Class 'DOMDocument' not found" on distributions such as CentOS that disable PHP's xml extension can be resolved by installing php-xml.