diff --git a/composer.json b/composer.json index f2aeaa8eb..8b4a630d3 100644 --- a/composer.json +++ b/composer.json @@ -15,6 +15,7 @@ "require": { "ezyang/htmlpurifier": "~4.7.0", "mobiledetect/mobiledetectlib": "2.8.*", + "league/html-to-markdown": "~4.4.1", "pear-pear.php.net/Text_Highlighter": "*" }, "repositories": [ diff --git a/composer.lock b/composer.lock index b35d1ca12..b46614f6e 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "4d3a9e742e7ad746fb7206f3b5aff5af", + "content-hash": "802372ddf124ef949e80dd8dc1d38797", "packages": [ { "name": "ezyang/htmlpurifier", @@ -50,6 +50,70 @@ ], "time": "2015-08-05T01:03:42+00:00" }, + { + "name": "league/html-to-markdown", + "version": "4.4.1", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/html-to-markdown.git", + "reference": "82ea375b5b2b1da1da222644c0565c695bf88186" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/html-to-markdown/zipball/82ea375b5b2b1da1da222644c0565c695bf88186", + "reference": "82ea375b5b2b1da1da222644c0565c695bf88186", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-xml": "*", + "php": ">=5.3.3" + }, + "require-dev": { + "mikehaertl/php-shellcommand": "~1.1.0", + "phpunit/phpunit": "4.*", + "scrutinizer/ocular": "~1.1" + }, + "bin": [ + "bin/html-to-markdown" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.5-dev" + } + }, + "autoload": { + "psr-4": { + "League\\HTMLToMarkdown\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Colin O'Dell", + "email": "colinodell@gmail.com", + "homepage": "http://www.colinodell.com", + "role": "Lead Developer" + }, + { + "name": "Nick Cernis", + "email": "nick@cern.is", + "homepage": "http://modernnerd.net", + "role": "Original Author" + } + ], + "description": "An HTML-to-markdown conversion helper for PHP", + "homepage": "https://github.com/thephpleague/html-to-markdown", + "keywords": [ + "html", + "markdown" + ], + "time": "2017-03-16T00:45:59+00:00" + }, { "name": "mobiledetect/mobiledetectlib", "version": "2.8.25", diff --git a/include/bb2diaspora.php b/include/bb2diaspora.php index 03eff5a6b..cef293c76 100644 --- a/include/bb2diaspora.php +++ b/include/bb2diaspora.php @@ -1,11 +1,12 @@ convert($Text); // unmask the special chars back to HTML - $Text = str_replace(array('&_lt_;','&_gt_;','&_amp_;'),array('<','>','&'),$Text); + $Text = str_replace(array('&_lt_;', '&_gt_;', '&_amp_;'), array('<', '>', '&'), $Text); $a->save_timestamp($stamp1, "parser"); @@ -172,13 +200,24 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) { $Text = preg_replace_callback("/([@]\[(.*?)\])\(([$URLSearchString]*?)\)/ism", 'diaspora_mentions', $Text); } + // Restore code blocks + $Text = preg_replace_callback('/#codeblock-([0-9]+)#/iU', + function ($matches) use ($codeblocks) { + $return = ''; + if (isset($codeblocks[intval($matches[1])])) { + $return = $codeblocks[$matches[1]]; + } + return $return; + } + , $Text); + call_hooks('bb2diaspora',$Text); return $Text; } function unescape_underscores_in_links($m) { - $y = str_replace('\\_','_', $m[2]); + $y = str_replace('\\_', '_', $m[2]); return('[' . $m[1] . '](' . $y . ')'); } diff --git a/include/bbcode.php b/include/bbcode.php index 489ef8b2e..fd380edc9 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -1,12 +1,12 @@ ', '', $s[1]) . '[/code]'; } -function bb_onelinecode_cb($match) { - if (strpos($match[1],"
")===false){ - return "".$match[1].""; - } - return "".$match[1].""; -} - -function tryoembed($match){ +function tryoembed($match) { $url = $match[1]; // Always embed the SSL version @@ -186,7 +179,6 @@ function tryoembed($match){ $html = oembed_format_object($o); return $html; - } // [noparse][i]italic[/i][/noparse] turns into @@ -194,11 +186,11 @@ function tryoembed($match){ // to hide them from parser. function bb_spacefy($st) { - $whole_match = $st[0]; - $captured = $st[1]; - $spacefied = preg_replace("/\[(.*?)\]/", "[ $1 ]", $captured); - $new_str = str_replace($captured, $spacefied, $whole_match); - return $new_str; + $whole_match = $st[0]; + $captured = $st[1]; + $spacefied = preg_replace("/\[(.*?)\]/", "[ $1 ]", $captured); + $new_str = str_replace($captured, $spacefied, $whole_match); + return $new_str; } // The previously spacefied [noparse][ i ]italic[ /i ][/noparse], @@ -206,19 +198,19 @@ function bb_spacefy($st) { // returning [i]italic[/i] function bb_unspacefy_and_trim($st) { - $whole_match = $st[0]; - $captured = $st[1]; - $unspacefied = preg_replace("/\[ (.*?)\ ]/", "[$1]", $captured); - return $unspacefied; + $whole_match = $st[0]; + $captured = $st[1]; + $unspacefied = preg_replace("/\[ (.*?)\ ]/", "[$1]", $captured); + return $unspacefied; } -function bb_find_open_close($s, $open, $close, $occurance = 1) { +function bb_find_open_close($s, $open, $close, $occurence = 1) { - if($occurance < 1) - $occurance = 1; + if($occurence < 1) + $occurence = 1; $start_pos = -1; - for($i = 1; $i <= $occurance; $i++) { + for($i = 1; $i <= $occurence; $i++) { if( $start_pos !== false) $start_pos = strpos($s, $open, $start_pos + 1); } @@ -236,13 +228,13 @@ function bb_find_open_close($s, $open, $close, $occurance = 1) { return $res; } -function get_bb_tag_pos($s, $name, $occurance = 1) { +function get_bb_tag_pos($s, $name, $occurence = 1) { - if($occurance < 1) - $occurance = 1; + if($occurence < 1) + $occurence = 1; $start_open = -1; - for($i = 1; $i <= $occurance; $i++) { + for($i = 1; $i <= $occurence; $i++) { if( $start_open !== false) $start_open = strpos($s, '[' . $name, $start_open + 1); // allow [name= type tags } @@ -275,9 +267,9 @@ function bb_tag_preg_replace($pattern, $replace, $name, $s) { $string = $s; - $occurance = 1; - $pos = get_bb_tag_pos($string, $name, $occurance); - while($pos !== false && $occurance < 1000) { + $occurence = 1; + $pos = get_bb_tag_pos($string, $name, $occurence); + while($pos !== false && $occurence < 1000) { $start = substr($string, 0, $pos['start']['open']); $subject = substr($string, $pos['start']['open'], $pos['end']['close'] - $pos['start']['open']); @@ -288,8 +280,8 @@ function bb_tag_preg_replace($pattern, $replace, $name, $s) { $subject = preg_replace($pattern, $replace, $subject); $string = $start . $subject . $end; - $occurance++; - $pos = get_bb_tag_pos($string, $name, $occurance); + $occurence++; + $pos = get_bb_tag_pos($string, $name, $occurence); } return $string; @@ -730,19 +722,41 @@ function bb_highlight($match) { return $match[0]; } - // BBcode 2 HTML was written by WAY2WEB.net - // extended to work with Mistpark/Friendica - Mike Macgirvin - -function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = false, $forplaintext = false) { +/** + * @brief Converts a BBCode message to HTML message + * + * BBcode 2 HTML was written by WAY2WEB.net + * extended to work with Mistpark/Friendica - Mike Macgirvin + * + * Simple HTML values meaning: + * - 0: Friendica display + * - 1: Unused + * - 2: Used for Facebook, Google+, Windows Phone push, Friendica API + * - 3: Used before converting to Markdown in bb2diaspora.php + * - 4: Used for WordPress, Libertree (before Markdown), pump.io and tumblr + * - 5: Unused + * - 6: Used for Appnet + * - 7: Used for dfrn, OStatus + * - 8: Used for WP backlink text setting + * + * @staticvar array $allowed_src_protocols + * @param string $Text + * @param bool $preserve_nl + * @param bool $tryoembed + * @param int $simplehtml + * @param bool $forplaintext + * @return string + */ +function bbcode($Text, $preserve_nl = false, $tryoembed = true, $simplehtml = false, $forplaintext = false) { $a = get_app(); // Hide all [noparse] contained bbtags by spacefying them // POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image? - $Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_spacefy',$Text); - $Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_spacefy',$Text); - $Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_spacefy',$Text); + $Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_spacefy', $Text); + $Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_spacefy', $Text); + $Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_spacefy', $Text); // Remove the abstract element. It is a non visible element. $Text = remove_abstract($Text); @@ -772,19 +786,20 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal $Text = str_replace(">", ">", $Text); // remove some newlines before the general conversion - $Text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism","[share$1]$2[/share]",$Text); - $Text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism","[quote$1]$2[/quote]",$Text); + $Text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "[share$1]$2[/share]", $Text); + $Text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "[quote$1]$2[/quote]", $Text); $Text = preg_replace("/\n\[code\]/ism", "[code]", $Text); $Text = preg_replace("/\[\/code\]\n/ism", "[/code]", $Text); // when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems - if (!$tryoembed) - $Text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism","\n[share$1$2]$3[/share]",$Text); + if (!$tryoembed) { + $Text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $Text); + } // Check for [code] text here, before the linefeeds are messed with. // The highlighter will unescape and re-escape the content. - if (strpos($Text,'[code=') !== false) { + if (strpos($Text, '[code=') !== false) { $Text = preg_replace_callback("/\[code=(.*?)\](.*?)\[\/code\]/ism", 'bb_highlight', $Text); } // Convert new line chars to html
tags @@ -795,7 +810,7 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // We'll emulate it. $Text = trim($Text); - $Text = str_replace("\r\n","\n", $Text); + $Text = str_replace("\r\n", "\n", $Text); // removing multiplicated newlines if (get_config("system", "remove_multiplicated_lines")) { @@ -814,8 +829,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal $Text = str_replace(array("\r","\n"), array('
','
'), $Text); - if($preserve_nl) - $Text = str_replace(array("\n","\r"), array('',''),$Text); + if ($preserve_nl) { + $Text = str_replace(array("\n", "\r"), array('', ''), $Text); + } // Set up the parameters for a URL search string $URLSearchString = "^\[\]"; @@ -823,20 +839,21 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal $MAILSearchString = $URLSearchString; // Remove all hashtag addresses - if ((!$tryoembed OR $simplehtml) AND !in_array($simplehtml, array(3, 7))) + if ((!$tryoembed OR $simplehtml) AND !in_array($simplehtml, array(3, 7))) { $Text = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $Text); - elseif ($simplehtml == 3) + } elseif ($simplehtml == 3) { $Text = preg_replace("/([@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $Text); - elseif ($simplehtml == 7) + } elseif ($simplehtml == 7) { $Text = preg_replace("/([@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $Text); - elseif (!$simplehtml) + } elseif (!$simplehtml) { $Text = preg_replace("/([@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $Text); + } // Bookmarks in red - will be converted to bookmarks in friendica $Text = preg_replace("/#\^\[url\]([$URLSearchString]*)\[\/url\]/ism", '[bookmark=$1]$1[/bookmark]', $Text); @@ -845,38 +862,42 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal "[bookmark=$1]$2[/bookmark]", $Text); if (in_array($simplehtml, array(2, 6, 7, 8, 9))) { - $Text = preg_replace_callback("/([^#@])\[url\=([^\]]*)\](.*?)\[\/url\]/ism","bb_expand_links",$Text); - //$Text = preg_replace("/[^#@]\[url\=([^\]]*)\](.*?)\[\/url\]/ism",' $2 [url]$1[/url]',$Text); - $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",' $2 [url]$1[/url]',$Text); + $Text = preg_replace_callback("/([^#@])\[url\=([^\]]*)\](.*?)\[\/url\]/ism", "bb_expand_links", $Text); + //$Text = preg_replace("/[^#@]\[url\=([^\]]*)\](.*?)\[\/url\]/ism", ' $2 [url]$1[/url]', $Text); + $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", ' $2 [url]$1[/url]',$Text); } - if ($simplehtml == 5) + if ($simplehtml == 5) { $Text = preg_replace("/[^#@]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '[url]$1[/url]', $Text); + } // Perform URL Search - if ($tryoembed) - $Text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'tryoembed',$Text); + if ($tryoembed) { + $Text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", 'tryoembed', $Text); + } - if ($simplehtml == 5) - $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url]$1[/url]',$Text); - else - $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism",'[url=$1]$2[/url]',$Text); + if ($simplehtml == 5) { + $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url]$1[/url]', $Text); + } else { + $Text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url=$1]$2[/url]', $Text); + } // Handle Diaspora posts $Text = preg_replace_callback("&\[url=/posts/([^\[\]]*)\](.*)\[\/url\]&Usi", 'bb_DiasporaLinks', $Text); // if the HTML is used to generate plain text, then don't do this search, but replace all URL of that kind to text // if ($simplehtml != 7) { - if (!$forplaintext) + if (!$forplaintext) { $Text = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1$2', $Text); - else { - $Text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism"," $1 ",$Text); + } else { + $Text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism", " $1 ", $Text); $Text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'bb_RemovePictureLinks', $Text); } // } - if ($tryoembed) - $Text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism",'tryoembed',$Text); + if ($tryoembed) { + $Text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'tryoembed', $Text); + } $Text = preg_replace("/([#])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $Text); @@ -892,7 +913,7 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // we may need to restrict this further if it picks up too many strays // link acct:user@host to a webfinger profile redirector - $Text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2',$Text); + $Text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $Text); // Perform MAIL Search $Text = preg_replace("/\[mail\]([$MAILSearchString]*)\[\/mail\]/", '$1', $Text); @@ -901,61 +922,61 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // leave open the posibility of [map=something] // this is replaced in prepare_body() which has knowledge of the item location - if (strpos($Text,'[/map]') !== false) { + if (strpos($Text, '[/map]') !== false) { $Text = preg_replace_callback("/\[map\](.*?)\[\/map\]/ism", 'bb_map_location', $Text); } - if (strpos($Text,'[map=') !== false) { + if (strpos($Text, '[map=') !== false) { $Text = preg_replace_callback("/\[map=(.*?)\]/ism", 'bb_map_coords', $Text); } - if (strpos($Text,'[map]') !== false) { + if (strpos($Text, '[map]') !== false) { $Text = preg_replace("/\[map\]/", '
', $Text); } // Check for headers - $Text = preg_replace("(\[h1\](.*?)\[\/h1\])ism",'

$1

',$Text); - $Text = preg_replace("(\[h2\](.*?)\[\/h2\])ism",'

$1

',$Text); - $Text = preg_replace("(\[h3\](.*?)\[\/h3\])ism",'

$1

',$Text); - $Text = preg_replace("(\[h4\](.*?)\[\/h4\])ism",'

$1

',$Text); - $Text = preg_replace("(\[h5\](.*?)\[\/h5\])ism",'
$1
',$Text); - $Text = preg_replace("(\[h6\](.*?)\[\/h6\])ism",'
$1
',$Text); + $Text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '

$1

', $Text); + $Text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '

$1

', $Text); + $Text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '

$1

', $Text); + $Text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '

$1

', $Text); + $Text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '
$1
', $Text); + $Text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '
$1
', $Text); // Check for paragraph - $Text = preg_replace("(\[p\](.*?)\[\/p\])ism",'

$1

',$Text); + $Text = preg_replace("(\[p\](.*?)\[\/p\])ism", '

$1

', $Text); // Check for bold text - $Text = preg_replace("(\[b\](.*?)\[\/b\])ism",'$1',$Text); + $Text = preg_replace("(\[b\](.*?)\[\/b\])ism", '$1', $Text); // Check for Italics text - $Text = preg_replace("(\[i\](.*?)\[\/i\])ism",'$1',$Text); + $Text = preg_replace("(\[i\](.*?)\[\/i\])ism", '$1', $Text); // Check for Underline text - $Text = preg_replace("(\[u\](.*?)\[\/u\])ism",'$1',$Text); + $Text = preg_replace("(\[u\](.*?)\[\/u\])ism", '$1', $Text); // Check for strike-through text - $Text = preg_replace("(\[s\](.*?)\[\/s\])ism",'$1',$Text); + $Text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $Text); // Check for over-line text - $Text = preg_replace("(\[o\](.*?)\[\/o\])ism",'$1',$Text); + $Text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $Text); // Check for colored text - $Text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism","$2",$Text); + $Text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "$2", $Text); // Check for sized text // [size=50] --> font-size: 50px (with the unit). - $Text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism","$2",$Text); - $Text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism","$2",$Text); + $Text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "$2", $Text); + $Text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $Text); // Check for centered text - $Text = preg_replace("(\[center\](.*?)\[\/center\])ism","
$1
",$Text); + $Text = preg_replace("(\[center\](.*?)\[\/center\])ism", "
$1
", $Text); // Check for list text $Text = str_replace("[*]", "
  • ", $Text); // Check for style sheet commands - $Text = preg_replace_callback("(\[style=(.*?)\](.*?)\[\/style\])ism","bb_cleanstyle",$Text); + $Text = preg_replace_callback("(\[style=(.*?)\](.*?)\[\/style\])ism", "bb_cleanstyle", $Text); // Check for CSS classes - $Text = preg_replace_callback("(\[class=(.*?)\](.*?)\[\/class\])ism","bb_cleanclass",$Text); + $Text = preg_replace_callback("(\[class=(.*?)\](.*?)\[\/class\])ism", "bb_cleanclass", $Text); // handle nested lists $endlessloop = 0; @@ -964,42 +985,42 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal ((strpos($Text, "[/ol]") !== false) && (strpos($Text, "[ol]") !== false)) || ((strpos($Text, "[/ul]") !== false) && (strpos($Text, "[ul]") !== false)) || ((strpos($Text, "[/li]") !== false) && (strpos($Text, "[li]") !== false))) && (++$endlessloop < 20)) { - $Text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '' ,$Text); - $Text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '' ,$Text); - $Text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '' ,$Text); - $Text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism",'' ,$Text); - $Text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '' ,$Text); - $Text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '' ,$Text); - $Text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '' ,$Text); - $Text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '' ,$Text); - $Text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '' ,$Text); - $Text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '
  • $1
  • ' ,$Text); + $Text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '', $Text); + $Text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '', $Text); + $Text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '', $Text); + $Text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '', $Text); + $Text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '', $Text); + $Text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '', $Text); + $Text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '', $Text); + $Text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '', $Text); + $Text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '', $Text); + $Text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '
  • $1
  • ', $Text); } - $Text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1' ,$Text); - $Text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1' ,$Text); - $Text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1' ,$Text); - $Text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '$1
    ' ,$Text); + $Text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1', $Text); + $Text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1', $Text); + $Text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1', $Text); + $Text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '$1
    ', $Text); - $Text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '$1
    ' ,$Text); - $Text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '$1
    ' ,$Text); + $Text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '$1
    ', $Text); + $Text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '$1
    ', $Text); - $Text = str_replace('[hr]','
    ', $Text); + $Text = str_replace('[hr]', '
    ', $Text); // This is actually executed in prepare_body() - $Text = str_replace('[nosmile]','',$Text); + $Text = str_replace('[nosmile]', '', $Text); // Check for font change text - $Text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm","$2",$Text); + $Text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "$2", $Text); // Declare the format for [code] layout -// $Text = preg_replace_callback("/\[code\](.*?)\[\/code\]/ism",'stripcode_br_cb',$Text); +// $Text = preg_replace_callback("/\[code\](.*?)\[\/code\]/ism", 'stripcode_br_cb', $Text); $CodeLayout = '$1'; // Check for [code] text - $Text = preg_replace("/\[code\](.*?)\[\/code\]/ism","$CodeLayout", $Text); + $Text = preg_replace("/\[code\](.*?)\[\/code\]/ism", "$CodeLayout", $Text); // Declare the format for [spoiler] layout $SpoilerLayout = '
    $1
    '; @@ -1007,8 +1028,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // Check for [spoiler] text // handle nested quotes $endlessloop = 0; - while ((strpos($Text, "[/spoiler]") !== false) and (strpos($Text, "[spoiler]") !== false) and (++$endlessloop < 20)) - $Text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism","$SpoilerLayout", $Text); + while ((strpos($Text, "[/spoiler]") !== false) and (strpos($Text, "[spoiler]") !== false) and (++$endlessloop < 20)) { + $Text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", "$SpoilerLayout", $Text); + } // Check for [spoiler=Author] text @@ -1016,10 +1038,11 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // handle nested quotes $endlessloop = 0; - while ((strpos($Text, "[/spoiler]")!== false) and (strpos($Text, "[spoiler=") !== false) and (++$endlessloop < 20)) + while ((strpos($Text, "[/spoiler]")!== false) and (strpos($Text, "[spoiler=") !== false) and (++$endlessloop < 20)) { $Text = preg_replace("/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism", "
    " . $t_wrote . "
    $2
    ", $Text); + } // Declare the format for [quote] layout $QuoteLayout = '
    $1
    '; @@ -1027,8 +1050,9 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // Check for [quote] text // handle nested quotes $endlessloop = 0; - while ((strpos($Text, "[/quote]") !== false) and (strpos($Text, "[quote]") !== false) and (++$endlessloop < 20)) - $Text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism","$QuoteLayout", $Text); + while ((strpos($Text, "[/quote]") !== false) and (strpos($Text, "[quote]") !== false) and (++$endlessloop < 20)) { + $Text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $Text); + } // Check for [quote=Author] text @@ -1036,10 +1060,11 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // handle nested quotes $endlessloop = 0; - while ((strpos($Text, "[/quote]")!== false) and (strpos($Text, "[quote=") !== false) and (++$endlessloop < 20)) + while ((strpos($Text, "[/quote]")!== false) and (strpos($Text, "[quote=") !== false) and (++$endlessloop < 20)) { $Text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism", "
    " . $t_wrote . "
    $2
    ", $Text); + } // [img=widthxheight]image source[/img] @@ -1059,11 +1084,11 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal $Text = preg_replace_callback("/(.*?)\[share(.*?)\](.*?)\[\/share\]/ism", function ($match) use ($simplehtml){ return(bb_ShareAttributes($match, $simplehtml)); - },$Text); + }, $Text); - $Text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism",'
    ' . t('Encrypted content') . '
    ', $Text); - $Text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism",'
    ' . t('Encrypted content') . '
    ', $Text); - //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism",'
    ' . t('Encrypted content') . '
    ', $Text); + $Text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . t('Encrypted content') . '
    ', $Text); + $Text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . t('Encrypted content') . '
    ', $Text); + //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . t('Encrypted content') . '
    ', $Text); // Try to Oembed @@ -1083,41 +1108,44 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // html5 video and audio - if ($tryoembed) + if ($tryoembed) { $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $Text); - else + } else { $Text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $Text); + } // Youtube extensions if ($tryoembed) { $Text = preg_replace_callback("/\[youtube\](https?:\/\/www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", 'tryoembed', $Text); $Text = preg_replace_callback("/\[youtube\](www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", 'tryoembed', $Text); - $Text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism",'tryoembed',$Text); + $Text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism", 'tryoembed', $Text); } - $Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); - $Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); - $Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism",'[youtube]$1[/youtube]',$Text); - - if ($tryoembed) - $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $Text); - else - $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", - 'https://www.youtube.com/watch?v=$1', $Text); + $Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $Text); + $Text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $Text); + $Text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $Text); if ($tryoembed) { - $Text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism",'tryoembed',$Text); - $Text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism",'tryoembed',$Text); + $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $Text); + } else { + $Text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", + 'https://www.youtube.com/watch?v=$1', $Text); } - $Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); - $Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism",'[vimeo]$1[/vimeo]',$Text); + if ($tryoembed) { + $Text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism", 'tryoembed', $Text); + $Text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism", 'tryoembed', $Text); + } - if ($tryoembed) + $Text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $Text); + $Text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $Text); + + if ($tryoembed) { $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $Text); - else + } else { $Text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", 'https://vimeo.com/$1', $Text); + } // $Text = preg_replace("/\[youtube\](.*?)\[\/youtube\]/", '', $Text); @@ -1132,33 +1160,42 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal // Summary (e.g. title) is required, earlier revisions only required description (in addition to // start which is always required). Allow desc with a missing summary for compatibility. - if((x($ev,'desc') || x($ev,'summary')) && x($ev,'start')) { + if((x($ev, 'desc') || x($ev, 'summary')) && x($ev, 'start')) { $sub = format_event_html($ev, $simplehtml); - $Text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism",'',$Text); - $Text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism",'',$Text); - $Text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism",$sub,$Text); - $Text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism",'',$Text); - $Text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism",'',$Text); - $Text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism",'',$Text); - $Text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism",'',$Text); + $Text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $Text); + $Text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $Text); + $Text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $Text); + $Text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $Text); + $Text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $Text); + $Text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism", '', $Text); + $Text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $Text); } - //replace oneliner with - $Text = preg_replace_callback("|(?!]*>)([^<]*)(?!]*>)|ism", 'bb_onelinecode_cb', $Text); + // Replace inline code blocks + $Text = preg_replace_callback("|(?!]*>)([^<]*)(?!]*>)|ism", + function ($match) use ($simplehtml) { + $return = '' . $match[1] . ''; + // Use for Diaspora inline code blocks + if ($simplehtml === 3) { + $return = '' . $match[1] . ''; + } + return $return; + } + , $Text); // Unhide all [noparse] contained bbtags unspacefying them // and triming the [noparse] tag. - $Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_unspacefy_and_trim',$Text); - $Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_unspacefy_and_trim',$Text); - $Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_unspacefy_and_trim',$Text); + $Text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'bb_unspacefy_and_trim', $Text); + $Text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'bb_unspacefy_and_trim', $Text); + $Text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'bb_unspacefy_and_trim', $Text); - $Text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/','&$1;',$Text); - $Text = preg_replace('/\&\#039\;/','\'',$Text); - $Text = preg_replace('/\"\;/','"',$Text); + $Text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $Text); + $Text = preg_replace('/\&\#039\;/', '\'', $Text); + $Text = preg_replace('/\"\;/', '"', $Text); // fix any escaped ampersands that may have been converted into links $Text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $Text); @@ -1198,19 +1235,19 @@ function bbcode($Text,$preserve_nl = false, $tryoembed = true, $simplehtml = fal $Text = $doc->saveHTML(); $Text = str_replace(array("", "", $doctype, $encoding), array("", "", "", ""), $Text); - $Text = str_replace('
    ','', $Text); + $Text = str_replace('
    ', '', $Text); //$Text = mb_convert_encoding($Text, "UTF-8", 'HTML-ENTITIES'); } // Clean up some useless linebreaks in lists - //$Text = str_replace('

    ','', $Text); - //$Text = str_replace('
    ','', $Text); - //$Text = str_replace('
  • ','
  • ', $Text); - // $Text = str_replace('

    ', '', $Text); + //$Text = str_replace('

  • ', '', $Text); + //$Text = str_replace('
  • ', '
  • ', $Text); + //$Text = str_replace('
    diff --git a/library/html-to-markdown/.gitignore b/library/html-to-markdown/.gitignore deleted file mode 100644 index aa429a9a1..000000000 --- a/library/html-to-markdown/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -~* -vendor -composer.lock \ No newline at end of file diff --git a/library/html-to-markdown/.travis.yml b/library/html-to-markdown/.travis.yml deleted file mode 100644 index 48b3e64a4..000000000 --- a/library/html-to-markdown/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -language: php -php: - - "5.5" - - "5.4" - - "5.3" -script: phpunit --no-configuration HTML_To_MarkdownTest ./tests/HTML_To_MarkdownTest.php \ No newline at end of file diff --git a/library/html-to-markdown/HTML_To_Markdown.php b/library/html-to-markdown/HTML_To_Markdown.php deleted file mode 100644 index 109780edd..000000000 --- a/library/html-to-markdown/HTML_To_Markdown.php +++ /dev/null @@ -1,598 +0,0 @@ - - * @link https://github.com/nickcernis/html2markdown/ Latest version on GitHub. - * @link http://twitter.com/nickcernis Nick on twitter. - * @license http://www.opensource.org/licenses/mit-license.php MIT - */ -class HTML_To_Markdown -{ - /** - * @var DOMDocument The root of the document tree that holds our HTML. - */ - private $document; - - /** - * @var string|boolean The Markdown version of the original HTML, or false if conversion failed - */ - private $output; - - /** - * @var array Class-wide options users can override. - */ - private $options = array( - 'header_style' => 'setext', // Set to "atx" to output H1 and H2 headers as # Header1 and ## Header2 - 'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML - 'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output. - 'bold_style' => '**', // Set to '__' if you prefer the underlined style - 'italic_style' => '*', // Set to '_' if you prefer the underlined style - 'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: "meta style script" - ); - - - /** - * Constructor - * - * Set up a new DOMDocument from the supplied HTML, convert it to Markdown, and store it in $this->$output. - * - * @param string $html The HTML to convert to Markdown. - * @param array $overrides [optional] List of style and error display overrides. - */ - public function __construct($html = null, $overrides = null) - { - if ($overrides) - $this->options = array_merge($this->options, $overrides); - - if ($html) - $this->convert($html); - } - - - /** - * Setter for conversion options - * - * @param $name - * @param $value - */ - public function set_option($name, $value) - { - $this->options[$name] = $value; - } - - - /** - * Convert - * - * Loads HTML and passes to get_markdown() - * - * @param $html - * @return string The Markdown version of the html - */ - public function convert($html) - { - $html = preg_replace('~>\s+<~', '><', $html); // Strip white space between tags to prevent creation of empty #text nodes - - $this->document = new DOMDocument(); - - if ($this->options['suppress_errors']) - libxml_use_internal_errors(true); // Suppress conversion errors (from http://bit.ly/pCCRSX ) - - $this->document->loadHTML('' . $html); // Hack to load utf-8 HTML (from http://bit.ly/pVDyCt ) - $this->document->encoding = 'UTF-8'; - - if ($this->options['suppress_errors']) - libxml_clear_errors(); - - return $this->get_markdown($html); - } - - - /** - * Is Child Of? - * - * Is the node a child of the given parent tag? - * - * @param $parent_name string|array The name of the parent node(s) to search for e.g. 'code' or array('pre', 'code') - * @param $node - * @return bool - */ - private static function is_child_of($parent_name, $node) - { - for ($p = $node->parentNode; $p != false; $p = $p->parentNode) { - if (is_null($p)) - return false; - - if ( is_array($parent_name) && in_array($p->nodeName, $parent_name) ) - return true; - - if ($p->nodeName == $parent_name) - return true; - } - return false; - } - - - /** - * Convert Children - * - * Recursive function to drill into the DOM and convert each node into Markdown from the inside out. - * - * Finds children of each node and convert those to #text nodes containing their Markdown equivalent, - * starting with the innermost element and working up to the outermost element. - * - * @param $node - */ - private function convert_children($node) - { - // Don't convert HTML code inside and
     blocks to Markdown - that should stay as HTML
    -        if (self::is_child_of(array('pre', 'code'), $node))
    -            return;
    -
    -        // If the node has children, convert those to Markdown first
    -        if ($node->hasChildNodes()) {
    -            $length = $node->childNodes->length;
    -
    -            for ($i = 0; $i < $length; $i++) {
    -                $child = $node->childNodes->item($i);
    -                $this->convert_children($child);
    -            }
    -        }
    -
    -        // Now that child nodes have been converted, convert the original node
    -        $markdown = $this->convert_to_markdown($node);
    -
    -        // Create a DOM text node containing the Markdown equivalent of the original node
    -        $markdown_node = $this->document->createTextNode($markdown);
    -
    -        // Replace the old $node e.g. "

    Title

    " with the new $markdown_node e.g. "### Title" - $node->parentNode->replaceChild($markdown_node, $node); - } - - - /** - * Get Markdown - * - * Sends the body node to convert_children() to change inner nodes to Markdown #text nodes, then saves and - * returns the resulting converted document as a string in Markdown format. - * - * @return string|boolean The converted HTML as Markdown, or false if conversion failed - */ - private function get_markdown() - { - // Work on the entire DOM tree (including head and body) - $input = $this->document->getElementsByTagName("html")->item(0); - - if (!$input) - return false; - - // Convert all children of this root element. The DOMDocument stored in $this->doc will - // then consist of #text nodes, each containing a Markdown version of the original node - // that it replaced. - $this->convert_children($input); - - // Sanitize and return the body contents as a string. - $markdown = $this->document->saveHTML(); // stores the DOMDocument as a string - $markdown = html_entity_decode($markdown, ENT_QUOTES, 'UTF-8'); - $markdown = html_entity_decode($markdown, ENT_QUOTES, 'UTF-8'); // Double decode to cover cases like &nbsp; http://www.php.net/manual/en/function.htmlentities.php#99984 - $markdown = preg_replace("/]+>/", "", $markdown); // Strip doctype declaration - $unwanted = array('', '', '', '', '', '', '', ' '); - $markdown = str_replace($unwanted, '', $markdown); // Strip unwanted tags - $markdown = trim($markdown, "\n\r\0\x0B"); - - $this->output = $markdown; - - return $markdown; - } - - - /** - * Convert to Markdown - * - * Converts an individual node into a #text node containing a string of its Markdown equivalent. - * - * Example: An

    node with text content of "Title" becomes a text node with content of "### Title" - * - * @param $node - * @return string The converted HTML as Markdown - */ - private function convert_to_markdown($node) - { - $tag = $node->nodeName; // the type of element, e.g. h1 - $value = $node->nodeValue; // the value of that element, e.g. The Title - - // Strip nodes named in remove_nodes - $tags_to_remove = explode(' ', $this->options['remove_nodes']); - if ( in_array($tag, $tags_to_remove) ) - return false; - - switch ($tag) { - case "p": - $markdown = (trim($value)) ? rtrim($value) . PHP_EOL . PHP_EOL : ''; - break; - case "pre": - $markdown = PHP_EOL . $this->convert_code($node) . PHP_EOL; - break; - case "h1": - case "h2": - $markdown = $this->convert_header($tag, $node); - break; - case "h3": - $markdown = "### " . $value . PHP_EOL . PHP_EOL; - break; - case "h4": - $markdown = "#### " . $value . PHP_EOL . PHP_EOL; - break; - case "h5": - $markdown = "##### " . $value . PHP_EOL . PHP_EOL; - break; - case "h6": - $markdown = "###### " . $value . PHP_EOL . PHP_EOL; - break; - case "em": - case "i": - case "strong": - case "b": - $markdown = $this->convert_emphasis($tag, $value); - break; - case "hr": - $markdown = "- - - - - -" . PHP_EOL . PHP_EOL; - break; - case "br": - $markdown = " " . PHP_EOL; - break; - case "blockquote": - $markdown = $this->convert_blockquote($node); - break; - case "code": - $markdown = $this->convert_code($node); - break; - case "ol": - case "ul": - $markdown = $value . PHP_EOL; - break; - case "li": - $markdown = $this->convert_list($node); - break; - case "img": - $markdown = $this->convert_image($node); - break; - case "a": - $markdown = $this->convert_anchor($node); - break; - case "#text": - $markdown = preg_replace('~\s+~', ' ', $value); - $markdown = preg_replace('~^#~', '\\\\#', $markdown); - break; - case "#comment": - $markdown = ''; - break; - case "div": - $markdown = ($this->options['strip_tags']) ? $value . PHP_EOL . PHP_EOL : html_entity_decode($node->C14N()); - break; - default: - // If strip_tags is false (the default), preserve tags that don't have Markdown equivalents, - // such as nodes on their own. C14N() canonicalizes the node to a string. - // See: http://www.php.net/manual/en/domnode.c14n.php - $markdown = ($this->options['strip_tags']) ? $value : html_entity_decode($node->C14N()); - } - - return $markdown; - } - - - /** - * Convert Header - * - * Converts h1 and h2 headers to Markdown-style headers in setext style, - * matching the number of underscores with the length of the title. - * - * e.g. Header 1 Header Two - * ======== ---------- - * - * Returns atx headers instead if $this->options['header_style'] is "atx" - * - * e.g. # Header 1 ## Header Two - * - * @param string $level The header level, including the "h". e.g. h1 - * @param string $node The node to convert. - * @return string The Markdown version of the header. - */ - private function convert_header($level, $node) - { - $content = $node->nodeValue; - - if (!$this->is_child_of('blockquote', $node) && $this->options['header_style'] == "setext") { - $length = (function_exists('mb_strlen')) ? mb_strlen($content, 'utf-8') : strlen($content); - $underline = ($level == "h1") ? "=" : "-"; - $markdown = $content . PHP_EOL . str_repeat($underline, $length) . PHP_EOL . PHP_EOL; // setext style - } else { - $prefix = ($level == "h1") ? "# " : "## "; - $markdown = $prefix . $content . PHP_EOL . PHP_EOL; // atx style - } - - return $markdown; - } - - - /** - * Converts inline styles - * This function is used to render strong and em tags - * - * eg bold text becomes **bold text** or __bold text__ - * - * @param string $tag - * @param string $value - * @return string - */ - private function convert_emphasis($tag, $value) - { - if ($tag == 'i' || $tag == 'em') { - $markdown = $this->options['italic_style'] . $value . $this->options['italic_style']; - } else { - $markdown = $this->options['bold_style'] . $value . $this->options['bold_style']; - } - - return $markdown; - } - - - /** - * Convert Image - * - * Converts tags to Markdown. - * - * e.g. alt text - * becomes ![alt text](/path/img.jpg "Title") - * - * @param $node - * @return string - */ - private function convert_image($node) - { - $src = $node->getAttribute('src'); - $alt = $node->getAttribute('alt'); - $title = $node->getAttribute('title'); - - if ($title != "") { - $markdown = '![' . $alt . '](' . $src . ' "' . $title . '")'; // No newlines added. should be in a block-level element. - } else { - $markdown = '![' . $alt . '](' . $src . ')'; - } - - return $markdown; - } - - - /** - * Convert Anchor - * - * Converts tags to Markdown. - * - * e.g. Modern Nerd - * becomes [Modern Nerd](http://modernnerd.net "Title") - * - * @param $node - * @return string - */ - private function convert_anchor($node) - { - $href = $node->getAttribute('href'); - $title = $node->getAttribute('title'); - $text = $node->nodeValue; - - if ($title != "") { - $markdown = '[' . $text . '](' . $href . ' "' . $title . '")'; - } else { - $markdown = '[' . $text . '](' . $href . ')'; - } - - if (! $href) - $markdown = html_entity_decode($node->C14N()); - - // Append a space if the node after this one is also an anchor - $next_node_name = $this->get_next_node_name($node); - - if ($next_node_name == 'a') - $markdown = $markdown . ' '; - - return $markdown; - } - - - /** - * Convert List - * - * Converts