2011-04-11 14:59:26 +02:00
< ? php
#
# Markdown Extra - A text-to-HTML conversion tool for web writers
#
# PHP Markdown & Extra
2012-02-19 22:47:32 +01:00
# Copyright (c) 2004-2012 Michel Fortin
# <http://michelf.com/projects/php-markdown/>
2011-04-11 14:59:26 +02:00
#
# Original Markdown
2012-02-19 22:47:32 +01:00
# Copyright (c) 2004-2006 John Gruber
2011-04-11 14:59:26 +02:00
# <http://daringfireball.net/projects/markdown/>
#
2012-02-19 22:47:32 +01:00
define ( 'MARKDOWN_VERSION' , " 1.0.1o " ); # Sun 8 Jan 2012
define ( 'MARKDOWNEXTRA_VERSION' , " 1.2.5 " ); # Sun 8 Jan 2012
2011-04-11 14:59:26 +02:00
#
# Global default settings:
#
# Change to ">" for HTML output
@ define ( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX' , " /> " );
# Define the width of a tab for code blocks.
@ define ( 'MARKDOWN_TAB_WIDTH' , 4 );
# Optional title attribute for footnote links and backlinks.
@ define ( 'MARKDOWN_FN_LINK_TITLE' , " " );
@ define ( 'MARKDOWN_FN_BACKLINK_TITLE' , " " );
# Optional class attribute for footnote links and backlinks.
@ define ( 'MARKDOWN_FN_LINK_CLASS' , " " );
@ define ( 'MARKDOWN_FN_BACKLINK_CLASS' , " " );
#
# WordPress settings:
#
# Change to false to remove Markdown from posts and/or comments.
@ define ( 'MARKDOWN_WP_POSTS' , true );
@ define ( 'MARKDOWN_WP_COMMENTS' , true );
### Standard Function Interface ###
@ define ( 'MARKDOWN_PARSER_CLASS' , 'MarkdownExtra_Parser' );
function Markdown($text) {
    #
    # Convert $text by passing it to the transform method of a parser that
    # is created on first use and then shared by every later call.
    #
    # The parser lives in a function-level static; its class name is read
    # from the MARKDOWN_PARSER_CLASS constant.
    static $parser;
    if ($parser === null) {
        $class_name = MARKDOWN_PARSER_CLASS;
        $parser = new $class_name();
    }

    # Hand the text over to the parser and give back whatever it returns.
    $result = $parser->transform($text);
    return $result;
}
### WordPress Plugin Interface ###
/*
Plugin Name : Markdown Extra
2012-02-19 22:47:32 +01:00
Plugin URI : http :// michelf . com / projects / php - markdown /
Description : < a href = " http://daringfireball.net/projects/markdown/syntax " > Markdown syntax </ a > allows you to write using an easy - to - read , easy - to - write plain text format . Based on the original Perl version by < a href = " http://daringfireball.net/ " > John Gruber </ a >. < a href = " http://michelf.com/projects/php-markdown/ " > More ...</ a >
Version : 1.2 . 5
2011-04-11 14:59:26 +02:00
Author : Michel Fortin
2012-02-19 22:47:32 +01:00
Author URI : http :// michelf . com /
2011-04-11 14:59:26 +02:00
*/
if ( isset ( $wp_version )) {
# More details about how it works here:
2012-02-19 22:47:32 +01:00
# <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
2011-04-11 14:59:26 +02:00
# Post content and excerpts
# - Remove WordPress paragraph generator.
# - Run Markdown on excerpt, then remove all tags.
# - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
if ( MARKDOWN_WP_POSTS ) {
remove_filter ( 'the_content' , 'wpautop' );
remove_filter ( 'the_content_rss' , 'wpautop' );
remove_filter ( 'the_excerpt' , 'wpautop' );
add_filter ( 'the_content' , 'mdwp_MarkdownPost' , 6 );
add_filter ( 'the_content_rss' , 'mdwp_MarkdownPost' , 6 );
add_filter ( 'get_the_excerpt' , 'mdwp_MarkdownPost' , 6 );
add_filter ( 'get_the_excerpt' , 'trim' , 7 );
add_filter ( 'the_excerpt' , 'mdwp_add_p' );
add_filter ( 'the_excerpt_rss' , 'mdwp_strip_p' );
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
remove_filter ( 'content_save_pre' , 'balanceTags' , 50 );
remove_filter ( 'excerpt_save_pre' , 'balanceTags' , 50 );
add_filter ( 'the_content' , 'balanceTags' , 50 );
add_filter ( 'get_the_excerpt' , 'balanceTags' , 9 );
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# Add a footnote id prefix to posts when inside a loop.
function mdwp_MarkdownPost($text) {
    #
    # Filter callback for post content: transforms $text with a parser
    # that is created on the first call and kept in a static afterwards.
    #
    # Before transforming, the parser's fn_id_prefix is set depending on
    # whether is_single(), is_page() or is_feed() reports true; otherwise
    # the prefix is built from get_the_ID().
    static $parser;
    if (!$parser) {
        $parser_class = MARKDOWN_PARSER_CLASS;
        $parser = new $parser_class;
    }

    $standalone_view = is_single() || is_page() || is_feed();
    $parser->fn_id_prefix = $standalone_view ? " " : get_the_ID() . " . ";

    return $parser->transform($text);
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# Comments
# - Remove WordPress paragraph generator.
# - Remove WordPress auto-link generator.
# - Scramble important tags before passing them to the kses filter.
# - Run Markdown on excerpt then remove paragraph tags.
if ( MARKDOWN_WP_COMMENTS ) {
remove_filter ( 'comment_text' , 'wpautop' , 30 );
remove_filter ( 'comment_text' , 'make_clickable' );
add_filter ( 'pre_comment_content' , 'Markdown' , 6 );
add_filter ( 'pre_comment_content' , 'mdwp_hide_tags' , 8 );
add_filter ( 'pre_comment_content' , 'mdwp_show_tags' , 12 );
add_filter ( 'get_comment_text' , 'Markdown' , 6 );
add_filter ( 'get_comment_excerpt' , 'Markdown' , 6 );
add_filter ( 'get_comment_excerpt' , 'mdwp_strip_p' , 7 );
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
global $mdwp_hidden_tags , $mdwp_placeholders ;
$mdwp_hidden_tags = explode ( ' ' ,
'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>' );
$mdwp_placeholders = explode ( ' ' , str_rot13 (
'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR ' .
'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli' ));
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function mdwp_add_p($text) {
    #
    # Wrap $text in a paragraph tag unless it is empty or already starts
    # with one of the block tags listed in the pattern below.
    #
    if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
        # Nothing to wrap: return the text untouched.
        return $text;
    }

    # Surround the whole text, then split it at every run of two or more
    # newlines using the separator string below.
    $wrapped = '<p>' . $text . '</p>';
    return preg_replace('{\n{2,}}', " </p> \n \n <p> ", $wrapped);
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function mdwp_strip_p($t) {
    # Remove every opening and closing <p> tag (matched case-insensitively)
    # from $t and return the remaining text.
    $paragraph_tag_re = '{</?p>}i';
    return preg_replace($paragraph_tag_re, '', $t);
}
function mdwp_hide_tags($text) {
    # Replace each tag listed in the global $mdwp_hidden_tags with the
    # entry at the same position in the global $mdwp_placeholders.
    global $mdwp_hidden_tags, $mdwp_placeholders;
    $search = $mdwp_hidden_tags;
    $replacement = $mdwp_placeholders;
    $scrambled = str_replace($search, $replacement, $text);
    return $scrambled;
}
function mdwp_show_tags($text) {
    # Reverse of mdwp_hide_tags(): replace each entry of the global
    # $mdwp_placeholders with the tag at the same position in the global
    # $mdwp_hidden_tags.
    global $mdwp_hidden_tags, $mdwp_placeholders;
    $search = $mdwp_placeholders;
    $replacement = $mdwp_hidden_tags;
    $restored = str_replace($search, $replacement, $text);
    return $restored;
}
}
### bBlog Plugin Info ###
function identify_modifier_markdown() {
    #
    # Return the descriptive array for this modifier: name, type, display
    # name, description, authors, licence, version and a help string.
    # The version entry is read from the MARKDOWNEXTRA_VERSION constant.
    #
    $info = array();
    $info['name'] = 'markdown';
    $info['type'] = 'modifier';
    $info['nicename'] = 'PHP Markdown Extra';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors'] = 'Michel Fortin and John Gruber';
    $info['licence'] = 'GPL';
    $info['version'] = MARKDOWNEXTRA_VERSION;
    $info['help'] = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>';
    return $info;
}
### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
    # Smarty modifier entry point: forwards $text to Markdown() and
    # returns its result unchanged.
    $converted = Markdown($text);
    return $converted;
}
### Textile Compatibility Mode ###
# Rename this file to "classTextile.php" and it can replace Textile everywhere.
if ( strcasecmp ( substr ( __FILE__ , - 16 ), " classTextile.php " ) == 0 ) {
# Try to include PHP SmartyPants. Should be in the same directory.
@ include_once 'smartypants.php' ;
# Fake Textile class. It calls Markdown instead.
class Textile {
    # Stand-in for the Textile class: routes the text through Markdown()
    # (and SmartyPants() when that function exists) instead.

    function TextileThis($text, $lite = '', $encode = '') {
        # Markdown is applied only when both $lite and $encode compare
        # equal to the empty string.
        $full_conversion = ($lite == '' && $encode == '');
        if ($full_conversion) {
            $text = Markdown($text);
        }

        # SmartyPants is optional; use it only if it has been loaded.
        if (function_exists('SmartyPants')) {
            $text = SmartyPants($text);
        }

        return $text;
    }

    # Fake restricted version: restrictions are not supported for now, so
    # this simply forwards $text and $lite to TextileThis(); $noimage is
    # not used.
    function TextileRestricted($text, $lite = '', $noimage = '') {
        $converted = $this->TextileThis($text, $lite);
        return $converted;
    }

    # Workaround to ensure compatibility with TextPattern 4.0.3: returns
    # its argument unchanged.
    function blockLite($text) {
        return $text;
    }
}
}
#
# Markdown Parser Class
#
class Markdown_Parser {
# Regex to match balanced [brackets].
# Needed to insert a maximum bracket depth while converting to PHP.
var $nested_brackets_depth = 6 ;
var $nested_brackets_re ;
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
var $nested_url_parenthesis_depth = 4 ;
var $nested_url_parenthesis_re ;
# Table of hash values for escaped characters:
var $escape_chars = '\`*_{}[]()>#+-.!' ;
var $escape_chars_re ;
# Change to ">" for HTML output.
var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX ;
var $tab_width = MARKDOWN_TAB_WIDTH ;
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# Change to `true` to disallow markup or entities.
var $no_markup = false ;
var $no_entities = false ;
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# Predefined urls and titles for reference links and images.
var $predef_urls = array ();
var $predef_titles = array ();
function __construct() {
    #
    # Constructor. Initializes the member variables that have to be
    # computed at run time.
    #
    # Declared as __construct() because PHP 8 no longer treats a method
    # named after its class as a constructor; without this the patterns
    # below would never be built when the class is instantiated there.
    #
    $this->_initDetab();
    $this->prepareItalicsAndBold();

    # Balanced [brackets] pattern: the opening fragment and the closing
    # fragment are each repeated $nested_brackets_depth times.
    $this->nested_brackets_re =
        str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth) .
        str_repeat('\])*', $this->nested_brackets_depth);

    # Same construction for parentheses inside URLs, repeated
    # $nested_url_parenthesis_depth times.
    $this->nested_url_parenthesis_re =
        str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth) .
        str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

    # Character class matching any of the escapable characters.
    $this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

    # Sort document, block, and span gamut in ascending priority order.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
function Markdown_Parser() {
    #
    # Legacy PHP 4 style constructor name, kept so that existing code
    # calling parent::Markdown_Parser() keeps working.
    #
    # self:: (rather than $this->) is used so the call always reaches this
    # class's __construct(), even if a subclass defines its own.
    #
    self::__construct();
}
# Internal hashes used during transformation.
var $urls = array ();
var $titles = array ();
var $html_hashes = array ();
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# Status flag to avoid invalid nesting.
var $in_anchor = false ;
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function setup() {
    #
    # Called before the transformation process starts to set up the
    # parser state.
    #
    # Reset the per-document hashes. URLs and titles start from the
    # predefined ones; the HTML hash table starts empty.
    $this->urls = $this->predef_urls;
    $this->titles = $this->predef_titles;
    $this->html_hashes = array();

    # Reset the anchor-nesting flag on the object itself. A bare
    # `$in_anchor = false` would only create an unused local variable and
    # leave the member flag in whatever state the previous run left it.
    $this->in_anchor = false;
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function teardown() {
    #
    # Called after the transformation process; empties the per-document
    # tables so they do not keep taking up memory unnecessarily.
    #
    foreach (array('urls', 'titles', 'html_hashes') as $table) {
        $this->$table = array();
    }
}
function transform($text) {
    #
    # Main function. Preprocesses the input text, then passes it through
    # every method of the document gamut, in the gamut's stored order.
    #
    $this->setup();

    # Remove a leading UTF-8 BOM and any \x1A marker character.
    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

    # Standardize line endings: both \r\n (DOS) and a lone \r (Mac) are
    # replaced by the string below.
    $text = preg_replace('{\r\n?}', " \n ", $text);

    # Make sure the text ends with a couple of newlines.
    $text = $text . " \n \n ";

    # Convert tabs to spaces, then turn block-level HTML blocks into hash
    # entries.
    $detabbed = $this->detab($text);
    $text = $this->hashHTMLBlocks($detabbed);

    # Empty out lines consisting only of spaces. This makes subsequent
    # regexen easier to write, because consecutive blank lines can be
    # matched with /\n+/ instead of something like /[ ]*\n+/ .
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Run document gamut methods; only the method names (keys) are needed.
    foreach (array_keys($this->document_gamut) as $method) {
        $text = $this->$method($text);
    }

    $this->teardown();

    return $text . " \n ";
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
var $document_gamut = array (
# Strip link definitions, store in hashes.
" stripLinkDefinitions " => 20 ,
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
" runBasicBlockGamut " => 30 ,
);
function stripLinkDefinitions ( $text ) {
#
# Strips link definitions from text, stores the URLs and titles in
# hash references.
#
$less_than_tab = $this -> tab_width - 1 ;
# Link defs are in the form: ^[id]: url "optional title"
$text = preg_replace_callback ( ' {
^ [ ]{ 0 , '.$less_than_tab.' } \ [( .+ ) \ ][ ] ? : # id = $1
[ ] *
\n ? # maybe *one* newline
[ ] *
2012-02-19 22:47:32 +01:00
( ? :
< ( .+ ? ) > # url = $2
|
( \S + ? ) # url = $3
)
2011-04-11 14:59:26 +02:00
[ ] *
\n ? # maybe one newline
[ ] *
( ? :
( ? <= \s ) # lookbehind for whitespace
[ " (]
2012-02-19 22:47:32 +01:00
( .* ? ) # title = $4
2011-04-11 14:59:26 +02:00
[ " )]
[ ] *
) ? # title is optional
( ? : \n +| \Z )
} xm ' ,
array ( & $this , '_stripLinkDefinitions_callback' ),
$text );
return $text ;
}
function _stripLinkDefinitions_callback($matches) {
    #
    # preg callback for stripLinkDefinitions(): records the URL and title
    # of one link definition under its lower-cased id, and removes the
    # definition from the text by returning an empty string.
    #
    $link_id = strtolower($matches[1]);

    # The URL is in group 2 or, when that one is empty, in group 3.
    $this->urls[$link_id] = ($matches[2] == '') ? $matches[3] : $matches[2];

    # Bound by reference so that a missing title group does not raise a
    # notice.
    $this->titles[$link_id] =& $matches[4];

    return ''; # String that will replace the block
}
function hashHTMLBlocks ( $text ) {
if ( $this -> no_markup ) return $text ;
$less_than_tab = $this -> tab_width - 1 ;
# Hashify HTML blocks:
# We only want to do this for block-level HTML tags, such as headers,
# lists, and tables. That's because we still want to wrap <p>s around
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
# phrase emphasis, and spans. The list of tags we're looking for is
# hard-coded:
#
# * List "a" is made of tags which can be both inline or block-level.
2012-02-19 22:47:32 +01:00
# These will be treated block-level when the start tag is alone on
# its line, otherwise they're not matched here and will be taken as
2011-04-11 14:59:26 +02:00
# inline later.
# * List "b" is made of tags which are always block-level;
#
$block_tags_a_re = 'ins|del' ;
$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' .
2012-02-19 22:47:32 +01:00
'script|noscript|form|fieldset|iframe|math' ;
2011-04-11 14:59:26 +02:00
# Regular expression for the content of a block tag.
$nested_tags_level = 4 ;
$attr = '
( ?> # optional tag attributes
\s # starts with whitespace
( ?>
[ ^> " /]+ # text outside quotes
|
/+ ( ? !> ) # slash not followed by ">"
|
" [^ " ] * " # text inside double quotes (tolerate " > " )
|
\ ' [ ^ \ ' ] * \ ' # text inside single quotes (tolerate ">")
) *
2012-02-19 22:47:32 +01:00
) ?
2011-04-11 14:59:26 +02:00
' ;
$content =
str_repeat ( '
( ?>
[ ^< ] + # content without tag
|
< \2 # nested opening tag
'.$attr.' # attributes
( ?>
/>
|
> ' , $nested_tags_level ) . # end of opening tag
'.*?' . # last level nested tag content
str_repeat ( '
</ \2\s *> # closing nested tag
)
2012-02-19 22:47:32 +01:00
|
2011-04-11 14:59:26 +02:00
< ( ? !/ \2\s *> # other tags with a different name
)
) * ' ,
$nested_tags_level );
$content2 = str_replace ( '\2' , '\3' , $content );
# First, look for nested blocks, e.g.:
# <div>
# <div>
# tags for inner block must be indented.
# </div>
# </div>
#
# The outermost tags must start at the left margin for this to match, and
# the inner nested divs must be indented.
# We need to do this before the next, more liberal match, because the next
# match will start at the first `<div>` and stop at the first `</div>`.
$text = preg_replace_callback ( ' {( ?>
( ?>
( ? <= \n\n ) # Starting after a blank line
| # or
\A\n ? # the beginning of the doc
)
( # save in $1
2012-02-19 22:47:32 +01:00
# Match from `\n<tag>` to `</tag>\n`, handling nested tags
2011-04-11 14:59:26 +02:00
# in between.
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
[ ]{ 0 , '.$less_than_tab.' }
< ( '.$block_tags_b_re.' ) # start tag = $2
'.$attr.' > # attributes followed by > and \n
'.$content.' # content, support nesting
</ \2 > # the matching end tag
[ ] * # trailing spaces/tabs
( ? = \n +| \Z ) # followed by a newline or end of document
| # Special version for tags of group a.
[ ]{ 0 , '.$less_than_tab.' }
< ( '.$block_tags_a_re.' ) # start tag = $3
'.$attr.' > [ ] * \n # attributes followed by >
'.$content2.' # content, support nesting
</ \3 > # the matching end tag
[ ] * # trailing spaces/tabs
( ? = \n +| \Z ) # followed by a newline or end of document
2012-02-19 22:47:32 +01:00
| # Special case just for <hr />. It was easier to make a special
2011-04-11 14:59:26 +02:00
# case than to make the other regex more complicated.
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
[ ]{ 0 , '.$less_than_tab.' }
< ( hr ) # start tag = $2
'.$attr.' # attributes
/ ?> # the matching end tag
[ ] *
( ? = \n { 2 ,} | \Z ) # followed by a blank line or end of document
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
| # Special case for standalone HTML comments:
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
[ ]{ 0 , '.$less_than_tab.' }
( ? s :
<!-- .* ? -->
)
[ ] *
( ? = \n { 2 ,} | \Z ) # followed by a blank line or end of document
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
| # PHP and ASP-style processor instructions (<? and <%)
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
[ ]{ 0 , '.$less_than_tab.' }
( ? s :
< ([ ? % ]) # $2
.* ?
\2 >
)
[ ] *
( ? = \n { 2 ,} | \Z ) # followed by a blank line or end of document
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
)
)} Sxmi ' ,
array ( & $this , '_hashHTMLBlocks_callback' ),
$text );
return $text ;
}
function _hashHTMLBlocks_callback($matches) {
    # preg callback for hashHTMLBlocks(): stores the captured block
    # ($matches[1]) through hashBlock() and returns the resulting key
    # embedded in the surrounding string below.
    $key = $this->hashBlock($matches[1]);
    return " \n \n $key\n\n ";
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function hashPart($text, $boundary = 'X') {
    #
    # Called whenever a function inserts an atomic element into the text
    # stream. Stores $text in $this->html_hashes and returns a unique
    # token that stands for it; unhash() swaps the token back later.
    #
    # $boundary is the character used to surround the token. By
    # convention, "B" is used for block elements that need not be wrapped
    # into paragraph tags at the end, ":" is used for elements that are
    # word separators and "X" is used in the general case.
    #
    # The counter is a function-level static, so it keeps increasing
    # across calls.
    static $i = 0;

    # Swap back any token already present in $text so that unhashing does
    # not have to be done multiple times at the end.
    $plain = $this->unhash($text);

    # Build the next token and remember what it stands for.
    $i++;
    $key = " $boundary\x1A " . $i . $boundary;
    $this->html_hashes[$key] = $plain;

    return $key; # String that will replace the tag.
}
function hashBlock($text) {
    #
    # Convenience wrapper: hashPart() with the block-level boundary 'B'.
    #
    $block_boundary = 'B';
    return $this->hashPart($text, $block_boundary);
}
var $block_gamut = array (
#
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
#
" doHeaders " => 10 ,
" doHorizontalRules " => 20 ,
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
" doLists " => 40 ,
" doCodeBlocks " => 50 ,
" doBlockQuotes " => 60 ,
);
function runBlockGamut($text) {
    #
    # Run block gamut transformations.
    #
    # Raw HTML in the Markdown source has to be hashed before anything
    # else. This needs to be done for each block, and not only at the
    # beginning, since hashed blocks can be part of list items and could
    # have been indented. Indented blocks would have been seen as a code
    # block in a previous pass of hashHTMLBlocks.
    $hashed = $this->hashHTMLBlocks($text);

    return $this->runBasicBlockGamut($hashed);
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function runBasicBlockGamut($text) {
    #
    # Run block gamut transformations, without hashing HTML blocks. This
    # is useful when HTML blocks are known to be already hashed, like in
    # the first whole-document pass.
    #
    # Each key of $this->block_gamut is a method name; the priorities
    # stored as values are not needed here.
    foreach (array_keys($this->block_gamut) as $method) {
        $text = $this->$method($text);
    }

    # Finally form paragraphs and restore hashed blocks.
    return $this->formParagraphs($text);
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function doHorizontalRules ( $text ) {
# Do Horizontal Rules:
# Replaces every match of the pattern below in $text with the key
# returned by hashBlock() for an <hr> tag (built with
# $this->empty_element_suffix), placed between the two newline strings
# shown, and returns the resulting text.
#
# hashBlock() is evaluated once, as an argument, before preg_replace()
# runs, so every rule matched in a single call is replaced by the same
# key.
return preg_replace (
' {
^ [ ]{ 0 , 3 } # Leading space
([ -* _ ]) # $1: First marker
( ?> # Repeated marker group
[ ]{ 0 , 2 } # Zero, one, or two spaces.
\1 # Marker character
){ 2 ,} # Group repeated at least twice
[ ] * # Tailing spaces
$ # End of line.
} mx ' ,
2012-02-19 22:47:32 +01:00
" \n " . $this -> hashBlock ( " <hr $this->empty_element_suffix " ) . " \n " ,
2011-04-11 14:59:26 +02:00
$text );
}
var $span_gamut = array (
#
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
#
# Process character escapes, code spans, and inline HTML
# in one shot.
" parseSpan " => - 30 ,
# Process anchor and image tags. Images must come first,
# because ![foo][f] looks like an anchor.
" doImages " => 10 ,
" doAnchors " => 20 ,
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# Make links out of things like `<http://example.com/>`
# Must come after doAnchors, because you can use < and >
# delimiters in inline links like [this](<url>).
" doAutoLinks " => 30 ,
" encodeAmpsAndAngles " => 40 ,
" doItalicsAndBold " => 50 ,
" doHardBreaks " => 60 ,
);
function runSpanGamut($text) {
    #
    # Run span gamut transformations: call every method named by a key of
    # $this->span_gamut, in the array's stored order, feeding each one the
    # output of the previous.
    #
    foreach (array_keys($this->span_gamut) as $method) {
        $text = $this->$method($text);
    }

    return $text;
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
function doHardBreaks($text) {
    # Do hard breaks: every run of two or more spaces followed by a
    # newline is replaced by the return value of _doHardBreaks_callback().
    $callback = array(&$this, '_doHardBreaks_callback');
    $hard_break_re = '/ {2,}\n/';
    return preg_replace_callback($hard_break_re, $callback, $text);
}
function _doHardBreaks_callback($matches) {
    # preg callback for doHardBreaks(): returns the hashPart() token for a
    # <br> tag built with $this->empty_element_suffix. $matches is unused.
    $break_tag = " <br $this->empty_element_suffix\n ";
    return $this->hashPart($break_tag);
}
function doAnchors ( $text ) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
if ( $this -> in_anchor ) return $text ;
$this -> in_anchor = true ;
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
#
# First, handle reference-style links: [link text] [id]
#
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
\ [
( '.$this->nested_brackets_re.' ) # link text = $2
\ ]
[ ] ? # one optional space
( ? : \n [ ] * ) ? # one optional newline followed by spaces
\ [
( .* ? ) # id = $3
\ ]
)
} xs ' ,
array ( & $this , '_doAnchors_reference_callback' ), $text );
#
# Next, inline-style links: [link text](url "optional title")
#
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
\ [
( '.$this->nested_brackets_re.' ) # link text = $2
\ ]
\ ( # literal paren
2012-02-19 22:47:32 +01:00
[ \n ] *
2011-04-11 14:59:26 +02:00
( ? :
2012-02-19 22:47:32 +01:00
< ( .+ ? ) > # href = $3
2011-04-11 14:59:26 +02:00
|
( '.$this->nested_url_parenthesis_re.' ) # href = $4
)
2012-02-19 22:47:32 +01:00
[ \n ] *
2011-04-11 14:59:26 +02:00
( # $5
([ \ ' " ]) # quote char = $ 6
( .* ? ) # Title = $7
\6 # matching quote
2012-02-19 22:47:32 +01:00
[ \n ] * # ignore any spaces/tabs between closing quote and )
2011-04-11 14:59:26 +02:00
) ? # title is optional
\ )
)
} xs ' ,
2012-02-19 22:47:32 +01:00
array ( & $this , '_doAnchors_inline_callback' ), $text );
2011-04-11 14:59:26 +02:00
#
# Last, handle reference-style shortcuts: [link text]
2012-02-19 22:47:32 +01:00
# These must come last in case you've also got [link text][1]
# or [link text](/foo)
2011-04-11 14:59:26 +02:00
#
2012-02-19 22:47:32 +01:00
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
\ [
([ ^ \ [ \ ]] + ) # link text = $2; can\'t contain [ or ]
\ ]
)
} xs ' ,
array ( & $this , '_doAnchors_reference_callback' ), $text );
2011-04-11 14:59:26 +02:00
$this -> in_anchor = false ;
return $text ;
}
function _doAnchors_reference_callback ( $matches ) {
$whole_match = $matches [ 1 ];
$link_text = $matches [ 2 ];
$link_id =& $matches [ 3 ];
if ( $link_id == " " ) {
# for shortcut links like [this][] or [this].
$link_id = $link_text ;
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# lower-case and turn embedded newlines into spaces
$link_id = strtolower ( $link_id );
$link_id = preg_replace ( '{[ ]?\n}' , ' ' , $link_id );
if ( isset ( $this -> urls [ $link_id ])) {
$url = $this -> urls [ $link_id ];
$url = $this -> encodeAttribute ( $url );
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
$result = " <a href= \" $url\ " " ;
if ( isset ( $this -> titles [ $link_id ] ) ) {
$title = $this -> titles [ $link_id ];
$title = $this -> encodeAttribute ( $title );
$result .= " title= \" $title\ " " ;
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
$link_text = $this -> runSpanGamut ( $link_text );
$result .= " > $link_text </a> " ;
$result = $this -> hashPart ( $result );
}
else {
$result = $whole_match ;
}
return $result ;
}
function _doAnchors_inline_callback ( $matches ) {
# preg callback for the inline-link pattern of doAnchors().
# Builds an <a> element from the match and returns its hashPart() token:
#   - link text comes from $matches[2], run through runSpanGamut();
#   - the URL is $matches[3] or, when that is empty, $matches[4], and is
#     passed through encodeAttribute();
#   - the title is $matches[7], bound by reference so that isset() can be
#     used to test whether a title group is present.
# NOTE(review): $whole_match is assigned but never read in this function.
# NOTE(review): runSpanGamut() is applied to the link text twice (once on
# $matches[2] and once more just before the closing tag is appended) --
# confirm whether the second pass is intentional.
$whole_match = $matches [ 1 ];
$link_text = $this -> runSpanGamut ( $matches [ 2 ]);
$url = $matches [ 3 ] == '' ? $matches [ 4 ] : $matches [ 3 ];
$title =& $matches [ 7 ];
$url = $this -> encodeAttribute ( $url );
$result = " <a href= \" $url\ " " ;
if ( isset ( $title )) {
$title = $this -> encodeAttribute ( $title );
$result .= " title= \" $title\ " " ;
}
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
$link_text = $this -> runSpanGamut ( $link_text );
$result .= " > $link_text </a> " ;
return $this -> hashPart ( $result );
}
function doImages ( $text ) {
#
# Turn Markdown image shortcuts into <img> tags.
#
#
# First, handle reference-style labeled images: ![alt text][id]
#
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
! \ [
( '.$this->nested_brackets_re.' ) # alt text = $2
\ ]
[ ] ? # one optional space
( ? : \n [ ] * ) ? # one optional newline followed by spaces
\ [
( .* ? ) # id = $3
\ ]
)
2012-02-19 22:47:32 +01:00
} xs ' ,
2011-04-11 14:59:26 +02:00
array ( & $this , '_doImages_reference_callback' ), $text );
#
# Next, handle inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
#
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
! \ [
( '.$this->nested_brackets_re.' ) # alt text = $2
\ ]
\s ? # One optional whitespace character
\ ( # literal paren
2012-02-19 22:47:32 +01:00
[ \n ] *
2011-04-11 14:59:26 +02:00
( ? :
< ( \S * ) > # src url = $3
|
( '.$this->nested_url_parenthesis_re.' ) # src url = $4
)
2012-02-19 22:47:32 +01:00
[ \n ] *
2011-04-11 14:59:26 +02:00
( # $5
([ \ ' " ]) # quote char = $ 6
( .* ? ) # title = $7
\6 # matching quote
2012-02-19 22:47:32 +01:00
[ \n ] *
2011-04-11 14:59:26 +02:00
) ? # title is optional
\ )
)
} xs ' ,
array ( & $this , '_doImages_inline_callback' ), $text );
return $text ;
}
function _doImages_reference_callback ( $matches ) {
$whole_match = $matches [ 1 ];
$alt_text = $matches [ 2 ];
$link_id = strtolower ( $matches [ 3 ]);
if ( $link_id == " " ) {
$link_id = strtolower ( $alt_text ); # for shortcut links like ![this][].
}
$alt_text = $this -> encodeAttribute ( $alt_text );
if ( isset ( $this -> urls [ $link_id ])) {
$url = $this -> encodeAttribute ( $this -> urls [ $link_id ]);
$result = " <img src= \" $url\ " alt = \ " $alt_text\ " " ;
if ( isset ( $this -> titles [ $link_id ])) {
$title = $this -> titles [ $link_id ];
$title = $this -> encodeAttribute ( $title );
$result .= " title= \" $title\ " " ;
}
$result .= $this -> empty_element_suffix ;
$result = $this -> hashPart ( $result );
}
else {
# If there's no such link ID, leave intact:
$result = $whole_match ;
}
return $result ;
}
function _doImages_inline_callback ( $matches ) {
$whole_match = $matches [ 1 ];
$alt_text = $matches [ 2 ];
$url = $matches [ 3 ] == '' ? $matches [ 4 ] : $matches [ 3 ];
$title =& $matches [ 7 ];
$alt_text = $this -> encodeAttribute ( $alt_text );
$url = $this -> encodeAttribute ( $url );
$result = " <img src= \" $url\ " alt = \ " $alt_text\ " " ;
if ( isset ( $title )) {
$title = $this -> encodeAttribute ( $title );
$result .= " title= \" $title\ " " ; # $title already quoted
}
$result .= $this -> empty_element_suffix ;
return $this -> hashPart ( $result );
}
function doHeaders ( $text ) {
# Setext-style headers:
# Header 1
# ========
2012-02-19 22:47:32 +01:00
#
2011-04-11 14:59:26 +02:00
# Header 2
# --------
#
$text = preg_replace_callback ( '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx' ,
array ( & $this , '_doHeaders_callback_setext' ), $text );
# atx-style headers:
# # Header 1
# ## Header 2
# ## Header 2 with closing hashes ##
# ...
# ###### Header 6
#
$text = preg_replace_callback ( ' {
^ ( \ #{1,6}) # $1 = string of #\'s
[ ] *
( .+ ? ) # $2 = Header text
[ ] *
\ #* # optional closing #\'s (not counted)
\n +
} xm ' ,
array ( & $this , '_doHeaders_callback_atx' ), $text );
return $text ;
}
function _doHeaders_callback_setext($matches) {
    #
    # preg callback for the setext-style header pattern of doHeaders().
    # $matches[1] is the header text and $matches[2] the underline made of
    # "=" or "-" characters. Returns the hashBlock() token of the header
    # element, or the untouched match when it is not really a header.
    #
    # Terrible hack to check we haven't found an empty list item.
    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
        return $matches[0];
    }

    # The first character of the underline selects the level: "=" gives
    # level 1, anything else level 2. Square brackets are used for the
    # string offset because the curly-brace form ($str{0}) is deprecated
    # since PHP 7.4 and is a parse error in PHP 8.
    $level = $matches[2][0] == '=' ? 1 : 2;

    $block = " <h $level > " . $this->runSpanGamut($matches[1]) . " </h $level > ";
    return " \n " . $this->hashBlock($block) . " \n \n ";
}
function _doHeaders_callback_atx($matches) {
    #
    # preg callback for the atx-style header pattern of doHeaders().
    # The header level is the length of the captured "#" string in
    # $matches[1]; $matches[2] is the header text, which is run through
    # the span gamut. Returns the hashBlock() token of the header element
    # surrounded by the newline strings below.
    #
    $level = strlen($matches[1]);
    $inner = $this->runSpanGamut($matches[2]);
    $block = " <h $level > " . $inner . " </h $level > ";
    $key = $this->hashBlock($block);
    return " \n " . $key . " \n \n ";
}
function doLists ( $text ) {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
$less_than_tab = $this -> tab_width - 1 ;
# Re-usable patterns to match list item bullets and number markers:
$marker_ul_re = '[*+-]' ;
2012-02-19 22:47:32 +01:00
$marker_ol_re = '\d+[\.]' ;
2011-04-11 14:59:26 +02:00
$marker_any_re = " (?: $marker_ul_re | $marker_ol_re ) " ;
2012-02-19 22:47:32 +01:00
$markers_relist = array (
$marker_ul_re => $marker_ol_re ,
$marker_ol_re => $marker_ul_re ,
);
2011-04-11 14:59:26 +02:00
2012-02-19 22:47:32 +01:00
foreach ( $markers_relist as $marker_re => $other_marker_re ) {
2011-04-11 14:59:26 +02:00
# Re-usable pattern to match any entirel ul or ol list:
$whole_list_re = '
( # $1 = whole list
( # $2
2012-02-19 22:47:32 +01:00
([ ]{ 0 , '.$less_than_tab.' }) # $3 = number of spaces
( '.$marker_re.' ) # $4 = first list item marker
2011-04-11 14:59:26 +02:00
[ ] +
)
( ? s :.+ ? )
2012-02-19 22:47:32 +01:00
( # $5
2011-04-11 14:59:26 +02:00
\z
|
\n { 2 ,}
( ? = \S )
( ? ! # Negative lookahead for another list item marker
[ ] *
'.$marker_re.' [ ] +
)
2012-02-19 22:47:32 +01:00
|
( ? = # Lookahead for another kind of list
\n
\3 # Must have the same indentation
'.$other_marker_re.' [ ] +
)
2011-04-11 14:59:26 +02:00
)
)
' ; // mx
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
# We use a different prefix before nested lists than top-level lists.
# See extended comment in _ProcessListItems().
2012-02-19 22:47:32 +01:00
2011-04-11 14:59:26 +02:00
if ( $this -> list_level ) {
$text = preg_replace_callback ( ' {
^
'.$whole_list_re.'
} mx ' ,
array ( & $this , '_doLists_callback' ), $text );
}