Dan's fix from MDL-7263
[moodle.git] / lib / markdown.php
blob9a1369cb415c882bcc6e89f4b4a1aa5a04200d4d
1 <?php
4 # PHP Markdown Extra - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004-2005 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
9 # Based on Markdown
10 # Copyright (c) 2004-2005 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
# Bind the version strings, settings and lookup tables used below to the
# global scope.
global $MarkdownPHPVersion, $MarkdownSyntaxVersion;
global $md_empty_element_suffix, $md_tab_width;
global $md_nested_brackets_depth, $md_nested_brackets;
global $md_escape_table, $md_backslash_escape_table;
global $md_list_level;

# Version of this PHP port, and of the Markdown syntax it implements.
$MarkdownPHPVersion    = 'Extra 1.0.1'; # Fri 9 Dec 2005
$MarkdownSyntaxVersion = '1.0.1';       # Sun 12 Dec 2004

#
# Global default settings:
#
# Suffix closing empty elements such as <hr> and <br>; use ">" for HTML output.
$md_empty_element_suffix = " />";
# Width of a tab stop, in spaces.
$md_tab_width = 4;

#
# WordPress settings:
#
# Set to false to remove Markdown from posts.
$md_wp_posts = true;
# Set to false to remove Markdown from comments.
$md_wp_comments = true;
38 # -- WordPress Plugin Interface -----------------------------------------------
40 Plugin Name: PHP Markdown Extra
41 Plugin URI: http://www.michelf.com/projects/php-markdown/
42 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
43 Version: Extra 1.0.1
44 Author: Michel Fortin
45 Author URI: http://www.michelf.com/
# The hooks below are only registered when $wp_version is set.
47 if (isset($wp_version)) {
48 # More details about how it works here:
49 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
51 # Post content and excerpts
52 if ($md_wp_posts) {
# Unhook 'wpautop' from content and excerpts, then hook Markdown at
# priority 6. Excerpts are trimmed (priority 7), wrapped by md_add_p on
# display, and stripped of paragraph tags by md_strip_p for the RSS excerpt.
53 remove_filter('the_content', 'wpautop');
54 remove_filter('the_excerpt', 'wpautop');
55 add_filter('the_content', 'Markdown', 6);
56 add_filter('get_the_excerpt', 'Markdown', 6);
57 add_filter('get_the_excerpt', 'trim', 7);
58 add_filter('the_excerpt', 'md_add_p');
59 add_filter('the_excerpt_rss', 'md_strip_p');
# Move 'balanceTags' from the *_save_pre filters to the display filters, at
# priorities higher than Markdown's 6.
# NOTE(review): presumably so tag balancing sees the generated HTML rather
# than the Markdown source -- confirm against WordPress filter ordering.
61 remove_filter('content_save_pre', 'balanceTags', 50);
62 remove_filter('excerpt_save_pre', 'balanceTags', 50);
63 add_filter('the_content', 'balanceTags', 50);
64 add_filter('get_the_excerpt', 'balanceTags', 9);
function md_add_p($text) {
#
# Wrap $text in <p>...</p> unless it is empty or already opens with a
# paragraph tag (compared case-insensitively).
#
# Returns the possibly wrapped text; an empty input yields ''.
#
    # Always hand a string back: a bare `return;` would yield NULL.
    if (strlen($text) == 0) return '';

    # The opening tag sits at the *start* of the text. Looking at the last
    # three characters could never detect an existing paragraph, so text
    # that was already wrapped ended up wrapped a second time.
    if (strcasecmp(substr($text, 0, 3), '<p>') == 0) return $text;

    return '<p>'.$text.'</p>';
}
function md_strip_p($t) {
#
# Remove every opening and closing paragraph tag (<p>, </p>, <P>, </P>)
# from $t and return the result.
#
    $without_paragraph_tags = preg_replace('{</?[pP]>}', '', $t);
    return $without_paragraph_tags;
}
74 # Comments
75 if ($md_wp_comments) {
# Unhook 'wpautop' and 'make_clickable' from comment text, then hook
# Markdown at priority 6 wherever comment text or excerpts are produced.
76 remove_filter('comment_text', 'wpautop');
77 remove_filter('comment_text', 'make_clickable');
78 add_filter('pre_comment_content', 'Markdown', 6);
# md_hide_tags (priority 8) swaps the tags listed in $md_hidden_tags for
# their MD5 placeholders and md_show_tags (priority 12) swaps them back.
# NOTE(review): presumably this shields the tags from a filter running
# between those priorities -- confirm which one.
79 add_filter('pre_comment_content', 'md_hide_tags', 8);
80 add_filter('pre_comment_content', 'md_show_tags', 12);
81 add_filter('get_comment_text', 'Markdown', 6);
82 add_filter('get_comment_excerpt', 'Markdown', 6);
83 add_filter('get_comment_excerpt', 'md_strip_p', 7);
# Tag => MD5-of-tag map read by md_hide_tags() and md_show_tags().
85 global $md_hidden_tags;
86 $md_hidden_tags = array(
87 '<p>' => md5('<p>'), '</p>' => md5('</p>'),
88 '<pre>' => md5('<pre>'), '</pre>'=> md5('</pre>'),
89 '<ol>' => md5('<ol>'), '</ol>' => md5('</ol>'),
90 '<ul>' => md5('<ul>'), '</ul>' => md5('</ul>'),
91 '<li>' => md5('<li>'), '</li>' => md5('</li>'),
function md_hide_tags($text) {
#
# Replace each tag listed in the global $md_hidden_tags table with its
# placeholder value and return the result.
#
    global $md_hidden_tags;

    $tags         = array_keys($md_hidden_tags);
    $placeholders = array_values($md_hidden_tags);

    return str_replace($tags, $placeholders, $text);
}
function md_show_tags($text) {
#
# Inverse of md_hide_tags(): replace each placeholder value from the global
# $md_hidden_tags table with the tag it stands for and return the result.
#
    global $md_hidden_tags;

    $placeholders = array_values($md_hidden_tags);
    $tags         = array_keys($md_hidden_tags);

    return str_replace($placeholders, $tags, $text);
}
108 # -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
#
# Return the plugin description array: name, type, nice name, description,
# authors, licence, version (taken from $MarkdownPHPVersion) and help text.
#
    global $MarkdownPHPVersion;

    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'PHP Markdown Extra';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = $MarkdownPHPVersion;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    return $info;
}
123 # -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_markdown($text) {
#
# Smarty modifier entry point: run $text through Markdown() and return
# the resulting HTML.
#
    $html = Markdown($text);
    return $html;
}
128 # -- Textile Compatibility Mode -----------------------------------------------
129 # Rename this file to "classTextile.php" and it can replace Textile anywhere.
130 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
131 # Try to include PHP SmartyPants. Should be in the same directory.
132 @include_once 'smartypants.php';
133 # Fake Textile class. It calls Markdown instead.
class Textile {
    #
    # Fake Textile entry point. Runs Markdown() on $text when both $lite
    # and $encode are empty, then SmartyPants() when that function exists.
    # $noimage and $strict are accepted but not used.
    #
    function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
        $full_conversion = ($lite == '' && $encode == '');
        if ($full_conversion) {
            $text = Markdown($text);
        }

        if (function_exists('SmartyPants')) {
            $text = SmartyPants($text);
        }

        return $text;
    }
}
146 # Globals:
# Regex fragment matching balanced [brackets], nested up to
# $md_nested_brackets_depth levels: the opening half repeated that many
# times, followed by the closing half repeated the same number of times.
$md_nested_brackets_depth = 6;
$md_nested_brackets = implode('', array(
    str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth),
    str_repeat('\])*', $md_nested_brackets_depth),
));
# Table of hash values for escaped characters: each special character maps
# to its own MD5 checksum.
$md_escape_table = array(
    "\\" => md5("\\"),
    "`" => md5("`"),
    "*" => md5("*"),
    "_" => md5("_"),
    "{" => md5("{"),
    "}" => md5("}"),
    "[" => md5("["),
    "]" => md5("]"),
    "(" => md5("("),
    ")" => md5(")"),
    ">" => md5(">"),
    "#" => md5("#"),
    "+" => md5("+"),
    "-" => md5("-"),
    "." => md5("."),
    "!" => md5("!"),
    ":" => md5(":"),
    "|" => md5("|"),
);

# Create an identical table keyed by the backslash-escaped form of each
# character. The table is initialised explicitly: a bare
# `$md_backslash_escape_table;` statement does nothing, which would leave
# any value already present in that global in place.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $key => $char)
    $md_backslash_escape_table["\\$key"] = $char;
function Markdown($text) {
#
# Main entry point: convert the Markdown source in $text and return the
# result. The order of the steps below is essential.
#
    global $md_urls, $md_titles, $md_html_blocks, $md_html_hashes;

    # Start from empty lookup tables so that entries from a previous call
    # (e.g. another article on the same page) cannot leak into this one.
    $md_html_hashes = array();
    $md_html_blocks = array();
    $md_titles      = array();
    $md_urls        = array();

    # Standardize line endings: DOS first, then old Mac, to Unix.
    $text = str_replace("\r\n", "\n", $text);
    $text = str_replace("\r", "\n", $text);

    # Make sure the text ends with a couple of newlines.
    $text = $text . "\n\n";

    # Convert all tabs to spaces.
    $text = _Detab($text);

    # Turn block-level HTML blocks into hash entries.
    $text = _HashHTMLBlocks($text);

    # Empty out lines consisting only of spaces and tabs, so later regexes
    # can match consecutive blank lines with /\n+/.
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Strip link definitions (stored in the global tables), run the block
    # transformations without re-hashing HTML, then restore escaped chars.
    $text = _StripLinkDefinitions($text);
    $text = _RunBlockGamut($text, FALSE);
    $text = _UnescapeSpecialChars($text);

    return $text . "\n";
}
function _StripLinkDefinitions($text) {
#
# Remove link definitions of the form
#     [id]: url "optional title"
# from $text. Each match is handed to _StripLinkDefinitions_callback,
# which records it and replaces it with an empty string.
#
    global $md_tab_width;

    # A definition may be indented by less than one tab stop.
    $max_indent = $md_tab_width - 1;

    $definition_pattern = '{
                        ^[ ]{0,'.$max_indent.'}\[(.+)\]:    # id = $1
                          [ \t]*
                          \n?               # maybe *one* newline
                          [ \t]*
                        <?(\S+?)>?          # url = $2
                          [ \t]*
                          \n?               # maybe one newline
                          [ \t]*
                        (?:
                            (?<=\s)         # lookbehind for whitespace
                            ["(]
                            (.+?)           # title = $3
                            [")]
                            [ \t]*
                        )?  # title is optional
                        (?:\n+|\Z)
        }xm';

    return preg_replace_callback($definition_pattern,
        '_StripLinkDefinitions_callback', $text);
}
function _StripLinkDefinitions_callback($matches) {
#
# Record one link definition: the lower-cased id ($matches[1]) keys the
# encoded URL ($matches[2]) in $md_urls and, when a title was captured,
# the title with double quotes turned into &quot; in $md_titles.
#
# Returns '' so the definition disappears from the text.
#
    global $md_urls, $md_titles;

    $id = strtolower($matches[1]);
    $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

    if (isset($matches[3])) {
        $md_titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
function _HashHTMLBlocks($text) {
#
# Hashify HTML blocks and "clean tags".
#
# Only block-level HTML tags are hashed, because paragraphs wrapped in
# non-block-level tags (anchors, emphasis, spans) must still get their
# <p>s. The tag lists are hard-coded below and published as globals.
#
# The work is done by _HashHTMLBlocks_InMarkdown, which calls
# _HashHTMLBlocks_InHTML on block tags; that function calls back into
# _HashHTMLBlocks_InMarkdown for tags carrying the markdown attribute.
#
    global $block_tags, $context_block_tags, $contain_span_tags;
    global $clean_tags, $auto_close_tags, $tag_match;

    # Tags that are always treated as block tags:
    $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|form|fieldset|iframe|hr|legend';

    # Tags treated as block tags only if the opening tag is alone on its line:
    $context_block_tags = 'script|noscript|math|ins|del';

    # Tags where markdown="1" defaults to span mode:
    $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend';

    # Tags whose contents must never be modified, wherever they appear:
    $clean_tags = 'script|math';

    # Tags that do not need to be closed.
    $auto_close_tags = 'hr|img';

    # Regex to match any tag, comment, processing instruction or CDATA block.
    $tag_match =
        '{
            (                   # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    \s*             # Whitespace.
                    (?:
                        ".*?"       |   # Double quotes (can contain `>`)
                        \'.*?\'     |   # Single quotes (can contain `>`)
                        .+?             # Anything but quotes and `>`.
                    )*?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\? .*? \?>         # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    # Run the HTML-in-Markdown hasher; only the processed text (first
    # element of the returned pair) is wanted here.
    $result = _HashHTMLBlocks_InMarkdown($text);

    return $result[0];
}
function _HashHTMLBlocks_InMarkdown($text, $indent = 0,
                                    $enclosing_tag = '', $md_span = false)
{
#
# Parse Markdown text, calling _HashHTMLBlocks_InHTML for block tags.
#
# *   $indent is the number of spaces to ignore when checking for code
#     blocks. Without it, content indented along with an enclosing tag
#     would be mistaken for a code block:
#
#         <div>
#             <div markdown="1">
#             Hello World.  <-- Is this a Markdown code block or text?
#             </div>        <-- Is this a Markdown code block or a real tag?
#         <div>
#
# *   If $enclosing_tag is not empty, parsing stops at the first unmatched
#     closing tag with that name. Nested tags are supported.
#
# *   If $md_span is true, the text is treated as span content: double
#     newlines are collapsed to single ones so no paragraph is created.
#
# Returns an array of the form: ( processed text , remaining text )
#
    global $block_tags, $context_block_tags, $clean_tags;

    if ($text === '') return array('', '');

    # Regexes to check for the presence of newlines around a block tag.
    $newline_match_before = "/(?:^\n?|\n\n) *$/";
    $newline_match_after =
        '{
            ^                       # Start of text following the tag.
            (?:[ ]*<!--.*?-->)?     # Optional comment.
            [ ]*\n                  # Must be followed by newline.
        }xs';

    # Regex to match the tags of interest at this level.
    $block_tag_match =
        '{
            (                   # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    (?:             # Tag name.
                        '.$block_tags.'         |
                        '.$context_block_tags.' |
                        '.$clean_tags.'         |
                        (?!\s)'.$enclosing_tag.'
                    )
                    \s*             # Whitespace.
                    (?:
                        ".*?"       |   # Double quotes (can contain `>`)
                        \'.*?\'     |   # Single quotes (can contain `>`)
                        .+?             # Anything but quotes and `>`.
                    )*?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\? .*? \?>         # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $depth = 0;     # Current depth inside the tag tree.
    $parsed = "";   # Parsed text that will be returned.

    #
    # Loop through every tag until we find the closing tag of the parent,
    # or until the end of the text if no parent tag was specified.
    #
    do {
        #
        # Split on the first tag found: text before it, the tag itself,
        # and the text after it.
        #
        $parts = preg_split($block_tag_match, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        # In span mode, collapse the double newlines that would otherwise
        # start a new paragraph.
        if ($md_span) {
            $parts[0] = preg_replace('/\n\n/', "\n", $parts[0]);
        }

        $parsed .= $parts[0]; # Text before current tag.

        # No tag left: the end of $text has been reached.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; # Tag to handle.
        $text = $parts[2]; # Remaining text after current tag.

        # String offsets below use [] rather than the curly-brace form,
        # which PHP 8 no longer parses.

        #
        # Check for: tag inside a code block or code span.
        #
        if (# Find current paragraph
            preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
            (
            # Then match in it either a code block...
            preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
                        '(?!\n)$/', $matches[1], $x) ||
            # ...or unbalanced code span markers (the regex matches balanced).
            !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
                         $matches[1])
            ))
        {
            # It may not be a tag at all, so only its first character
            # (should be a `<`) is consumed and the rest is put back.
            $parsed .= $tag[0];
            $text = substr($tag, 1) . $text;
        }
        #
        # Check for: opening block-level tag, or opening context block tag
        # (like ins and del) alone on its line.
        #
        else if (preg_match("{^<(?:$block_tags)\b}", $tag) ||
            (   preg_match("{^<(?:$context_block_tags)\b}", $tag) &&
                preg_match($newline_match_before, $parsed) &&
                preg_match($newline_match_after, $text) )
            )
        {
            # Parse the tag and the following text using the HTML parser.
            list($block_text, $text) =
                _HashHTMLBlocks_InHTML($tag . $text,
                                    "_HashHTMLBlocks_HashBlock", TRUE);

            # Keep it outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        #
        # Check for: clean tag (like script, math), HTML comments and
        # processing instructions.
        #
        else if (preg_match("{^<(?:$clean_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Parse with the HTML parser, without checking for the
            # markdown attribute.
            list($block_text, $text) =
                _HashHTMLBlocks_InHTML($tag . $text,
                                    "_HashHTMLBlocks_HashClean", FALSE);

            $parsed .= $block_text;
        }
        #
        # Check for: tag with the same name as the enclosing tag.
        #
        else if ($enclosing_tag !== '' &&
            preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
        {
            # Closing tags decrease the nesting count; opening tags that
            # are not self-closed increase it.
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                # Leaving the parent element: put the tag back and return
                # to the calling function.
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
function _HashHTMLBlocks_InHTML($text, $hash_function, $md_attr) {
#
# Parse HTML, calling _HashHTMLBlocks_InMarkdown for tags carrying the
# markdown attribute.
#
# *   Calls $hash_function (a function name) to convert any block.
# *   Stops when the first opening tag closes.
# *   $md_attr tells whether the `markdown="1"` attribute is honoured
#     (it is not inside clean tags).
#
# Returns an array of the form: ( processed text , remaining text ).
# When the tags are unbalanced, returns the first character of the input
# as processed text and everything after it as remaining text.
#
    global $auto_close_tags, $contain_span_tags, $tag_match;

    if ($text === '') return array('', '');

    # Regex to match the `markdown` attribute inside of a tag.
    $markdown_attr_match = '
        {
            \s*         # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (["\'])     # $1: quote delimiter
            (.*?)       # $2: attribute value
            \1          # matching delimiter
        }xs';

    $original_text = $text; # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag. Defaults to '' so the variable is
    # always defined when interpolated into the regex further down.
    #
    $base_tag_name = '';
    if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
        $base_tag_name = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split on the first tag found: text before it, the tag itself,
        # and the text after it.
        #
        $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s). Return the
            # original text unchanged, passing its first character as
            # filtered to prevent an infinite loop in the parent function.
            # ([] offsets: the curly-brace form no longer parses on PHP 8.)
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        #
        # Check for: auto-close tag (like <hr/>), comments and
        # processing instructions.
        #
        if (preg_match("{^</?(?:$auto_close_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease the nested tag count, but only for tags
            # whose name matches the base tag's.
            #
            if (preg_match("{^</?$base_tag_name\b}", $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for the `markdown="1"` attribute and handle it.
            #
            if ($md_attr &&
                preg_match($markdown_attr_match, $tag, $attr_matches) &&
                preg_match('/^(?:1|block|span)$/', $attr_matches[2]))
            {
                # Remove the `markdown` attribute from the opening tag.
                $tag = preg_replace($markdown_attr_match, '', $tag);

                # Span mode is forced by "span", excluded by "block", and
                # otherwise depends on the tag name.
                $md_mode = $attr_matches[2];
                $span_mode = $md_mode == 'span' || ($md_mode != 'block' &&
                    preg_match("{^<(?:$contain_span_tags)\b}", $tag));

                # Calculate the indent before the tag.
                preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
                $indent = strlen($matches[1]);

                # End the preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $hash_function($block_text, $span_mode);

                # Get the enclosing tag name for the Markdown parser.
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = _HashHTMLBlocks_InMarkdown($text, $indent,
                                                    $tag_name, $span_mode);

                # Outdent the Markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append the tag content to the parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over with a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash the last block text that wasn't processed inside the loop.
    #
    $parsed .= $hash_function($block_text);

    return array($parsed, $text);
}
function _HashHTMLBlocks_HashBlock($text) {
#
# Store $text under its MD5 checksum in both $md_html_hashes and
# $md_html_blocks, and return that checksum as the string that replaces
# the block.
#
    global $md_html_hashes, $md_html_blocks;

    $digest = md5($text);
    $md_html_blocks[$digest] = $text;
    $md_html_hashes[$digest] = $text;

    return $digest;
}
function _HashHTMLBlocks_HashClean($text) {
#
# Store $text under its MD5 checksum in $md_html_hashes only, and return
# that checksum as the string that replaces the clean tag.
#
    global $md_html_hashes;

    $digest = md5($text);
    $md_html_hashes[$digest] = $text;

    return $digest;
}
function _HashBlock($text) {
#
# Called whenever a generated block-level tag must be hashed, so that
# _HashHTMLBlocks does not have to be run again at every step.
#
# Any tag hash already present in $text is swapped back first (via
# _UnhashTags), then the whole block is hashed as usual; the hash key is
# returned.
#
    $unhashed = _UnhashTags($text);
    return _HashHTMLBlocks_HashBlock($unhashed);
}
function _RunBlockGamut($text, $hash_html_blocks = TRUE) {
#
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
#
# When $hash_html_blocks is true, raw HTML in $text is hashed first.
# Returns the transformed text.
#
    if ($hash_html_blocks) {
        # Raw HTML must be escaped before anything else. This is needed
        # for each block, not only once in Markdown(), because hashed
        # blocks can be part of a list item and could have been indented;
        # indented blocks would have been seen as code blocks in the
        # previous pass of _HashHTMLBlocks.
        $text = _HashHTMLBlocks($text);
    }

    $text = _DoHeaders($text);
    $text = _DoTables($text);

    # Do Horizontal Rules. A callback replaces the former `e` (eval)
    # pattern modifier, which PHP 7 removed.
    $text = preg_replace_callback(
        array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
              '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
              '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
        '_RunBlockGamut_hr_callback',
        $text);

    $text = _DoLists($text);
    $text = _DoDefLists($text);
    $text = _DoCodeBlocks($text);
    $text = _DoBlockQuotes($text);
    $text = _FormParagraphs($text);

    return $text;
}
function _RunBlockGamut_hr_callback($matches) {
#
# Replacement for one horizontal-rule line: a hashed <hr> element
# surrounded by newlines. $matches is not used.
#
    global $md_empty_element_suffix;
    return _HashBlock("\n<hr$md_empty_element_suffix\n");
}
function _RunSpanGamut($text) {
#
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
#
    global $md_empty_element_suffix;

    # Code spans first, then escaping of special characters.
    $text = _EscapeSpecialChars(_DoCodeSpans($text));

    # Images must come before anchors, because ![foo][f] looks like an
    # anchor.
    $text = _DoAnchors(_DoImages($text));

    # Automatic links such as <http://example.com/> must come after
    # _DoAnchors(), because < and > are valid delimiters in inline links
    # like [this](<url>).
    $text = _EncodeAmpsAndAngles(_DoAutoLinks($text));
    $text = _DoItalicsAndBold($text);

    # Do hard breaks: two or more spaces before a newline.
    $hard_break = "<br$md_empty_element_suffix\n";

    return preg_replace('/ {2,}\n/', $hard_break, $text);
}
function _EscapeSpecialChars($text) {
#
# Rebuild $text from the tokens produced by _TokenizeHTML:
#
# *   inside 'tag' tokens, * and _ are replaced by their $md_escape_table
#     values so they don't conflict with their use for emphasis;
# *   every other token goes through _EncodeBackslashEscapes.
#
    global $md_escape_table;

    $emphasis_chars  = array('*', '_');
    $emphasis_hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $rebuilt = '';
    foreach (_TokenizeHTML($text) as $token) {
        if ($token[0] == 'tag') {
            $rebuilt .= str_replace($emphasis_chars, $emphasis_hashes,
                                    $token[1]);
        } else {
            $rebuilt .= _EncodeBackslashEscapes($token[1]);
        }
    }

    return $rebuilt;
}
function _DoAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
    global $md_nested_brackets;

    #
    # Reference-style links: [link text] [id]
    #
    $reference_link = "{
        (                   # wrap whole match in $1
          \\[
            ($md_nested_brackets)   # link text = $2
          \\]

          [ ]?              # one optional space
          (?:\\n[ ]*)?      # one optional newline followed by spaces

          \\[
            (.*?)       # id = $3
          \\]
        )
        }xs";

    #
    # Inline-style links: [link text](url "optional title")
    #
    $inline_link = "{
        (               # wrap whole match in $1
          \\[
            ($md_nested_brackets)   # link text = $2
          \\]
          \\(           # literal paren
            [ \\t]*
            <?(.*?)>?   # href = $3
            [ \\t]*
            (           # $4
              (['\"])   # quote char = $5
              (.*?)     # Title = $6
              \\5       # matching quote
            )?          # title is optional
          \\)
        )
        }xs";

    # Reference links are handled first, inline links second.
    $text = preg_replace_callback($reference_link,
        '_DoAnchors_reference_callback', $text);
    $text = preg_replace_callback($inline_link,
        '_DoAnchors_inline_callback', $text);

    return $text;
}
function _DoAnchors_reference_callback($matches) {
#
# Build the <a> tag for one reference-style link.
#
# $matches: [1] whole match, [2] link text, [3] link id (may be empty, in
# which case the lower-cased link text is the id, as in [this][]).
#
# Returns the whole match untouched when the id has no entry in $md_urls.
#
    global $md_urls, $md_titles, $md_escape_table;

    $link_text = $matches[2];
    $link_id   = strtolower($matches[3]);
    if ($link_id == "") {
        $link_id = strtolower($link_text);
    }

    if (!isset($md_urls[$link_id])) {
        return $matches[1];
    }

    # * and _ are encoded to avoid conflicting with italics/bold.
    $emphasis_chars  = array('*', '_');
    $emphasis_hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $url  = str_replace($emphasis_chars, $emphasis_hashes, $md_urls[$link_id]);
    $html = "<a href=\"$url\"";
    if (isset($md_titles[$link_id])) {
        $title = str_replace($emphasis_chars, $emphasis_hashes,
                             $md_titles[$link_id]);
        $html .= " title=\"$title\"";
    }

    return $html . ">$link_text</a>";
}
function _DoAnchors_inline_callback($matches) {
#
# Build the <a> tag for one inline-style link.
#
# $matches: [2] link text, [3] URL, [6] title (absent when no title was
# written).
#
    global $md_escape_table;

    # * and _ are encoded to avoid conflicting with italics/bold.
    $emphasis_chars  = array('*', '_');
    $emphasis_hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $url  = str_replace($emphasis_chars, $emphasis_hashes, $matches[3]);
    $html = "<a href=\"$url\"";

    if (isset($matches[6])) {
        $title = str_replace('"', '&quot;', $matches[6]);
        $title = str_replace($emphasis_chars, $emphasis_hashes, $title);
        $html .= " title=\"$title\"";
    }

    $link_text = $matches[2];
    return $html . ">$link_text</a>";
}
function _DoImages($text) {
#
# Turn Markdown image shortcuts into <img> tags.
#
    global $md_nested_brackets;

    #
    # Reference-style labeled images: ![alt text][id]
    #
    $reference_image = '{
        (               # wrap whole match in $1
          !\[
            ('.$md_nested_brackets.')       # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';

    #
    # Inline images: ![alt text](url "optional title")
    #
    $inline_image = '{
        (               # wrap whole match in $1
          !\[
            ('.$md_nested_brackets.')       # alt text = $2
          \]
          \(            # literal paren
            [ \t]*
            <?(\S+?)>?  # src url = $3
            [ \t]*
            (           # $4
              ([\'"])   # quote char = $5
              (.*?)     # title = $6
              \5        # matching quote
              [ \t]*
            )?          # title is optional
          \)
        )
        }xs';

    # Reference images are handled first, inline images second.
    $text = preg_replace_callback($reference_image,
        '_DoImages_reference_callback', $text);
    $text = preg_replace_callback($inline_image,
        '_DoImages_inline_callback', $text);

    return $text;
}
function _DoImages_reference_callback($matches) {
#
# Build the <img> tag for one reference-style image.
#
# $matches: [1] whole match, [2] alt text, [3] link id (may be empty, in
# which case the lower-cased alt text is the id, as in ![this][]).
#
# Returns the whole match untouched when the id has no entry in $md_urls.
#
    global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;

    $alt_text = $matches[2];
    $link_id  = strtolower($matches[3]);
    if ($link_id == "") {
        $link_id = strtolower($alt_text);
    }

    # If there's no such link ID, leave intact.
    if (!isset($md_urls[$link_id])) {
        return $matches[1];
    }

    # * and _ are encoded to avoid conflicting with italics/bold.
    $emphasis_chars  = array('*', '_');
    $emphasis_hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $alt_text = str_replace('"', '&quot;', $alt_text);
    $url      = str_replace($emphasis_chars, $emphasis_hashes,
                            $md_urls[$link_id]);

    $html = "<img src=\"$url\" alt=\"$alt_text\"";
    if (isset($md_titles[$link_id])) {
        $title = str_replace($emphasis_chars, $emphasis_hashes,
                             $md_titles[$link_id]);
        $html .= " title=\"$title\"";
    }

    return $html . $md_empty_element_suffix;
}
function _DoImages_inline_callback($matches) {
#
# Build the <img> tag for one inline image.
#
# $matches: [2] alt text, [3] URL, [6] title (absent when no title was
# written). The title attribute is always emitted, empty if no title
# was given.
#
    global $md_empty_element_suffix, $md_escape_table;

    $title = isset($matches[6]) ? $matches[6] : '';

    # * and _ are encoded to avoid conflicting with italics/bold.
    $emphasis_chars  = array('*', '_');
    $emphasis_hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $alt_text = str_replace('"', '&quot;', $matches[2]);
    $title    = str_replace('"', '&quot;', $title);
    $title    = str_replace($emphasis_chars, $emphasis_hashes, $title);
    $url      = str_replace($emphasis_chars, $emphasis_hashes, $matches[3]);

    return "<img src=\"$url\" alt=\"$alt_text\"" .
           " title=\"$title\"" .
           $md_empty_element_suffix;
}
function _DoHeaders($text) {
#
# Turn Markdown headers into hashed <h1>..<h6> blocks. An optional
# trailing {#id} becomes the element's id attribute.
#
# Callbacks replace the former `e` (eval) pattern modifier, which PHP 7
# removed. They receive the raw matched text, so the quote un-slashing
# that the eval form needed is no longer required.
#
    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    # The "=" form is processed over the whole text first, then the "-" form.
    $text = preg_replace_callback(
        '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n=+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h1', $text);
    $text = preg_replace_callback(
        '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n-+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h2', $text);

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of #\'s
            [ \t]*
            (.+?)       # $2 = Header text
            [ \t]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\}[ ]*)? # id attribute
            \n+
        }xm',
        '_DoHeaders_callback_atx', $text);

    return $text;
}
function _DoHeaders_attr($matches, $index) {
#
# Return ' id="..."' for the id captured at $matches[$index], or '' when
# that group is absent or empty. An id of "0" also yields '', as it did
# with the truthiness test of the eval form.
#
    if (empty($matches[$index])) return '';
    return ' id="'.$matches[$index].'"';
}
function _DoHeaders_callback_setext_h1($matches) {
# Setext header underlined with "=": $matches[1] text, $matches[2] id.
    return _HashBlock('<h1'._DoHeaders_attr($matches, 2).'>'.
        _RunSpanGamut($matches[1]).'</h1>') . "\n\n";
}
function _DoHeaders_callback_setext_h2($matches) {
# Setext header underlined with "-": $matches[1] text, $matches[2] id.
    return _HashBlock('<h2'._DoHeaders_attr($matches, 2).'>'.
        _RunSpanGamut($matches[1]).'</h2>') . "\n\n";
}
function _DoHeaders_callback_atx($matches) {
# atx header: $matches[1] leading #s (their count is the level),
# $matches[2] text, $matches[3] id.
    $level = strlen($matches[1]);
    return _HashBlock('<h'.$level._DoHeaders_attr($matches, 3).'>'.
        _RunSpanGamut($matches[2]).
        '</h'.$level.'>') . "\n\n";
}
function _DoTables($text) {
#
# Form HTML tables.
#
    global $md_tab_width;

    # A table row may be indented by less than one tab stop.
    $max_indent = $md_tab_width - 1;

    #
    # Tables with a leading pipe:
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $leading_pipe_table = '
        {
            ^                           # Start of a line
            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            [|]                         # Optional leading pipe (present)
            (.+) \n                     # $1: Header row (at least one pipe)

            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

            (                           # $3: Cells
                (?:
                    [ ]*                # Allowed whitespace.
                    [|] .* \n           # Row content.
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm';

    #
    # Tables without a leading pipe:
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    $plain_table = '
        {
            ^                           # Start of a line
            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            (\S.*[|].*) \n              # $1: Header row (at least one pipe)

            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

            (                           # $3: Cells
                (?:
                    .* [|] .* \n        # Row content
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm';

    # Leading-pipe tables are converted first.
    $text = preg_replace_callback($leading_pipe_table,
        '_DoTable_LeadingPipe_callback', $text);
    $text = preg_replace_callback($plain_table,
        '_DoTable_callback', $text);

    return $text;
}
function _DoTable_LeadingPipe_callback($matches) {
#
# Callback for tables written with a leading pipe on every row.
# Strips the leading pipe (and any spaces before it) from each body row,
# then hands the header, underline and cleaned body to _DoTable_callback()
# and returns its result.
#
	$rows_without_pipe = preg_replace('/^ *[|]/m', '', $matches[3]);

	$normalized = array($matches[0], $matches[1], $matches[2], $rows_without_pipe);
	return _DoTable_callback($normalized);
}
function _DoTable_callback($matches) {
#
# Render one matched table as HTML.
#
#	$matches[1]  header row
#	$matches[2]  header underline (dashes, optional colons give alignment)
#	$matches[3]  body rows, one per line
#
# Returns the <table> markup passed through _HashBlock(), followed by a
# newline.
#
	$head		= $matches[1];
	$underline	= $matches[2];
	$content	= $matches[3];

	# Remove any trailing pipes for each line.
	$head		= preg_replace('/[|] *$/m', '', $head);
	$underline	= preg_replace('/[|] *$/m', '', $underline);
	$content	= preg_replace('/[|] *$/m', '', $content);

	# Read the alignment of each column from the header underline.
	$attr = array();
	$separators = preg_split('/ *[|] */', $underline);
	foreach ($separators as $n => $s) {
		if (preg_match('/^ *-+: *$/', $s))			$attr[$n] = ' align="right"';
		else if (preg_match('/^ *:-+: *$/', $s))	$attr[$n] = ' align="center"';
		else if (preg_match('/^ *:-+ *$/', $s))		$attr[$n] = ' align="left"';
		else										$attr[$n] = '';
	}

	# Creating code spans before splitting the row is an easy way to
	# handle a code span containing pipes.
	$head		= _DoCodeSpans($head);
	$headers	= preg_split('/ *[|] */', $head);
	$col_count	= count($headers);

	# The underline may describe fewer columns than the header row. Give the
	# extra columns an empty attribute so $attr[$n] is always defined below.
	$attr = array_pad($attr, $col_count, '');

	# Write column headers.
	$text = "<table>\n";
	$text .= "<thead>\n";
	$text .= "<tr>\n";
	foreach ($headers as $n => $header)
		$text .= " <th$attr[$n]>"._RunSpanGamut(trim($header))."</th>\n";
	$text .= "</tr>\n";
	$text .= "</thead>\n";

	# Split content by row.
	$rows = explode("\n", trim($content, "\n"));

	$text .= "<tbody>\n";
	foreach ($rows as $row) {
		# Creating code spans before splitting the row is an easy way to
		# handle a code span containing pipes.
		$row = _DoCodeSpans($row);

		# Split row by cell; surplus cells stay merged into the last column
		# and short rows are padded with empty cells.
		$row_cells = preg_split('/ *[|] */', $row, $col_count);
		$row_cells = array_pad($row_cells, $col_count, '');

		$text .= "<tr>\n";
		foreach ($row_cells as $n => $cell)
			$text .= " <td$attr[$n]>"._RunSpanGamut(trim($cell))."</td>\n";
		$text .= "</tr>\n";
	}
	$text .= "</tbody>\n";
	$text .= "</table>";

	return _HashBlock($text) . "\n";
}
1190 function _DoLists($text) {
# Converts Markdown lists found in $text and returns the result.
# The loop runs once with the unordered marker pattern and once with the
# ordered one; each pass builds $whole_list for that marker and replaces
# every match through _DoLists_callback. When $md_list_level is non-zero one
# pattern is used, otherwise the pattern is additionally prefixed with
# (?:(?<=\n\n)|\A\n?), i.e. the list must follow a blank line or start the text.
# $marker_any is assigned below but not read anywhere in this function.
# NOTE(review): the embedded line numbers skip (1212, 1215-1216, 1222-1224,
# 1232, 1236, 1243-1244), so short lines such as group closers, alternation
# bars and closing braces appear to be missing from this listing -- restore
# them from the upstream file before relying on this text.
1192 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1194 global $md_tab_width, $md_list_level;
1195 $less_than_tab = $md_tab_width - 1;
1197 # Re-usable patterns to match list item bullets and number markers:
1198 $marker_ul = '[*+-]';
1199 $marker_ol = '\d+[.]';
1200 $marker_any = "(?:$marker_ul|$marker_ol)";
1202 $markers = array($marker_ul, $marker_ol);
1204 foreach ($markers as $marker) {
1205 # Re-usable pattern to match any entire ul or ol list:
1206 $whole_list = '
1207 ( # $1 = whole list
1208 ( # $2
1209 [ ]{0,'.$less_than_tab.'}
1210 ('.$marker.') # $3 = first list item marker
1211 [ \t]+
1213 (?s:.+?)
1214 ( # $4
1217 \n{2,}
1218 (?=\S)
1219 (?! # Negative lookahead for another list item marker
1220 [ \t]*
1221 '.$marker.'[ \t]+
1225 '; // mx
1227 # We use a different prefix before nested lists than top-level lists.
1228 # See extended comment in _ProcessListItems().
1230 if ($md_list_level) {
1231 $text = preg_replace_callback('{
1233 '.$whole_list.'
1234 }mx',
1235 '_DoLists_callback', $text);
1237 else {
1238 $text = preg_replace_callback('{
1239 (?:(?<=\n\n)|\A\n?)
1240 '.$whole_list.'
1241 }mx',
1242 '_DoLists_callback', $text);
1246 return $text;
function _DoLists_callback($matches) {
#
# Callback for _DoLists(): turn one matched list into <ul> or <ol> markup.
# $matches[1] is the whole list, $matches[3] its first item marker.
# Returns the markup passed through _HashBlock(), surrounded by newlines.
#
	# Re-usable patterns to match list item bullets and number markers:
	$bullet_re = '[*+-]';
	$number_re = '\d+[.]';

	# The first marker decides both the list tag and the marker pattern
	# used to split the list into items.
	if (preg_match("/$bullet_re/", $matches[3])) {
		$tag = "ul";
		$item_marker = $bullet_re;
	}
	else {
		$tag = "ol";
		$item_marker = $number_re;
	}

	# Turn double returns into triple returns, so that we can make a
	# paragraph for the last item in a list, if necessary:
	$body = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);
	$items = _ProcessListItems($body, $item_marker);

	return "\n" . _HashBlock("<$tag>\n" . $items . "</$tag>") . "\n\n";
}
function _ProcessListItems($list_str, $marker_any) {
#
# Process the contents of a single ordered or unordered list, splitting it
# into individual list items. Each item is converted by
# _ProcessListItems_callback(); the converted text is returned.
#
# $md_list_level tracks list nesting: it is incremented when this function
# starts and decremented before it returns, so it is zero whenever we are
# not inside a list. _DoLists() reads it to choose its match prefix.
#
# The reason: outside of a list, text such as
#
#		I recommend upgrading to version
#		8. Oops, now this line is treated
#		as a sub-list.
#
# should stay a single paragraph even though its second line starts with a
# digit-period-space sequence, whereas inside a list (or sub-list) the same
# line starts a sub-list. It is a kludge; this part of the Markdown syntax
# cannot be parsed perfectly without guessing the author's intent.
#
	global $md_list_level;

	$md_list_level++;

	# Collapse trailing blank lines into a single newline.
	$trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	$item_pattern = '{
		(\n)?							# leading line = $1
		(^[ \t]*)						# leading whitespace = $2
		('.$marker_any.') [ \t]+		# list marker = $3
		((?s:.+?)						# list item text = $4
		(\n{1,2}))
		(?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
		}xm';
	$converted = preg_replace_callback($item_pattern,
		'_ProcessListItems_callback', $trimmed);

	$md_list_level--;
	return $converted;
}
function _ProcessListItems_callback($matches) {
#
# Callback for _ProcessListItems(): wrap one list item in <li>...</li>.
# $matches[1] is the optional blank line before the item and $matches[4]
# the item text. An item preceded by a blank line, or containing one, goes
# through the block gamut; any other item is checked for sub-lists and then
# goes through the span gamut.
#
	$body = $matches[4];
	$preceded_by_blank = !empty($matches[1]);

	if ($preceded_by_blank || preg_match('/\n{2,}/', $body)) {
		$html = _RunBlockGamut(_Outdent($body));
	}
	else {
		# Recursion for sub-lists:
		$html = _DoLists(_Outdent($body));
		$html = preg_replace('/\n+$/', '', $html);
		$html = _RunSpanGamut($html);
	}

	return "<li>" . $html . "</li>\n";
}
1333 function _DoDefLists($text) {
# Converts definition lists found in $text and returns the result.
# $whole_list describes one complete list: a defined term followed by a line
# starting with a colon, then everything up to the end of the text or to a
# blank line that is not followed by another term or another definition.
# Lines may be indented by at most $md_tab_width - 1 spaces. Each match,
# which must follow a blank line or start the text, is replaced through
# _DoDefLists_callback.
# NOTE(review): the embedded line numbers skip (1348, 1350, 1353-1354, 1360,
# 1362, 1365-1367), so short pattern lines such as group closers and
# alternation bars appear to be missing from this listing -- restore them
# from the upstream file before relying on this text.
1335 # Form HTML definition lists.
1337 global $md_tab_width;
1338 $less_than_tab = $md_tab_width - 1;
1340 # Re-usable patterns to match list item bullets and number markers:
1342 # Re-usable pattern to match any entire dl list:
1343 $whole_list = '
1344 ( # $1 = whole list
1345 ( # $2
1346 [ ]{0,'.$less_than_tab.'}
1347 ((?>.*\S.*\n)+) # $3 = defined term
1349 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1351 (?s:.+?)
1352 ( # $4
1355 \n{2,}
1356 (?=\S)
1357 (?! # Negative lookahead for another term
1358 [ ]{0,'.$less_than_tab.'}
1359 (?: \S.*\n )+? # defined term
1361 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1363 (?! # Negative lookahead for another definition
1364 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1368 '; // mx
1370 $text = preg_replace_callback('{
1371 (?:(?<=\n\n)|\A\n?)
1372 '.$whole_list.'
1373 }mx',
1374 '_DoDefLists_callback', $text);
1376 return $text;
function _DoDefLists_callback($matches) {
#
# Callback for _DoDefLists(): wrap one matched definition list in <dl>.
# $matches[1] is the whole list. Its items are converted by
# _ProcessDefListItems(), trimmed, and the resulting block is passed
# through _HashBlock() and followed by a blank line.
#
	$items = trim(_ProcessDefListItems($matches[1]));

	return _HashBlock("<dl>\n" . $items . "\n</dl>") . "\n\n";
}
1390 function _ProcessDefListItems($list_str) {
# Converts the terms and definitions of one definition list and returns the
# converted text. Trailing blank lines are collapsed first. Terms are then
# replaced through _ProcessDefListItems_callback_dt, and afterwards the
# definitions through _ProcessDefListItems_callback_dd. The order matters:
# the second pattern uses the <dt> produced by the first pass as one of its
# stop markers.
# NOTE(review): the embedded line numbers skip (1409, 1425-1426), so the
# closers of some pattern groups appear to be missing from this listing --
# restore them from the upstream file before relying on this text.
1392 # Process the contents of a single ordered or unordered list, splitting it
1393 # into individual list items.
1395 global $md_tab_width;
1396 $less_than_tab = $md_tab_width - 1;
1398 # trim trailing blank lines:
1399 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1401 # Process definition terms.
1402 $list_str = preg_replace_callback('{
1403 (?:\n\n+|\A\n?) # leading line
1404 ( # definition terms = $1
1405 [ ]{0,'.$less_than_tab.'} # leading whitespace
1406 (?![:][ ]|[ ]) # negative lookahead for a definition
1407 # mark (colon) or more whitespace.
1408 (?: \S.* \n)+? # actual term (not whitespace).
1410 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
1411 # with a definition mark.
1412 }xm',
1413 '_ProcessDefListItems_callback_dt', $list_str);
1415 # Process actual definitions.
1416 $list_str = preg_replace_callback('{
1417 \n(\n+)? # leading line = $1
1418 [ ]{0,'.$less_than_tab.'} # whitespace before colon
1419 [:][ ]+ # definition mark (colon)
1420 ((?s:.+?)) # definition text = $2
1421 (?= \n+ # stop at next definition mark,
1422 (?: # next term or end of text
1423 [ ]{0,'.$less_than_tab.'} [:][ ] |
1424 <dt> | \z
1427 }xm',
1428 '_ProcessDefListItems_callback_dd', $list_str);
1430 return $list_str;
function _ProcessDefListItems_callback_dt($matches) {
#
# Callback for the term pass of _ProcessDefListItems().
# $matches[1] holds one or more terms, one per line. Each trimmed line is
# run through the span gamut and wrapped in its own <dt> element; every
# element is preceded by a newline and the result ends with a newline.
#
	$terms = array_map('trim', explode("\n", trim($matches[1])));

	$tags = array();
	foreach ($terms as $term) {
		$tags[] = "<dt>" . _RunSpanGamut($term) . "</dt>";
	}

	return "\n" . implode("\n", $tags) . "\n";
}
function _ProcessDefListItems_callback_dd($matches) {
#
# Callback for the definition pass of _ProcessDefListItems().
# $matches[1] is the optional blank line before the definition and
# $matches[2] the definition text. A definition preceded by a blank line,
# or containing one, goes through the block gamut; any other definition
# goes through the span gamut. The result is wrapped in <dd>...</dd>.
#
	$body = $matches[2];
	$is_block = $matches[1] || preg_match('/\n{2,}/', $body);

	if (!$is_block) {
		$inline = _RunSpanGamut(_Outdent(rtrim($body)));
		return "\n<dd>" . $inline . "</dd>\n";
	}

	$blocks = _RunBlockGamut(_Outdent($body . "\n\n"));
	return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
}
1458 function _DoCodeBlocks($text) {
# Converts indented code blocks found in $text and returns the result.
# A block must follow a blank line or start the text, and consists of lines
# that begin with a tab or with $md_tab_width spaces. It ends before the
# next line whose first non-space character sits within the first
# $md_tab_width columns, or at the end of the text. Each match is replaced
# through _DoCodeBlocks_callback.
# NOTE(review): the embedded line numbers skip (1466, 1469-1470), so the
# opener and closers of the inner pattern group appear to be missing from
# this listing -- restore them from the upstream file before relying on it.
1460 # Process Markdown `<pre><code>` blocks.
1462 global $md_tab_width;
1463 $text = preg_replace_callback('{
1464 (?:\n\n|\A)
1465 ( # $1 = the code block -- one or more lines, starting with a space/tab
1467 (?:[ ]{'.$md_tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
1468 .*\n+
1471 ((?=^[ ]{0,'.$md_tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1472 }xm',
1473 '_DoCodeBlocks_callback', $text);
1475 return $text;
function _DoCodeBlocks_callback($matches) {
#
# Callback for _DoCodeBlocks(): turn one matched code block into
# <pre><code> markup. $matches[1] is the indented block; it is outdented
# and encoded with _EncodeCode(), and the markup is passed through
# _HashBlock() and surrounded by blank lines.
#
	$code = _EncodeCode(_Outdent($matches[1]));

	# Trim leading newlines, then trailing whitespace.
	$code = preg_replace('/\A\n+/', '', $code);
	$code = preg_replace('/\s+\z/', '', $code);

	$markup = "<pre><code>" . $code . "\n</code></pre>";
	return "\n\n" . _HashBlock($markup) . "\n\n";
}
function _DoCodeSpans($text) {
#
# Convert backtick-delimited code spans in $text; each match is replaced
# through _DoCodeSpans_callback and the resulting text is returned.
#
#	*	Backtick quotes are used for <code></code> spans.
#
#	*	A run of several backticks may be used as the delimiter so that
#		the span can contain literal backticks. This input:
#
#			Just type ``foo `bar` baz`` at the prompt.
#
#		becomes:
#
#			<p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#		There is no limit on the length of the delimiter run: to include
#		three consecutive backticks in the code, delimit with four, etc.
#
#	*	Spaces next to the delimiters allow literal backticks at the edges:
#
#			... type `` `bar` `` ...
#
#		becomes:
#
#			... type <code>`bar`</code> ...
#
	$span_pattern = '@
			(?<!\\\)	# Character before opening ` can\'t be a backslash
			(`+)		# $1 = Opening run of `
			(.+?)		# $2 = The code block
			(?<!`)
			\1			# Matching closer
			(?!`)
		@xs';

	return preg_replace_callback($span_pattern, '_DoCodeSpans_callback', $text);
}
function _DoCodeSpans_callback($matches) {
#
# Callback for _DoCodeSpans(): $matches[2] is the text between the backtick
# delimiters. Leading and trailing spaces/tabs are removed, the rest is
# encoded with _EncodeCode() and wrapped in <code>...</code>.
#
	$edges = array(
		'/^[ \t]*/',	# leading whitespace
		'/[ \t]*$/',	# trailing whitespace
		);
	$code = preg_replace($edges, '', $matches[2]);

	return "<code>" . _EncodeCode($code) . "</code>";
}
function _EncodeCode($_) {
#
# Encode/escape certain characters inside Markdown code runs, where they
# are literals and must lose their special HTML and Markdown meanings.
# Returns the encoded string.
#
	global $md_escape_table;

	# Ampersands first: HTML entities are not entities within a code span,
	# and this must happen before the entities below are introduced.
	$encoded = str_replace('&', '&amp;', $_);

	# Then the angle brackets.
	$encoded = str_replace('<', '&lt;', $encoded);
	$encoded = str_replace('>', '&gt;', $encoded);

	# Finally every character listed in $md_escape_table is swapped for
	# its table value.
	$magic_chars = array_keys($md_escape_table);
	$stand_ins = array_values($md_escape_table);
	return str_replace($magic_chars, $stand_ins, $encoded);
}
1561 function _DoItalicsAndBold($text) {
# Converts emphasis markers in $text and returns the result.
# Two preg_replace() passes are made, each with one pattern for underscores
# and one for asterisks: first the doubled markers become
# <strong>...</strong>, then the single markers become <em>...</em>. In both
# cases the replacement keeps capture group 2, the content between markers.
# NOTE(review): the embedded line numbers skip (1564, 1569, 1571, 1575-1576,
# 1580, 1585, 1587, 1590-1591, 1594, 1600), so short lines such as pattern
# delimiters, group openers/closers and alternation bars appear to be
# missing from this listing -- restore them from the upstream file before
# relying on this text.
1562 # <strong> must go first:
1563 $text = preg_replace(array(
1565 ( (?<!\w) __ ) # $1: Marker (not preceded by alphanum)
1566 (?=\S) # Not followed by whitespace
1567 (?!__) # or two others marker chars.
1568 ( # $2: Content
1570 [^_]+? # Anthing not em markers.
1572 # Balence any regular _ emphasis inside.
1573 (?<![a-zA-Z0-9])_ (?=\S) (?! _) (.+?)
1574 (?<=\S) _ (?![a-zA-Z0-9])
1577 (?<=\S) __ # End mark not preceded by whitespace.
1578 (?!\w) # Not followed by alphanum.
1579 }sx',
1581 ( (?<!\*\*) \*\* ) # $1: Marker (not preceded by two *)
1582 (?=\S) # Not followed by whitespace
1583 (?!\1) # or two others marker chars.
1584 ( # $2: Content
1586 [^*]+? # Anthing not em markers.
1588 # Balence any regular * emphasis inside.
1589 \* (?=\S) (?! \*) (.+?) (?<=\S) \*
1592 (?<=\S) \*\* # End mark not preceded by whitespace.
1593 }sx',
1595 '<strong>\2</strong>', $text);
1596 # Then <em>:
1597 $text = preg_replace(array(
1598 '{ ( (?<!\w) _ ) (?=\S) (?! _) (.+?) (?<=\S) _ (?!\w) }sx',
1599 '{ ( (?<!\*)\* ) (?=\S) (?! \*) (.+?) (?<=\S) \* }sx',
1601 '<em>\2</em>', $text);
1603 return $text;
1607 function _DoBlockQuotes($text) {
# Converts blockquotes found in $text and returns the result.
# A quote starts on a line whose first non-blank character is ">", and
# takes in the following consecutive non-empty lines plus any blank lines
# after them. Each match is replaced through _DoBlockQuotes_callback.
# NOTE(review): the embedded line numbers skip (1610, 1615-1616), so the
# opener and closers of the inner pattern group appear to be missing from
# this listing -- restore them from the upstream file before relying on it.
1608 $text = preg_replace_callback('/
1609 ( # Wrap whole match in $1
1611 ^[ \t]*>[ \t]? # ">" at the start of a line
1612 .+\n # rest of the first line
1613 (.+\n)* # subsequent consecutive lines
1614 \n* # blanks
1617 /xm',
1618 '_DoBlockQuotes_callback', $text);
1620 return $text;
function _DoBlockQuotes_callback($matches) {
#
# Callback for _DoBlockQuotes(): turn one matched quote into <blockquote>
# markup. $matches[1] is the quoted text including its ">" prefixes. The
# markup is passed through _HashBlock() and followed by a blank line.
#
	# Trim one level of quoting, then empty out whitespace-only lines.
	$quoted = preg_replace('/^[ \t]*>[ \t]?/m', '', $matches[1]);
	$quoted = preg_replace('/^[ \t]+$/m', '', $quoted);

	# Recurse: the quote content is itself block-level Markdown.
	$inner = _RunBlockGamut($quoted);

	# Indent every line of the converted content.
	$inner = preg_replace('/^/m', " ", $inner);
	# These leading spaces screw with <pre> content, so we need to fix that:
	$inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		'_DoBlockQuotes_callback2', $inner);

	return _HashBlock("<blockquote>\n$inner\n</blockquote>") . "\n\n";
}
function _DoBlockQuotes_callback2($matches) {
#
# Callback used by _DoBlockQuotes_callback() on each <pre>...</pre> run:
# removes the leading space that was added to every line, so that
# preformatted content keeps its original indentation.
#
	return preg_replace('/^ /m', '', $matches[1]);
}
function _FormParagraphs($text) {
#
#	Params:
#		$text - string to process with html <p> tags
#
# Splits $text on blank lines, runs each chunk through the span gamut and
# wraps it in <p>...</p> unless it is a hash key: either the whole chunk is
# a key of $md_html_hashes, or its first 32 characters are a key of
# $md_html_blocks. The chunks are joined with blank lines and the result
# is passed through _UnhashTags().
#
	global $md_html_blocks, $md_html_hashes;

	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+/', '', $text);
	$text = preg_replace('/\n+\z/', '', $text);

	$paragraphs = array();
	foreach (preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY) as $chunk) {
		$chunk = trim(_RunSpanGamut($chunk));

		# Text equal to a clean tag hash is not enclosed in a paragraph,
		# and neither is text starting with a block tag hash.
		$is_hashed = isset($md_html_blocks[substr($chunk, 0, 32)])
			|| isset($md_html_hashes[$chunk]);

		$paragraphs[] = $is_hashed ? $chunk : "<p>$chunk</p>";
	}

	# Join the chunks, then remove any tag hashes still present.
	return _UnhashTags(implode("\n\n", $paragraphs));
}
function _EncodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to be
# encoded. Returns the processed string.
#
# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
#   http://bumppo.net/projects/amputator/
#
	$patterns = array(
		# An ampersand that does not start an entity reference.
		'/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
		# A naked "<", i.e. one that does not open a tag.
		'{<(?![a-z/?\$!])}i',
		);
	$entities = array('&amp;', '&lt;');

	# The patterns are applied in order, each to the result of the previous.
	return preg_replace($patterns, $entities, $text);
}
function _EncodeBackslashEscapes($text) {
#
#	Parameter:  String.
#	Returns:    The string, after replacing every key of
#	            $md_backslash_escape_table found in it by that key's value.
#
	global $md_backslash_escape_table;

	# Replacements are applied in table order, so the table has to list
	# the escaped backslash first.
	$sequences = array_keys($md_backslash_escape_table);
	$stand_ins = array_values($md_backslash_escape_table);

	return str_replace($sequences, $stand_ins, $text);
}
function _DoAutoLinks($text) {
#
# Turn <http://...>, <https://...> and <ftp://...> into links, and turn
# <address@domain.foo> (optionally prefixed with mailto:) into obfuscated
# mailto links. Returns the processed text.
#
	$text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
		'<a href="\1">\1</a>', $text);

	# Email addresses: <address@domain.foo>
	# A callback is used instead of the /e (eval) pattern modifier, which
	# PHP no longer supports.
	$text = preg_replace_callback('{
		<
		(?:mailto:)?
		(
			[-.\w]+
			\@
			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
		)
		>
		}xi',
		'_DoAutoLinks_email_callback', $text);

	return $text;
}
function _DoAutoLinks_email_callback($matches) {
#
# Callback for _DoAutoLinks(): $matches[1] is the address between the angle
# brackets, without any mailto: prefix.
#
	return _EncodeEmailAddress(_UnescapeSpecialChars($matches[1]));
}
function _EncodeEmailAddress($addr) {
#
#	Input: an email address, e.g. "foo@example.com"
#
#	Output: the email address as a mailto link. Every character except
#	":" is passed to _EncodeEmailAddress_callback(), which may replace it
#	by a decimal or hex entity, in the hopes of foiling most address
#	harvesting spam bots. The link text is the same encoded address
#	without the "mailto:" prefix.
#
#	Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
#	mailing list: <http://tinyurl.com/yu7ue>
#
	# Leave ':' alone (to spot mailto: later).
	$encoded = preg_replace_callback('/([^\:])/',
		'_EncodeEmailAddress_callback', "mailto:" . $addr);

	$link = "<a href=\"$encoded\">$encoded</a>";

	# Strip the mailto: from the visible part.
	return preg_replace('/">.+?:/', '">', $link);
}
function _EncodeEmailAddress_callback($matches) {
#
# Callback for _EncodeEmailAddress(): randomly encode one character.
# $matches[1] is the character. Roughly 45% of the time it becomes a hex
# entity, 45% a decimal entity and 10% it is returned raw -- except '@',
# which is never returned raw.
#
	$char = $matches[1];
	$code = ord($char);
	$roll = rand(0, 100);

	if ($roll < 45) {
		return '&#x'.dechex($code).';';
	}
	if ($roll <= 90 || $char == '@') {
		return '&#'.$code.';';
	}
	return $char;
}
function _UnescapeSpecialChars($text) {
#
# Swap back in all the special characters we've hidden: every value of
# $md_escape_table found in $text is replaced by its key.
#
	global $md_escape_table;

	$stand_ins = array_values($md_escape_table);
	$originals = array_keys($md_escape_table);

	return str_replace($stand_ins, $originals, $text);
}
function _UnhashTags($text) {
#
# Swap back in all the tags hashed by _HashHTMLBlocks: every key of
# $md_html_hashes found in $text is replaced by its value.
#
	global $md_html_hashes;

	$hashes = array_keys($md_html_hashes);
	$markup = array_values($md_html_hashes);

	return str_replace($hashes, $markup, $text);
}
# _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
# We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
#
#   Parameter:  String containing HTML markup.
#   Returns:    An array of the tokens comprising the input
#               string. Each token is either a tag (possibly with nested,
#               tags contained therein, such as <a href="<MTFoo>">, or a
#               run of text between tags. Each element of the array is a
#               two-element array; the first is either 'tag' or 'text';
#               the second is the actual value.
#               Empty runs of text (before a leading tag, between two
#               adjacent tags, after a trailing tag) produce no token.
#
#   Regular expression derived from the _tokenize() subroutine in
#   Brad Choate's MTRegex plugin.
#   <http://www.bradchoate.com/past/mtregex.php>
#
	$tokens = array();

	$match = '(?s:<!(?:--.*?--\s*)+>)|'.	# comment
			 '(?s:<\?.*?\?>)|'.				# processing instruction
											# regular tags
			 '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

	# With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly:
	# even positions hold text, odd positions hold the captured tag.
	$parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

	foreach ($parts as $position => $part) {
		if ($position % 2) {
			$tokens[] = array('tag', $part);
		}
		else if ($part !== '') {
			# Skip empty text runs instead of reporting them as tags.
			$tokens[] = array('text', $part);
		}
	}

	return $tokens;
}
endif;
function _Outdent($text) {
#
# Remove one level of line-leading tabs or spaces: at the start of every
# line, either one tab or between one and $md_tab_width spaces.
#
	global $md_tab_width;

	$one_indent_level = '/^(\t|[ ]{1,' . $md_tab_width . '})/m';
	return preg_replace($one_indent_level, "", $text);
}
function _Detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
# Each line is cut at its tab characters and rebuilt piece by piece; before
# every piece after the first, enough spaces are added to bring the line
# length (as counted by strlen()) up to the next multiple of $md_tab_width.
# Every line of the input, including the last one, is written out followed
# by "\n".
#
	global $md_tab_width;

	$expanded = array();
	foreach (explode("\n", $text) as $raw_line) {
		$segments = explode("\t", $raw_line);

		# The first segment starts the line; the others each follow a tab.
		$rebuilt = array_shift($segments);
		foreach ($segments as $segment) {
			$padding = $md_tab_width - strlen($rebuilt) % $md_tab_width;
			$rebuilt .= str_repeat(" ", $padding) . $segment;
		}
		$expanded[] = $rebuilt;
	}

	return implode("\n", $expanded) . "\n";
}
function _UnslashQuotes($text) {
#
#	This function is useful to remove automatically slashed double quotes
#	when using preg_replace and evaluating an expression.
#	Parameter:  String.
#	Returns:    The string with any slash-double-quote (\") sequence replaced
#				by a single double quote.
#
	$unslash = array('\\"' => '"');
	return strtr($text, $unslash);
}
1887 PHP Markdown Extra
1888 ==================
1890 Description
1891 -----------
1893 This is a PHP translation of the original Markdown formatter written in
1894 Perl by John Gruber. This special version of PHP Markdown also includes
1895 syntax additions by myself.
1897 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1898 easy-to-write structured text format into HTML. Markdown's text format
1899 is most similar to that of plain text email, and supports features such
1900 as headers, *emphasis*, code blocks, blockquotes, and links.
1902 Markdown's syntax is designed not as a generic markup language, but
1903 specifically to serve as a front-end to (X)HTML. You can use span-level
1904 HTML tags anywhere in a Markdown document, and you can use block level
1905 HTML tags (like <div> and <table>) as well.
1907 For more information about Markdown's syntax, see:
1909 <http://daringfireball.net/projects/markdown/>
1912 Bugs
1913 ----
1915 To file bug reports please send email to:
1917 <michel.fortin@michelf.com>
1919 Please include with your report: (1) the example input; (2) the output you
1920 expected; (3) the output Markdown actually produced.
1923 Version History
1924 ---------------
1926 See Readme file for details.
1928 Extra 1.0.1 - 9 December 2005
1930 Extra 1.0 - 5 September 2005
1932 Extra 1.0b4 - 1 August 2005
1934 Extra 1.0b3 - 29 July 2005
1936 Extra 1.0b2 - 26 July 2005
1938 Extra 1.0b1 - 25 July 2005
1941 Author & Contributors
1942 ---------------------
1944 Original Markdown in Perl by John Gruber
1945 <http://daringfireball.net/>
1947 PHP port and extras by Michel Fortin
1948 <http://www.michelf.com/>
1951 Copyright and License
1952 ---------------------
1954 Copyright (c) 2004-2005 Michel Fortin
1955 <http://www.michelf.com/>
1956 All rights reserved.
1958 Based on Markdown
1959 Copyright (c) 2003-2004 John Gruber
1960 <http://daringfireball.net/>
1961 All rights reserved.
1963 Redistribution and use in source and binary forms, with or without
1964 modification, are permitted provided that the following conditions are
1965 met:
1967 * Redistributions of source code must retain the above copyright notice,
1968 this list of conditions and the following disclaimer.
1970 * Redistributions in binary form must reproduce the above copyright
1971 notice, this list of conditions and the following disclaimer in the
1972 documentation and/or other materials provided with the distribution.
1974 * Neither the name "Markdown" nor the names of its contributors may
1975 be used to endorse or promote products derived from this software
1976 without specific prior written permission.
1978 This software is provided by the copyright holders and contributors "as
1979 is" and any express or implied warranties, including, but not limited
1980 to, the implied warranties of merchantability and fitness for a
1981 particular purpose are disclaimed. In no event shall the copyright owner
1982 or contributors be liable for any direct, indirect, incidental, special,
1983 exemplary, or consequential damages (including, but not limited to,
1984 procurement of substitute goods or services; loss of use, data, or
1985 profits; or business interruption) however caused and on any theory of
1986 liability, whether in contract, strict liability, or tort (including
1987 negligence or otherwise) arising in any way out of the use of this
1988 software, even if advised of the possibility of such damage.