Updated version with bug fixes (see bug 2179)
[moodle.git] / lib / markdown.php
blobf52a30f584b87060206f0261597307cbead0b0d1
1 <?php
4 # Markdown - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004 John Gruber
7 # <http://daringfireball.net/projects/markdown/>
9 # Copyright (c) 2004 Michel Fortin - Translation to PHP
10 # <http://www.michelf.com/projects/php-markdown/>
15 global $MarkdownPHPVersion, $MarkdownSyntaxVersion,
16 $md_empty_element_suffix, $md_tab_width,
17 $md_nested_brackets_depth, $md_nested_brackets,
18 $md_escape_table, $md_backslash_escape_table;
21 $MarkdownPHPVersion = '1.0'; # Sat 21 Aug 2004
22 $MarkdownSyntaxVersion = '1.0'; # Fri 20 Aug 2004
26 # Global default settings:
28 $md_empty_element_suffix = " />"; # Change to ">" for HTML output
29 $md_tab_width = 4;
32 # -- WordPress Plugin Interface -----------------------------------------------
34 Plugin Name: Markdown
35 Plugin URI: http://www.michelf.com/projects/php-markdown/
36 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
37 Version: 1.0
38 Author: Michel Fortin
39 Author URI: http://www.michelf.com/
41 if (isset($wp_version)) {
42 # Remove default WordPress auto-paragraph filter.
43 remove_filter('the_content', 'wpautop');
44 remove_filter('the_excerpt', 'wpautop');
45 remove_filter('comment_text', 'wpautop');
46 # Add Markdown filter with priority 6 (same as Textile).
47 add_filter('the_content', 'Markdown', 6);
48 add_filter('the_excerpt', 'Markdown', 6);
49 add_filter('comment_text', 'Markdown', 6);
52 # -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
#
# bBlog plugin descriptor. Returns the metadata array that describes this
# modifier; the reported version is read from $MarkdownPHPVersion.
#
    global $MarkdownPHPVersion;

    $syntax_help = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'Markdown';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = $MarkdownPHPVersion;
    $info['help']        = $syntax_help;
    return $info;
}
67 # -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_markdown($text) {
    # Smarty modifier entry point: the template value is handed straight
    # to Markdown() and the resulting HTML is returned.
    $html = Markdown($text);
    return $html;
}
72 # -- Textile Compatibility Mode -----------------------------------------------
73 # Rename this file to "classTextile.php" and it can replace Textile anywhere.
74 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
75 # Try to include PHP SmartyPants. Should be in the same directory.
76 @include_once 'smartypants.php';
77 # Fake Textile class. It calls Markdown instead.
78 class Textile {
79 function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
80 if ($lite == '' && $encode == '') $text = Markdown($text);
81 if (function_exists('SmartyPants')) $text = SmartyPants($text);
82 return $text;
90 # Globals:
93 # Regex to match balanced [brackets].
94 # Needed to insert a maximum bracket depth while converting to PHP.
95 $md_nested_brackets_depth = 6;
96 $md_nested_brackets =
97 str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
98 str_repeat('\])*', $md_nested_brackets_depth);
100 # Table of hash values for escaped characters:
101 $md_escape_table = array(
102 "\\" => md5("\\"),
103 "`" => md5("`"),
104 "*" => md5("*"),
105 "_" => md5("_"),
106 "{" => md5("{"),
107 "}" => md5("}"),
108 "[" => md5("["),
109 "]" => md5("]"),
110 "(" => md5("("),
111 ")" => md5(")"),
112 "#" => md5("#"),
113 "." => md5("."),
114 "!" => md5("!")
# Build a companion table keyed by the backslash-escaped form of each
# character (e.g. "\*"), mapping to the same hash as $md_escape_table.
# The table is explicitly reset to an empty array first, so entries left
# in the global by earlier code cannot survive into the rebuilt table.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $key => $char) {
    $md_backslash_escape_table["\\" . $key] = $char;
}
function Markdown($text) {
#
# Main function: converts Markdown source text to HTML.
#
# The order of the processing stages is essential. Link and image
# substitutions need to happen before _EscapeSpecialChars(), so that any
# *'s or _'s in the <a> and <img> tags get encoded.
#
    global $md_urls, $md_titles, $md_html_blocks;

    # Reset the per-document hashes. Without this, a page that renders
    # several articles (e.g. an index of the N most recent ones) would see
    # link definitions and HTML blocks from the previous article.
    $md_urls        = array();
    $md_titles      = array();
    $md_html_blocks = array();

    # DOS and Mac line endings become Unix ones, and the text is made to
    # end with a couple of newlines.
    $text = str_replace(array("\r\n", "\r"), "\n", $text) . "\n\n";

    # Tabs become spaces.
    $text = _Detab($text);

    # Lines holding only spaces/tabs are emptied, so later patterns can
    # match consecutive blank lines with /\n+/ instead of /[ \t]*\n+/.
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Remaining stages, in order: hash block-level HTML, strip link
    # definitions into the hashes, process backslash escapes (must be
    # early), run the block-level transformations, restore hidden chars.
    $stages = array(
        '_HashHTMLBlocks',
        '_StripLinkDefinitions',
        '_EscapeSpecialChars',
        '_RunBlockGamut',
        '_UnescapeSpecialChars',
    );
    foreach ($stages as $stage) {
        $text = $stage($text);
    }

    return $text . "\n";
}
172 function _StripLinkDefinitions($text) {
174 # Strips link definitions from text, stores the URLs and titles in
175 # hash references.
177 # Link defs are in the form: ^[id]: url "optional title"
178 $text = preg_replace_callback('{
179 ^[ \t]*\[(.+)\]: # id = $1
180 [ \t]*
181 \n? # maybe *one* newline
182 [ \t]*
183 <?(\S+?)>? # url = $2
184 [ \t]*
185 \n? # maybe one newline
186 [ \t]*
188 # Todo: Titles are delimited by "quotes" or (parens).
189 ["(]
190 (.+?) # title = $3
191 [")]
192 [ \t]*
193 )? # title is optional
194 (?:\n+|\Z)
195 }xm',
196 '_StripLinkDefinitions_callback',
197 $text);
198 return $text;
function _StripLinkDefinitions_callback($matches) {
#
# Callback for _StripLinkDefinitions(): records one link definition.
# $matches[1] is the id, $matches[2] the URL and $matches[3] the optional
# title. Ids are stored lower-cased. Returns the empty string so the
# definition disappears from the text.
#
    global $md_urls, $md_titles;

    $id = strtolower($matches[1]);
    $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

    $has_title = isset($matches[3]);
    if ($has_title) {
        $md_titles[$id] = htmlentities($matches[3]);
    }

    return '';
}
210 function _HashHTMLBlocks($text) {
211 # Hashify HTML blocks:
212 # We only want to do this for block-level HTML tags, such as headers,
213 # lists, and tables. That's because we still want to wrap <p>s around
214 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
215 # phrase emphasis, and spans. The list of tags we're looking for is
216 # hard-coded:
217 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
218 'script|noscript|form|fieldset|iframe|math|ins|del';
219 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
220 'script|noscript|form|fieldset|iframe|math';
222 # First, look for nested blocks, e.g.:
223 # <div>
224 # <div>
225 # tags for inner block must be indented.
226 # </div>
227 # </div>
229 # The outermost tags must start at the left margin for this to match, and
230 # the inner nested divs must be indented.
231 # We need to do this before the next, more liberal match, because the next
232 # match will start at the first `<div>` and stop at the first `</div>`.
233 $text = preg_replace_callback("{
234 ( # save in $1
235 ^ # start of line (with /m)
236 <($block_tags_a) # start tag = $2
237 \\b # word break
238 (.*\\n)*? # any number of lines, minimally matching
239 </\\2> # the matching end tag
240 [ \\t]* # trailing spaces/tabs
241 (?=\\n+|\\Z) # followed by a newline or end of document
243 }xm",
244 '_HashHTMLBlocks_callback',
245 $text);
248 # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
250 $text = preg_replace_callback("{
251 ( # save in $1
252 ^ # start of line (with /m)
253 <($block_tags_b) # start tag = $2
254 \\b # word break
255 (.*\\n)*? # any number of lines, minimally matching
256 .*</\\2> # the matching end tag
257 [ \\t]* # trailing spaces/tabs
258 (?=\\n+|\\Z) # followed by a newline or end of document
260 }xm",
261 '_HashHTMLBlocks_callback',
262 $text);
264 # Special case just for <hr />. It was easier to make a special case than
265 # to make the other regex more complicated.
266 $text = preg_replace_callback('{
268 (?<=\n\n) # Starting after a blank line
269 | # or
270 \A\n? # the beginning of the doc
272 ( # save in $1
273 [ \t]*
274 <(hr) # start tag = $2
275 \b # word break
276 ([^<>])*? #
277 /?> # the matching end tag
278 (?=\n{2,}|\Z) # followed by a blank line or end of document
280 }x',
281 '_HashHTMLBlocks_callback',
282 $text);
284 return $text;
function _HashHTMLBlocks_callback($matches) {
#
# Callback for _HashHTMLBlocks(): stores the matched HTML block
# ($matches[1]) in $md_html_blocks under its MD5 hash and returns the
# hash, surrounded by blank lines, as the replacement text.
#
    global $md_html_blocks;

    $block = $matches[1];
    $hash = md5($block);
    $md_html_blocks[$hash] = $block;

    return "\n\n" . $hash . "\n\n";
}
function _RunBlockGamut($text) {
#
# Applies, in order, every transformation that forms block-level tags
# such as paragraphs, headers and list items.
#
    global $md_empty_element_suffix;

    $text = _DoHeaders($text);

    # Horizontal rules: three or more *, - or _ alone on a line.
    $rule_patterns = array(
        '/^( ?\* ?){3,}$/m',
        '/^( ?- ?){3,}$/m',
        '/^( ?_ ?){3,}$/m',
    );
    $rule_tag = "\n<hr$md_empty_element_suffix\n";
    $text = preg_replace($rule_patterns, $rule_tag, $text);

    $text = _DoLists($text);
    $text = _DoCodeBlocks($text);
    $text = _DoBlockQuotes($text);

    # Make links out of things like `<http://example.com/>`
    $text = _DoAutoLinks($text);

    # Markdown() already ran _HashHTMLBlocks() on the raw HTML in the
    # source. This second run hides the markup generated above, so that
    # _FormParagraphs() does not wrap <p> tags around block-level tags.
    return _FormParagraphs(_HashHTMLBlocks($text));
}
function _RunSpanGamut($text) {
#
# Applies, in order, every transformation that occurs *within*
# block-level tags such as paragraphs, headers and list items.
#
    global $md_empty_element_suffix;

    # Order matters: code spans first, then stray & and < are encoded,
    # then images before anchors (because ![foo][f] looks like an anchor),
    # and emphasis last.
    $stages = array(
        '_DoCodeSpans',
        '_EncodeAmpsAndAngles',
        '_DoImages',
        '_DoAnchors',
        '_DoItalicsAndBold',
    );
    foreach ($stages as $stage) {
        $text = $stage($text);
    }

    # Hard breaks: two or more spaces at the end of a line.
    $break_tag = "<br$md_empty_element_suffix\n";
    return preg_replace('/ {2,}\n/', $break_tag, $text);
}
function _EscapeSpecialChars($text) {
#
# Tokenizes $text into HTML tags and text runs and rebuilds it:
#  - inside tags, * and _ are replaced by their MD5 placeholders so they
#    cannot be mistaken for Markdown emphasis markers;
#  - text runs get their backslash escapes processed.
#
    global $md_escape_table;

    $pieces = array();
    foreach (_TokenizeHTML($text) as $token) {
        $content = $token[1];
        if ($token[0] == 'tag') {
            # MD5 placeholders are likely overkill, but they keep us from
            # colliding with the escape values by accident.
            $pieces[] = str_replace(
                array('*', '_'),
                array($md_escape_table['*'], $md_escape_table['_']),
                $content);
        } else {
            $pieces[] = _EncodeBackslashEscapes($content);
        }
    }

    return implode('', $pieces);
}
389 function _DoAnchors($text) {
391 # Turn Markdown link shortcuts into XHTML <a> tags.
393 global $md_nested_brackets;
395 # First, handle reference-style links: [link text] [id]
397 $text = preg_replace_callback("{
398 ( # wrap whole match in $1
400 ($md_nested_brackets) # link text = $2
403 [ ]? # one optional space
404 (?:\\n[ ]*)? # one optional newline followed by spaces
407 (.*?) # id = $3
410 }xs",
411 '_DoAnchors_reference_callback', $text);
414 # Next, inline-style links: [link text](url "optional title")
416 $text = preg_replace_callback("{
417 ( # wrap whole match in $1
419 ($md_nested_brackets) # link text = $2
421 \\( # literal paren
422 [ \\t]*
423 <?(.+?)>? # href = $3
424 [ \\t]*
425 ( # $4
426 (['\"]) # quote char = $5
427 (.*?) # Title = $6
428 \\5 # matching quote
429 )? # title is optional
432 }xs",
433 '_DoAnchors_inline_callback', $text);
435 return $text;
function _DoAnchors_reference_callback($matches) {
#
# Callback for reference-style links: [link text] [id].
# $matches[1] is the whole match, $matches[2] the link text and
# $matches[3] the id. An empty id means a shortcut link like [this][],
# where the link text doubles as the id. Ids are looked up lower-cased
# in $md_urls / $md_titles; an unknown id leaves the match untouched.
#
    global $md_urls, $md_titles, $md_escape_table;

    $link_text = $matches[2];
    $id = strtolower($matches[3]);
    if ($id == "") {
        $id = strtolower($link_text);
    }

    if (!isset($md_urls[$id])) {
        return $matches[1];
    }

    # * and _ are hidden so they cannot conflict with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $anchor = '<a href="' . str_replace($plain, $hashed, $md_urls[$id]) . '"';
    if (isset($md_titles[$id])) {
        $anchor .= ' title="' . str_replace($plain, $hashed, $md_titles[$id]) . '"';
    }
    return $anchor . ">$link_text</a>";
}
function _DoAnchors_inline_callback($matches) {
#
# Callback for inline-style links: [link text](url "optional title").
# $matches[2] is the link text, $matches[3] the URL and $matches[6] the
# title. The title group is optional, so index 6 may be absent from
# $matches; it is only read when present.
#
# Returns the <a> tag. * and _ in the URL and title are replaced by their
# placeholders from $md_escape_table so they cannot be taken for
# italics/bold markers later on.
#
    global $md_escape_table;

    $link_text = $matches[2];
    $url       = $matches[3];
    $title     = isset($matches[6]) ? $matches[6] : null;

    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $result = '<a href="' . str_replace($plain, $hashed, $url) . '"';
    if (isset($title)) {
        # Literal quotes would end the attribute early; the entity needs
        # its terminating semicolon to be well-formed.
        $title = str_replace('"', '&quot;', $title);
        $title = str_replace($plain, $hashed, $title);
        $result .= " title=\"$title\"";
    }

    $result .= ">$link_text</a>";

    return $result;
}
494 function _DoImages($text) {
496 # Turn Markdown image shortcuts into <img> tags.
499 # First, handle reference-style labeled images: ![alt text][id]
501 $text = preg_replace_callback('{
502 ( # wrap whole match in $1
504 (.*?) # alt text = $2
507 [ ]? # one optional space
508 (?:\n[ ]*)? # one optional newline followed by spaces
511 (.*?) # id = $3
515 }xs',
516 '_DoImages_reference_callback', $text);
519 # Next, handle inline images: ![alt text](url "optional title")
520 # Don't forget: encode * and _
522 $text = preg_replace_callback("{
523 ( # wrap whole match in $1
524 !\\[
525 (.*?) # alt text = $2
527 \\( # literal paren
528 [ \\t]*
529 <?(\S+?)>? # src url = $3
530 [ \\t]*
531 ( # $4
532 (['\"]) # quote char = $5
533 (.*?) # title = $6
534 \\5 # matching quote
535 [ \\t]*
536 )? # title is optional
539 }xs",
540 '_DoImages_inline_callback', $text);
542 return $text;
function _DoImages_reference_callback($matches) {
#
# Callback for reference-style images: ![alt text][id].
# $matches[1] is the whole match, $matches[2] the alt text and
# $matches[3] the id. An empty id means a shortcut like ![this][], where
# the alt text doubles as the id. An unknown id leaves the match intact.
#
    global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;

    $alt = $matches[2];
    $id = strtolower($matches[3]);
    if ($id == "") {
        $id = strtolower($alt);
    }

    if (!isset($md_urls[$id])) {
        return $matches[1];
    }

    # * and _ are hidden so they cannot conflict with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $src = str_replace($plain, $hashed, $md_urls[$id]);
    $alt = str_replace('"', '&quot;', $alt);

    $tag = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($md_titles[$id])) {
        $tag .= ' title="' . str_replace($plain, $hashed, $md_titles[$id]) . '"';
    }
    return $tag . $md_empty_element_suffix;
}
function _DoImages_inline_callback($matches) {
#
# Callback for inline images: ![alt text](url "optional title").
# $matches[2] is the alt text, $matches[3] the source URL and
# $matches[6] the title when one was given. A title attribute is always
# emitted; it is empty when no title was supplied.
#
    global $md_empty_element_suffix, $md_escape_table;

    $title = isset($matches[6]) ? $matches[6] : '';

    # Literal quotes would end the attributes early.
    $alt   = str_replace('"', '&quot;', $matches[2]);
    $title = str_replace('"', '&quot;', $title);

    # * and _ are hidden so they cannot conflict with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);
    $src   = str_replace($plain, $hashed, $matches[3]);
    $title = str_replace($plain, $hashed, $title);

    return "<img src=\"$src\" alt=\"$alt\""
        . " title=\"$title\""
        . $md_empty_element_suffix;
}
function _DoHeaders($text) {
#
# Converts Setext-style and atx-style headers to <h1>..<h6> tags. Header
# text is run through _RunSpanGamut().
#
# The substitutions use preg_replace_callback() rather than the /e
# (eval) pattern modifier, which no longer exists in PHP 7 and later.
# Callbacks receive the raw match, so no quote un-slashing is needed.
#
    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    $text = preg_replace_callback("/(.+)[ \t]*\n=+[ \t]*\n+/",
        '_DoHeaders_callback_setext_h1', $text);
    $text = preg_replace_callback("/(.+)[ \t]*\n-+[ \t]*\n+/",
        '_DoHeaders_callback_setext_h2', $text);

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of hash marks
            [ \t]*
            (.+?)       # $2 = Header text
            [ \t]*
            \#*         # optional closing hash marks (not counted)
            \n+
        }xm',
        '_DoHeaders_callback_atx', $text);

    return $text;
}
function _DoHeaders_callback_setext_h1($matches) {
    # Setext header underlined with "=": level 1.
    return '<h1>' . _RunSpanGamut($matches[1]) . "</h1>\n\n";
}
function _DoHeaders_callback_setext_h2($matches) {
    # Setext header underlined with "-": level 2.
    return '<h2>' . _RunSpanGamut($matches[1]) . "</h2>\n\n";
}
function _DoHeaders_callback_atx($matches) {
    # atx header: the level is the number of leading hash marks.
    $level = strlen($matches[1]);
    return "<h$level>" . _RunSpanGamut($matches[2]) . "</h$level>\n\n";
}
644 function _DoLists($text) {
646 # Form HTML ordered (numbered) and unordered (bulleted) lists.
648 global $md_tab_width;
649 $less_than_tab = $md_tab_width - 1;
651 # Re-usable patterns to match list item bullets and number markers:
652 $marker_ul = '[*+-]';
653 $marker_ol = '\d+[.]';
654 $marker_any = "(?:$marker_ul|$marker_ol)";
656 $text = preg_replace_callback("{
657 ( # $1
658 ( # $2
659 ^[ ]{0,$less_than_tab}
660 ($marker_any) # $3 - first list item marker
661 [ \\t]+
663 (?s:.+?)
664 ( # $4
667 \\n{2,}
668 (?=\\S)
669 (?! # Negative lookahead for another list item marker
670 [ \\t]*
671 {$marker_any}[ \\t]+
675 }xm",
676 '_DoLists_callback', $text);
678 return $text;
function _DoLists_callback($matches) {
#
# Callback for _DoLists(): wraps one matched list ($matches[1]) in <ul>
# or <ol>, chosen from its first item marker ($matches[3]).
#
    # Pattern matching either a bullet or a number marker.
    $bullet = '[*+-]';
    $number = '\d+[.]';
    $marker_any = '(?:' . $bullet . '|' . $number . ')';

    $tag = preg_match('/[*+-]/', $matches[3]) ? "ul" : "ol";

    # Double returns become triple returns, so that a paragraph can be
    # made for the last item in a list, if necessary.
    $body = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);
    $items = _ProcessListItems($body, $marker_any);

    return "<$tag>\n" . $items . "</$tag>\n\n";
}
function _ProcessListItems($list_str, $marker_any) {
#
# Splits the body of a list into items and turns each one into <li>
# markup through _ProcessListItems_callback(). $marker_any is the regex
# fragment that matches any list item marker.
#
    # Trailing blank lines are reduced to a single newline.
    $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_pattern = '{
            (\n)?                           # leading line = $1
            (^[ \t]*)                       # leading whitespace = $2
            (' . $marker_any . ') [ \t]+    # list marker = $3
            ((?s:.+?)                       # list item text = $4
            (\n{1,2}))
            (?= \n* (\z | \2 (' . $marker_any . ') [ \t]+))
        }xm';

    return preg_replace_callback($item_pattern,
        '_ProcessListItems_callback', $trimmed);
}
function _ProcessListItems_callback($matches) {
#
# Callback for _ProcessListItems(): renders one list item as <li>.
# $matches[1] is the optional leading blank line and $matches[4] the
# item text.
#
    $body = _Outdent($matches[4]);

    # An item preceded by a blank line, or containing one, is treated as
    # block-level content.
    $is_block = $matches[1] || preg_match('/\n{2,}/', $matches[4]);

    if ($is_block) {
        $body = _RunBlockGamut($body);
    } else {
        # Recursion for sub-lists, then span-level processing.
        $body = rtrim(_DoLists($body), "\n");
        $body = _RunSpanGamut($body);
    }

    return "<li>" . $body . "</li>\n";
}
function _DoCodeBlocks($text) {
#
# Process Markdown `<pre><code>` blocks: runs of lines that each start
# with a tab or a tab-width of spaces, preceded by a blank line or the
# start of the document.
#
# The repetition count is spliced into the pattern by concatenation.
# Writing "\{$md_tab_width}" inside a double-quoted string keeps the
# backslash on PHP 5.1.1 and later, which turns the quantifier into a
# literal "{4}" and stops space-indented code blocks from matching.
#
    global $md_tab_width;

    $pattern = '{
            (?:\n\n|\A)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{' . $md_tab_width . '} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,' . $md_tab_width . '}\S)|\Z)  # Lookahead for non-space at line-start, or end of doc
        }xm';

    $text = preg_replace_callback($pattern, '_DoCodeBlocks_callback', $text);

    return $text;
}
function _DoCodeBlocks_callback($matches) {
#
# Callback for _DoCodeBlocks(): outdents, encodes and detabs the matched
# code block ($matches[1]) and wraps it in <pre><code>.
#
    $code = _Detab(_EncodeCode(_Outdent($matches[1])));

    # Leading newlines and trailing whitespace are trimmed.
    $code = preg_replace('/\A\n+/', '', $code);
    $code = preg_replace('/\s+\z/', '', $code);

    return "\n\n<pre><code>" . $code . "\n</code></pre>\n\n";
}
766 function _DoCodeSpans($text) {
768 # * Backtick quotes are used for <code></code> spans.
770 # * You can use multiple backticks as the delimiters if you want to
771 # include literal backticks in the code span. So, this input:
773 # Just type ``foo `bar` baz`` at the prompt.
775 # Will translate to:
777 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
779 # There's no arbitrary limit to the number of backticks you
780 # can use as delimiters. If you need three consecutive backticks
781 # in your code, use four for delimiters, etc.
783 # * You can use spaces to get literal backticks at the edges:
785 # ... type `` `bar` `` ...
787 # Turns to:
789 # ... type <code>`bar`</code> ...
791 $text = preg_replace_callback("@
792 (`+) # $1 = Opening run of `
793 (.+?) # $2 = The code block
794 (?<!`)
796 (?!`)
797 @xs",
798 '_DoCodeSpans_callback', $text);
800 return $text;
function _DoCodeSpans_callback($matches) {
#
# Callback for _DoCodeSpans(): strips leading and trailing spaces/tabs
# from the span content ($matches[2]), encodes it and wraps it in <code>.
#
    $edges = array('/^[ \t]*/', '/[ \t]*$/');
    $code = preg_replace($edges, '', $matches[2]);

    return "<code>" . _EncodeCode($code) . "</code>";
}
function _EncodeCode($_) {
#
# Encode/escape certain characters inside Markdown code runs. In code
# these characters are literals and lose their special Markdown meaning.
#
    global $md_escape_table;

    # All ampersands are encoded first (HTML entities are not entities
    # within a code span), then the angle brackets.
    $html_chars    = array('&', '<', '>');
    $html_entities = array('&amp;', '&lt;', '&gt;');
    $encoded = str_replace($html_chars, $html_entities, $_);

    # Finally, hide the characters that are magic in Markdown.
    $magic  = array_keys($md_escape_table);
    $hashes = array_values($md_escape_table);
    return str_replace($magic, $hashes, $encoded);
}
function _DoItalicsAndBold($text) {
#
# Converts **text** / __text__ to <strong> and *text* / _text_ to <em>.
#
    # <strong> must go first; the patterns are applied in array order.
    $patterns = array(
        '{ (\*\*|__) (?=\S) (.+?) (?<=\S) \1 }sx',
        '{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx',
    );
    $replacements = array(
        '<strong>\2</strong>',
        '<em>\2</em>',
    );

    return preg_replace($patterns, $replacements, $text);
}
847 function _DoBlockQuotes($text) {
848 $text = preg_replace_callback('/
849 ( # Wrap whole match in $1
851 ^[ \t]*>[ \t]? # ">" at the start of a line
852 .+\n # rest of the first line
853 (.+\n)* # subsequent consecutive lines
854 \n* # blanks
857 /xm',
858 '_DoBlockQuotes_callback', $text);
860 return $text;
function _DoBlockQuotes_callback($matches) {
#
# Callback for _DoBlockQuotes(): converts one quoted region
# ($matches[1]) into a <blockquote>, processing its content recursively.
#
    # One level of quoting is trimmed, and whitespace-only lines emptied.
    $strip = array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m');
    $inner = preg_replace($strip, '', $matches[1]);

    $inner = _RunBlockGamut($inner);        # recurse

    # Every line of the result is indented...
    $inner = preg_replace('/^/m', " ", $inner);
    # ...but leading spaces screw with <pre> content, so those are undone
    # inside <pre> blocks.
    $inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        '_DoBlockQuotes_callback2', $inner);

    return "<blockquote>\n$inner\n</blockquote>\n\n";
}
function _DoBlockQuotes_callback2($matches) {
    # Removes the leading space that _DoBlockQuotes_callback() put in
    # front of every line of a <pre> block ($matches[1]).
    return preg_replace('/^ /m', '', $matches[1]);
}
function _FormParagraphs($text) {
#
# Params:
#   $text - string to process with html <p> tags
#
# Splits $text on blank lines. A chunk that is a key of $md_html_blocks
# is swapped back for the HTML block it stands for; any other chunk is
# run through _RunSpanGamut() and wrapped in <p>...</p>.
#
    global $md_html_blocks;

    # Strip leading and trailing lines:
    $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($grafs as $i => $graf) {
        if (isset($md_html_blocks[$graf])) {
            # Unhashify HTML blocks
            $grafs[$i] = $md_html_blocks[$graf];
        } else {
            # Wrap <p> tags, dropping the paragraph's leading whitespace.
            $spans = _RunSpanGamut($graf);
            $grafs[$i] = preg_replace('/^([ \t]*)/', '<p>', $spans) . "</p>";
        }
    }

    return implode("\n\n", $grafs);
}
function _EncodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to be
# encoded.
#
    # An ampersand is encoded unless it already starts an entity.
    # Based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    $bare_amp = '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/';

    # A "<" is encoded unless it is followed by a letter or one of / ? $ !
    $naked_lt = '{<(?![a-z/?\$!])}i';

    $text = preg_replace($bare_amp, '&amp;', $text);
    return preg_replace($naked_lt, '&lt;', $text);
}
function _EncodeBackslashEscapes($text) {
#
# Parameter: String.
# Returns:   The string, after processing the backslash escape
#            sequences listed in $md_backslash_escape_table.
#
    global $md_escape_table, $md_backslash_escape_table;

    # The sequences are replaced in table order.
    $sequences = array_keys($md_backslash_escape_table);
    $hashes    = array_values($md_backslash_escape_table);

    return str_replace($sequences, $hashes, $text);
}
948 function _DoAutoLinks($text) {
949 $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
950 '<a href="\1">\1</a>', $text);
952 # Email addresses: <address@domain.foo>
953 $text = preg_replace('{
956 [-.\w]+
958 [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
961 }exi',
962 "_EncodeEmailAddress(_UnescapeSpecialChars(_UnslashQuotes('\\1')))",
963 $text);
965 return $text;
function _EncodeEmailAddress($addr) {
#
# Input: an email address, e.g. "foo@example.com"
#
# Output: the email address as a mailto link, with the characters of the
# address encoded as decimal or hex entities (a few are left raw), in
# the hopes of foiling most address harvesting spam bots.
#
# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
# mailing list: <http://tinyurl.com/yu7ue>
#
    # Every character except ':' goes through the encoder; the colon is
    # left alone so the "mailto:" prefix can be spotted below.
    $encoded = preg_replace_callback('/([^\:])/',
        '_EncodeEmailAddress_callback', "mailto:" . $addr);

    $link = "<a href=\"$encoded\">$encoded</a>";

    # The mailto: prefix is stripped from the visible part of the link.
    return preg_replace('/">.+?:/', '">', $link);
}
function _EncodeEmailAddress_callback($matches) {
#
# Callback for _EncodeEmailAddress(): encodes one character
# ($matches[1]) at random -- roughly 10% raw, 45% hex, 45% dec.
#
    $char = $matches[1];
    $roll = rand(0, 100);

    if ($roll < 45) {
        $out = '&#x' . dechex(ord($char)) . ';';
    } elseif ($roll > 90 && $char != '@') {
        # '@' *must* be encoded, so it never takes the raw branch.
        $out = $char;
    } else {
        $out = '&#' . ord($char) . ';';
    }

    return $out;
}
function _UnescapeSpecialChars($text) {
#
# Swap back in all the special characters we've hidden: every hash value
# from $md_escape_table is replaced by the character it stands for.
#
    global $md_escape_table;

    $hashes = array_values($md_escape_table);
    $chars  = array_keys($md_escape_table);

    return str_replace($hashes, $chars, $text);
}
# _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
# We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) {
    function _TokenizeHTML($str) {
    #
    # Parameter: String containing HTML markup.
    # Returns:   An array of the tokens comprising the input string. Each
    #            token is either a tag (possibly with nested tags
    #            contained therein, such as <a href="<MTFoo>">), or a run
    #            of text between tags. Each element of the array is a
    #            two-element array; the first is either 'tag' or 'text';
    #            the second is the actual value.
    #
    # Empty text runs (before a leading tag, after a trailing tag, or
    # between two adjacent tags) produce no token at all.
    #
    # Regular expression derived from the _tokenize() subroutine in
    # Brad Choate's MTRegex plugin.
    # <http://www.bradchoate.com/past/mtregex.php>
    #
        $tokens = array();

        $depth = 6;
        $nested_tags = str_repeat('(?:<[a-z\/!$](?:[^<>]|', $depth)
                      .str_repeat(')*>)', $depth);
        $match = "(?s:<!(?:--.*?--\s*)+>)|".  # comment
                 "(?s:<\?.*?\?>)|".           # processing instruction
                 "$nested_tags";              # nested tags

        # With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly:
        # even positions hold the text between delimiters, odd positions
        # hold the delimiters themselves (the tags).
        $parts = preg_split("/($match)/", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        foreach ($parts as $position => $part) {
            if ($position % 2) {
                $tokens[] = array('tag', $part);
            } elseif ($part !== '') {
                $tokens[] = array('text', $part);
            }
        }

        return $tokens;
    }
}
function _Outdent($text) {
#
# Remove one level of line-leading tabs or spaces: at the start of each
# line, either one tab or up to $md_tab_width spaces.
#
    global $md_tab_width;

    $one_indent = '/^(\t|[ ]{1,' . $md_tab_width . '})/m';

    return preg_replace($one_indent, "", $text);
}
function _Detab($text) {
#
# Expands every tab to spaces, up to the next multiple of $md_tab_width.
#
# Inspired from a post by Bart Lateur:
# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
#
# Implemented with preg_replace_callback() rather than the /e (eval)
# pattern modifier, which no longer exists in PHP 7 and later. The eval
# form also slash-escaped double quotes in the captured text, which threw
# off the column count and left stray backslashes in the output.
#
    return preg_replace_callback("/(.*?)\t/", '_Detab_callback', $text);
}
function _Detab_callback($matches) {
    # $matches[1] is the text between the previous tab (or the start of
    # the line) and this tab; pad it to the next tab stop.
    global $md_tab_width;

    $head = $matches[1];
    $padding = $md_tab_width - strlen($head) % $md_tab_width;

    return $head . str_repeat(' ', $padding);
}
function _UnslashQuotes($text) {
#
# Removes the slashes that preg_replace adds automatically in front of
# double quotes when it evaluates a replacement expression.
# Parameter: String.
# Returns:   The string with any slash-double-quote (\") sequence
#            replaced by a single double quote.
#
    $slashed_quote = '\"';
    $plain_quote   = '"';

    return str_replace($slashed_quote, $plain_quote, $text);
}
1097 PHP Markdown
1098 ============
1100 Description
1101 -----------
1103 This is a PHP translation of the original Markdown formatter written in
1104 Perl by John Gruber.
1106 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1107 easy-to-write structured text format into HTML. Markdown's text format
1108 is most similar to that of plain text email, and supports features such
1109 as headers, *emphasis*, code blocks, blockquotes, and links.
1111 Markdown's syntax is designed not as a generic markup language, but
1112 specifically to serve as a front-end to (X)HTML. You can use span-level
1113 HTML tags anywhere in a Markdown document, and you can use block level
1114 HTML tags (like <div> and <table>) as well.
1116 For more information about Markdown's syntax, see:
1118 <http://daringfireball.net/projects/markdown/>
1121 Bugs
1122 ----
1124 To file bug reports please send email to:
1126 <michel.fortin@michelf.com>
1128 Please include with your report: (1) the example input; (2) the output you
1129 expected; (3) the output Markdown actually produced.
1132 Version History
1133 ---------------
1135 1.0: Sat 21 Aug 2004
1137 * Fixed a couple of bugs in _DoLists() and _ProcessListItems() that
1138 caused unordered lists starting with `+` or `-` to be turned into
1139 *ordered* lists.
1141 * Added to the list of block-level HTML tags:
1143 noscript, form, fieldset, iframe, math
1145 * Fixed an odd bug where, with input like this:
1147 > This line starts the blockquote
1148 * This list is part of the quote.
1149 * Second item.
1151 This paragraph is not part of the blockquote.
1153 The trailing paragraph was incorrectly included in the
1154 blockquote. (The solution was to add an extra "\n" after
1155 lists.)
1157 * The contents of `<pre>` tags inside `<blockquote>` are no longer
1158 indented in the HTML output.
1160 * PHP Markdown can now be used as a modifier by the Smarty
1161 templating engine. Rename the file to "modifier.markdown.php"
1162 and put it in your smarty plugins folder.
1164 * Now works as a bBlog formatter. Rename the file to
1165 "modifier.markdown.php" and place it in the "bBlog_plugins"
1166 folder.
1169 1.0fc1: Wed 8 Jul 2004
1171 * Greatly simplified the rules for code blocks. No more colons
1172 necessary; if it's indented (4 spaces or 1 tab), it's a code block.
1174 * Unordered list items can now be denoted by any of the following
1175 bullet markers: [*+-]
1177 * Replacing `"` with `&quot;` to fix literal quotes within title
1178 attributes.
1181 1.0b9: Sun 27 Jun 2004
1183 * Replacing `"` with `&quot;` to fix literal quotes within img alt
1184 attributes.
1187 1.0b8: Wed 23 Jun 2004
1189 * In WordPress, solved a bug where PHP Markdown did not deactivate
1190 the paragraph filter, so all returns were converted to line breaks.
1191 The "texturize" filter was being disabled instead.
1193 * Added 'math' tags to block-level tag patterns in `_HashHTMLBlocks()`.
1194 Please disregard all the 'math'-tag related items in 1.0b7.
1196 * Commented out some vestigial code in `_EscapeSpecialChars()`.
1199 1.0b7: Sat 12 Jun 2004
1201 * Added 'math' to `$tags_to_skip` pattern, for MathML users.
1203 * Tweaked regex for identifying HTML entities in
1204 `_EncodeAmpsAndAngles()`, so as to allow for the very long entity
1205 names used by MathML. (Thanks to Jacques Distler for the patch.)
1207 * Changed the internals of `_TokenizeHTML` to lower the PHP version
1208 requirement to PHP 4.0.5.
1211 1.0b6: Sun 6 Jun 2004
1213 * Added a WordPress plugin interface. This means that you can
1214 directly put the "markdown.php" file into the "wp-content/plugins"
1215 directory and then activate it from the administrative interface.
1217 * Added a Textile compatibility interface. Rename this file to
1218 "classTextile.php" and it can replace Textile anywhere.
1220 * The title attribute of reference-style links was ignored.
1221 This is now fixed.
1223 * Changed internal variables names so that they begin with `md_`
1224 instead of `g_`. This should reduce the risk of name collision with
1225 other programs.
1228 1.0b5: Sun 2 May 2004
1230 * Workaround for supporting `<ins>` and `<del>` as block-level tags.
1231 This only works if the start and end tags are on lines by
1232 themselves.
1234 * Three or more underscores can now be used for horizontal rules.
1236 * Lines containing only whitespace are trimmed from blockquotes.
1238 * You can now optionally wrap URLs with angle brackets -- like so:
1239 `<http://example.com>` -- in link definitions and inline links and
1240 images.
1242 * `_` and `*` characters in links and images are no longer escaped
1243 as HTML entities. Instead, we use the ridiculous but effective MD5
1244 hashing trick that's used to hide these characters elsewhere. The
1245 end result is that the HTML output uses the literal `*` and `_`
1246 characters, rather than the ugly entities.
1248 * Passing an empty string to the Markdown function no longer creates
1249 an empty paragraph.
1251 * Added a global declaration at the beginning of the file. This
1252 means you can now `include 'markdown.php'` from inside a function.
1255 1.0b4.1: Sun 4 Apr 2004
1257 * Fixed a bug where image tags did not close.
1259 * Fixed a bug where brackets `[]` inside a link caused the link to be
1260 ignored. PHP Markdown supports only 6 (!) levels of brackets inside a link
1261 (while John's original version of Markdown in Perl supports many more).
1264 1.0b4: Sat 27 Mar 2004
1266 * First release of PHP Markdown, based on the 1.0b4 release.
1269 Author & Contributors
1270 ---------------------
1272 Original version by John Gruber
1273 <http://daringfireball.net/>
1275 PHP translation by Michel Fortin
1276 <http://www.michelf.com/>
1278 First WordPress plugin interface written by Matt Mullenweg
1279 <http://photomatt.net/>
1282 Copyright and License
1283 ---------------------
1285 Copyright (c) 2004 Michel Fortin
1286 <http://www.michelf.com/>
1287 All rights reserved.
1289 Copyright (c) 2003-2004 John Gruber
1290 <http://daringfireball.net/>
1291 All rights reserved.
1293 Markdown is free software; you can redistribute it and/or modify it
1294 under the terms of the GNU General Public License as published by the
1295 Free Software Foundation; either version 2 of the License, or (at your
1296 option) any later version.
1298 Markdown is distributed in the hope that it will be useful, but WITHOUT
1299 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1300 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1301 for more details.