Applied patch #411
[elgg.git] / lib / markdown.php
blob48a21b44e1187341dc75a8a23d0b69fa42e7a3ba
1 <?php
4 # Markdown - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004 John Gruber
7 # <http://daringfireball.net/projects/markdown/>
9 # Copyright (c) 2004-2005 Michel Fortin - PHP Port
10 # <http://www.michelf.com/projects/php-markdown/>
# Shared state used throughout this file. The `global` declarations have no
# effect at file scope; presumably they are here so the names still resolve
# to globals when this file is include()d from inside a function.
global $MarkdownPHPVersion, $MarkdownSyntaxVersion;
global $md_empty_element_suffix, $md_tab_width;
global $md_nested_brackets_depth, $md_nested_brackets;
global $md_escape_table, $md_backslash_escape_table;
global $md_list_level;

# Release identifiers:
$MarkdownSyntaxVersion = '1.0.1';  # Sun 12 Dec 2004
$MarkdownPHPVersion    = '1.0.1a'; # Fri 15 Apr 2005

# Global default settings:
$md_tab_width            = 4;
$md_empty_element_suffix = " />";  # Change to ">" for HTML output
31 # -- WordPress Plugin Interface -----------------------------------------------
33 Plugin Name: Markdown
34 Plugin URI: http://www.michelf.com/projects/php-markdown/
35 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
36 Version: 1.0.1a
37 Author: Michel Fortin
38 Author URI: http://www.michelf.com/
# Only register filters when $wp_version is set, i.e. when this file is
# loaded as a WordPress plugin.
if (isset($wp_version)) {
    # Hooks on which rendered output is filtered.
    $md_output_hooks = array('the_content', 'the_excerpt', 'the_excerpt_rss',
                             'comment_text', 'comment_excerpt');

    # Remove default WordPress auto-paragraph filter.
    foreach (array('the_content', 'the_excerpt', 'comment_text') as $md_hook) {
        remove_filter($md_hook, 'wpautop');
    }

    # Add Markdown filter with priority 6 (same as Textile).
    foreach ($md_output_hooks as $md_hook) {
        add_filter($md_hook, 'Markdown', 6);
    }

    # Postpone the not-allowed-tag-filter until Markdown has run. For comments,
    # it would probably be better to filter with Markdown before they are
    # added in the database, but doing this would break older sites.
    remove_filter('pre_comment_content', 'wp_filter_kses');
    add_filter('comment_text', 'wp_filter_kses', 45);

    # Make balanceTags work *after* Markdown. You can still disable
    # balanceTags from the admin interface (in Options > Writing).
    foreach (array('content_save_pre', 'excerpt_save_pre', 'comment_save_pre') as $md_hook) {
        remove_filter($md_hook, 'balanceTags', 50);
    }
    foreach ($md_output_hooks as $md_hook) {
        add_filter($md_hook, 'balanceTags', 50);
    }

    # Do not leave the loop helpers behind in the including scope.
    unset($md_hook, $md_output_hooks);
}
71 # -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
    #
    # bBlog plugin descriptor.
    # Returns: an associative array describing this modifier; the 'version'
    #          entry is read from the global $MarkdownPHPVersion.
    #
    global $MarkdownPHPVersion;

    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'Markdown';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = $MarkdownPHPVersion;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    return $info;
}
86 # -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_markdown($text) {
    #
    # Smarty modifier entry point: passes $text through Markdown() and
    # returns the result unchanged.
    #
    $html = Markdown($text);
    return $html;
}
91 # -- Textile Compatibility Mode -----------------------------------------------
92 # Rename this file to "classTextile.php" and it can replace Textile anywhere.
# The stand-in class is only declared when this very file is named
# "classTextile.php" (compared case-insensitively on the last 16 characters).
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
    # Try to include PHP SmartyPants. Should be in the same directory.
    # Failure is deliberately silenced: SmartyPants is optional.
    @include_once 'smartypants.php';

    # Fake Textile class. It calls Markdown instead.
    class Textile {
        #
        # Runs Markdown on $text only when both $lite and $encode are empty,
        # then applies SmartyPants if that function is available.
        # $noimage and $strict are accepted but not used.
        #
        function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
            $plain_mode = ($lite == '' && $encode == '');
            if ($plain_mode) {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }
    }
}
# Globals:

# Regex to match balanced [brackets].
# PCRE patterns built here cannot recurse, so the nesting is unrolled to a
# fixed maximum bracket depth.
$md_nested_brackets_depth = 6;
$md_nested_brackets =
    str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
    str_repeat('\])*', $md_nested_brackets_depth);

# Table of hash values for escaped characters:
# each Markdown-significant character maps to its MD5 checksum, which is
# swapped in while processing and swapped back out at the end.
$md_escape_table = array();
foreach (array("\\", "`", "*", "_", "{", "}", "[", "]",
               "(", ")", ">", "#", "+", "-", ".", "!") as $md_char) {
    $md_escape_table[$md_char] = md5($md_char);
}

# Create an identical table but for backslash-escaped characters
# (key is a backslash followed by the character). Start from an explicit
# empty array so that no stale entries can survive from an earlier value.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $md_char => $md_hash) {
    $md_backslash_escape_table["\\$md_char"] = $md_hash;
}
unset($md_char, $md_hash);
function Markdown($text) {
    #
    # Main function: converts Markdown source in $text to HTML.
    # Returns: the converted text followed by one newline.
    #
    # The order of the calls below is essential. Link and image
    # substitutions need to happen before _EscapeSpecialChars(), so that
    # any *'s or _'s in the <a> and <img> tags get encoded.
    #
    global $md_urls, $md_titles, $md_html_blocks;

    # Reset the per-document hashes. Without this, link definitions and
    # hashed HTML blocks from one article would leak into the next when a
    # page renders several articles (e.g. an index of recent entries).
    $md_html_blocks = array();
    $md_titles      = array();
    $md_urls        = array();

    # Standardize line endings (DOS and Mac to Unix) and make sure the
    # text ends with a couple of newlines.
    $text = str_replace(array("\r\n", "\r"), "\n", $text) . "\n\n";

    # Convert all tabs to spaces.
    $text = _Detab($text);

    # Blank out lines consisting only of spaces and tabs, so later regexes
    # can match consecutive blank lines with /\n+/ instead of /[ \t]*\n+/.
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Turn block-level HTML blocks into hash entries, then strip link
    # definitions and store them in the hashes.
    $text = _HashHTMLBlocks($text);
    $text = _StripLinkDefinitions($text);

    # Block-level transformations, then restore the hidden special chars.
    $html = _UnescapeSpecialChars(_RunBlockGamut($text));

    return $html . "\n";
}
190 function _StripLinkDefinitions($text) {
192 # Strips link definitions from text, stores the URLs and titles in
193 # hash references.
195 global $md_tab_width;
196 $less_than_tab = $md_tab_width - 1;
198 # Link defs are in the form: ^[id]: url "optional title"
199 $text = preg_replace_callback('{
200 ^[ ]{0,'.$less_than_tab.'}\[(.+)\]: # id = $1
201 [ \t]*
202 \n? # maybe *one* newline
203 [ \t]*
204 <?(\S+?)>? # url = $2
205 [ \t]*
206 \n? # maybe one newline
207 [ \t]*
209 (?<=\s) # lookbehind for whitespace
210 ["(]
211 (.+?) # title = $3
212 [")]
213 [ \t]*
214 )? # title is optional
215 (?:\n+|\Z)
216 }xm',
217 '_StripLinkDefinitions_callback',
218 $text);
219 return $text;
function _StripLinkDefinitions_callback($matches) {
    #
    # Callback for one link definition: records the URL (and the title, when
    # one was captured) under the lower-cased id.
    # Parameter: $matches[1] = id, $matches[2] = url, $matches[3] = title.
    # Returns:   empty string, so the definition disappears from the text.
    #
    global $md_urls, $md_titles;

    $id = strtolower($matches[1]);
    $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

    if (isset($matches[3])) {
        # Double quotes would terminate the title="..." attribute.
        $md_titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
231 function _HashHTMLBlocks($text) {
232 global $md_tab_width;
233 $less_than_tab = $md_tab_width - 1;
235 # Hashify HTML blocks:
236 # We only want to do this for block-level HTML tags, such as headers,
237 # lists, and tables. That's because we still want to wrap <p>s around
238 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
239 # phrase emphasis, and spans. The list of tags we're looking for is
240 # hard-coded:
241 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
242 'script|noscript|form|fieldset|iframe|math|ins|del';
243 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
244 'script|noscript|form|fieldset|iframe|math';
246 # First, look for nested blocks, e.g.:
247 # <div>
248 # <div>
249 # tags for inner block must be indented.
250 # </div>
251 # </div>
253 # The outermost tags must start at the left margin for this to match, and
254 # the inner nested divs must be indented.
255 # We need to do this before the next, more liberal match, because the next
256 # match will start at the first `<div>` and stop at the first `</div>`.
257 $text = preg_replace_callback("{
258 ( # save in $1
259 ^ # start of line (with /m)
260 <($block_tags_a) # start tag = $2
261 \\b # word break
262 (.*\\n)*? # any number of lines, minimally matching
263 </\\2> # the matching end tag
264 [ \\t]* # trailing spaces/tabs
265 (?=\\n+|\\Z) # followed by a newline or end of document
267 }xm",
268 '_HashHTMLBlocks_callback',
269 $text);
272 # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
274 $text = preg_replace_callback("{
275 ( # save in $1
276 ^ # start of line (with /m)
277 <($block_tags_b) # start tag = $2
278 \\b # word break
279 (.*\\n)*? # any number of lines, minimally matching
280 .*</\\2> # the matching end tag
281 [ \\t]* # trailing spaces/tabs
282 (?=\\n+|\\Z) # followed by a newline or end of document
284 }xm",
285 '_HashHTMLBlocks_callback',
286 $text);
288 # Special case just for <hr />. It was easier to make a special case than
289 # to make the other regex more complicated.
290 $text = preg_replace_callback('{
292 (?<=\n\n) # Starting after a blank line
293 | # or
294 \A\n? # the beginning of the doc
296 ( # save in $1
297 [ ]{0,'.$less_than_tab.'}
298 <(hr) # start tag = $2
299 \b # word break
300 ([^<>])*? #
301 /?> # the matching end tag
302 [ \t]*
303 (?=\n{2,}|\Z) # followed by a blank line or end of document
305 }x',
306 '_HashHTMLBlocks_callback',
307 $text);
309 # Special case for standalone HTML comments:
310 $text = preg_replace_callback('{
312 (?<=\n\n) # Starting after a blank line
313 | # or
314 \A\n? # the beginning of the doc
316 ( # save in $1
317 [ ]{0,'.$less_than_tab.'}
318 (?s:
320 (--.*?--\s*)+
323 [ \t]*
324 (?=\n{2,}|\Z) # followed by a blank line or end of document
326 }x',
327 '_HashHTMLBlocks_callback',
328 $text);
330 return $text;
function _HashHTMLBlocks_callback($matches) {
    #
    # Stores the captured block ($matches[1]) in $md_html_blocks under its
    # MD5 checksum.
    # Returns: the checksum surrounded by blank lines, which replaces the
    #          block in the text.
    #
    global $md_html_blocks;

    $block = $matches[1];
    $hash  = md5($block);
    $md_html_blocks[$hash] = $block;

    return "\n\n" . $hash . "\n\n";
}
function _RunBlockGamut($text) {
    #
    # Applies, in order, all the transformations that form block-level
    # tags like paragraphs, headers, and list items.
    #
    global $md_empty_element_suffix;

    $text = _DoHeaders($text);

    # Horizontal rules: three or more *, - or _ on a line of their own.
    $hr_patterns = array(
        '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'
    );
    $hr_tag = "\n<hr$md_empty_element_suffix\n";
    $text = preg_replace($hr_patterns, $hr_tag, $text);

    $text = _DoLists($text);
    $text = _DoCodeBlocks($text);
    $text = _DoBlockQuotes($text);

    # _HashHTMLBlocks() already ran in Markdown() to protect raw HTML in
    # the source. This second pass protects the markup generated above, so
    # that _FormParagraphs() does not wrap <p> tags around block-level tags.
    $text = _HashHTMLBlocks($text);

    return _FormParagraphs($text);
}
function _RunSpanGamut($text) {
    #
    # Applies, in order, all the transformations that occur *within*
    # block-level tags like paragraphs, headers, and list items.
    #
    global $md_empty_element_suffix;

    $text = _EscapeSpecialChars(_DoCodeSpans($text));

    # Images must be processed before anchors, because ![foo][f] looks
    # like an anchor.
    $text = _DoAnchors(_DoImages($text));

    # Make links out of things like `<http://example.com/>`.
    # Must come after _DoAnchors(), because < and > can be used as
    # delimiters in inline links like [this](<url>).
    $text = _DoAutoLinks($text);

    # Fix unencoded ampersands and <'s, then emphasis.
    $text = _DoItalicsAndBold(_EncodeAmpsAndAngles($text));

    # Hard breaks: two or more spaces before a newline.
    $break = "<br$md_empty_element_suffix\n";
    return preg_replace('/ {2,}\n/', $break, $text);
}
function _EscapeSpecialChars($text) {
    #
    # Splits $text into HTML tags and text runs and rebuilds it:
    #  - inside tags, * and _ are replaced by their MD5 checksums so they
    #    don't conflict with their use for italics and strong (likely
    #    overkill, but it avoids colliding with real content by accident);
    #  - text runs go through _EncodeBackslashEscapes().
    #
    global $md_escape_table;

    $magic  = array('*', '_');
    $hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $rebuilt = '';
    foreach (_TokenizeHTML($text) as $token) {
        list($kind, $chunk) = $token;
        if ($kind == 'tag') {
            $rebuilt .= str_replace($magic, $hashes, $chunk);
        } else {
            $rebuilt .= _EncodeBackslashEscapes($chunk);
        }
    }
    return $rebuilt;
}
439 function _DoAnchors($text) {
441 # Turn Markdown link shortcuts into XHTML <a> tags.
443 global $md_nested_brackets;
445 # First, handle reference-style links: [link text] [id]
447 $text = preg_replace_callback("{
448 ( # wrap whole match in $1
450 ($md_nested_brackets) # link text = $2
453 [ ]? # one optional space
454 (?:\\n[ ]*)? # one optional newline followed by spaces
457 (.*?) # id = $3
460 }xs",
461 '_DoAnchors_reference_callback', $text);
464 # Next, inline-style links: [link text](url "optional title")
466 $text = preg_replace_callback("{
467 ( # wrap whole match in $1
469 ($md_nested_brackets) # link text = $2
471 \\( # literal paren
472 [ \\t]*
473 <?(.*?)>? # href = $3
474 [ \\t]*
475 ( # $4
476 (['\"]) # quote char = $5
477 (.*?) # Title = $6
478 \\5 # matching quote
479 )? # title is optional
482 }xs",
483 '_DoAnchors_inline_callback', $text);
485 return $text;
function _DoAnchors_reference_callback($matches) {
    #
    # Builds the <a> tag for a reference-style link.
    # Parameter: $matches[1] = whole match, [2] = link text, [3] = id.
    # Returns:   the anchor tag, or the whole match untouched when the id
    #            has no entry in $md_urls.
    #
    global $md_urls, $md_titles, $md_escape_table;

    $label = $matches[2];
    $id    = strtolower($matches[3]);
    if ($id == "") {
        $id = strtolower($label);   # for shortcut links like [this][].
    }

    if (!isset($md_urls[$id])) {
        return $matches[1];
    }

    # * and _ have to be encoded to avoid conflicting with italics/bold.
    $magic  = array('*', '_');
    $hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $href = str_replace($magic, $hashes, $md_urls[$id]);
    $tag  = "<a href=\"$href\"";
    if (isset($md_titles[$id])) {
        $tip  = str_replace($magic, $hashes, $md_titles[$id]);
        $tag .= " title=\"$tip\"";
    }

    return $tag . ">$label</a>";
}
function _DoAnchors_inline_callback($matches) {
    #
    # Builds the <a> tag for an inline-style link.
    # Parameter: $matches[2] = link text, [3] = href, [6] = title (only
    #            present when a title was captured).
    # Returns:   the anchor tag.
    #
    global $md_escape_table;

    # * and _ have to be encoded to avoid conflicting with italics/bold.
    $magic  = array('*', '_');
    $hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $label = $matches[2];
    $href  = str_replace($magic, $hashes, $matches[3]);

    $attributes = "href=\"$href\"";
    if (isset($matches[6])) {
        $tip = str_replace('"', '&quot;', $matches[6]);
        $tip = str_replace($magic, $hashes, $tip);
        $attributes .= " title=\"$tip\"";
    }

    return "<a $attributes>$label</a>";
}
544 function _DoImages($text) {
546 # Turn Markdown image shortcuts into <img> tags.
549 # First, handle reference-style labeled images: ![alt text][id]
551 $text = preg_replace_callback('{
552 ( # wrap whole match in $1
554 (.*?) # alt text = $2
557 [ ]? # one optional space
558 (?:\n[ ]*)? # one optional newline followed by spaces
561 (.*?) # id = $3
565 }xs',
566 '_DoImages_reference_callback', $text);
569 # Next, handle inline images: ![alt text](url "optional title")
570 # Don't forget: encode * and _
572 $text = preg_replace_callback("{
573 ( # wrap whole match in $1
574 !\\[
575 (.*?) # alt text = $2
577 \\( # literal paren
578 [ \\t]*
579 <?(\S+?)>? # src url = $3
580 [ \\t]*
581 ( # $4
582 (['\"]) # quote char = $5
583 (.*?) # title = $6
584 \\5 # matching quote
585 [ \\t]*
586 )? # title is optional
589 }xs",
590 '_DoImages_inline_callback', $text);
592 return $text;
function _DoImages_reference_callback($matches) {
    #
    # Builds the <img> tag for a reference-style image.
    # Parameter: $matches[1] = whole match, [2] = alt text, [3] = id.
    # Returns:   the image tag, or the whole match left intact when the id
    #            has no entry in $md_urls.
    #
    global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;

    $alt = $matches[2];
    $id  = strtolower($matches[3]);
    if ($id == "") {
        $id = strtolower($alt);   # for shortcut links like ![this][].
    }

    if (!isset($md_urls[$id])) {
        return $matches[1];
    }

    # * and _ have to be encoded to avoid conflicting with italics/bold.
    $magic  = array('*', '_');
    $hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $alt = str_replace('"', '&quot;', $alt);
    $src = str_replace($magic, $hashes, $md_urls[$id]);

    $tag = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($md_titles[$id])) {
        $tip  = str_replace($magic, $hashes, $md_titles[$id]);
        $tag .= " title=\"$tip\"";
    }

    return $tag . $md_empty_element_suffix;
}
function _DoImages_inline_callback($matches) {
    #
    # Builds the <img> tag for an inline image.
    # Parameter: $matches[2] = alt text, [3] = src url, [6] = title (only
    #            present when a title was written in the source).
    # Returns:   the image tag. A title attribute is emitted only when the
    #            source supplied one; previously $title was preset to ''
    #            so the isset() test could never fail and every inline
    #            image carried a spurious title="".
    #
    global $md_empty_element_suffix, $md_escape_table;

    # * and _ have to be encoded to avoid conflicting with italics/bold.
    $magic  = array('*', '_');
    $hashes = array($md_escape_table['*'], $md_escape_table['_']);

    $alt_text = str_replace('"', '&quot;', $matches[2]);
    $url      = str_replace($magic, $hashes, $matches[3]);

    $result = "<img src=\"$url\" alt=\"$alt_text\"";
    if (isset($matches[6])) {
        $title = str_replace('"', '&quot;', $matches[6]);
        $title = str_replace($magic, $hashes, $title);
        $result .= " title=\"$title\"";
    }
    $result .= $md_empty_element_suffix;

    return $result;
}
function _DoHeaders($text) {
    #
    # Converts Setext-style and atx-style headers to <h1>..<h6>.
    # The header text is run through _RunSpanGamut().
    #
    # The substitutions use preg_replace_callback(): the /e (eval)
    # modifier used previously is no longer available as of PHP 7. The
    # callbacks receive the raw matched text, so the quote un-slashing
    # that /e required is not needed.
    #

    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h1', $text);
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h2', $text);

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text = preg_replace_callback("{
            ^(\\#{1,6}) # $1 = string of #'s
            [ \\t]*
            (.+?)       # $2 = Header text
            [ \\t]*
            \\#*        # optional closing #'s (not counted)
            \\n+
        }xm",
        '_DoHeaders_callback_atx', $text);

    return $text;
}
# Replacement for a Setext header underlined with '='.
function _DoHeaders_callback_setext_h1($matches) {
    return "<h1>" . _RunSpanGamut($matches[1]) . "</h1>\n\n";
}
# Replacement for a Setext header underlined with '-'.
function _DoHeaders_callback_setext_h2($matches) {
    return "<h2>" . _RunSpanGamut($matches[1]) . "</h2>\n\n";
}
# Replacement for an atx header; the level is the number of leading #'s.
function _DoHeaders_callback_atx($matches) {
    $level = strlen($matches[1]);
    return "<h$level>" . _RunSpanGamut($matches[2]) . "</h$level>\n\n";
}
694 function _DoLists($text) {
696 # Form HTML ordered (numbered) and unordered (bulleted) lists.
698 global $md_tab_width, $md_list_level;
699 $less_than_tab = $md_tab_width - 1;
701 # Re-usable patterns to match list item bullets and number markers:
702 $marker_ul = '[*+-]';
703 $marker_ol = '\d+[.]';
704 $marker_any = "(?:$marker_ul|$marker_ol)";
706 # Re-usable pattern to match any entire ul or ol list:
707 $whole_list = '
708 ( # $1 = whole list
709 ( # $2
710 [ ]{0,'.$less_than_tab.'}
711 ('.$marker_any.') # $3 = first list item marker
712 [ \t]+
714 (?s:.+?)
715 ( # $4
718 \n{2,}
719 (?=\S)
720 (?! # Negative lookahead for another list item marker
721 [ \t]*
722 '.$marker_any.'[ \t]+
726 '; // mx
728 # We use a different prefix before nested lists than top-level lists.
729 # See extended comment in _ProcessListItems().
731 if ($md_list_level) {
732 $text = preg_replace_callback('{
734 '.$whole_list.'
735 }mx',
736 '_DoLists_callback', $text);
738 else {
739 $text = preg_replace_callback('{
740 (?:(?<=\n\n)|\A\n?)
741 '.$whole_list.'
742 }mx',
743 '_DoLists_callback', $text);
746 return $text;
function _DoLists_callback($matches) {
    #
    # Wraps one whole list in <ul> or <ol>.
    # Parameter: $matches[1] = whole list, $matches[3] = first item marker.
    # Returns:   the list markup, items produced by _ProcessListItems().
    #
    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul  = '[*+-]';
    $marker_ol  = '\d+[.]';
    $marker_any = "(?:$marker_ul|$marker_ol)";

    # The first marker decides the list type.
    $tag = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";

    # Turn double returns into triple returns, so that we can make a
    # paragraph for the last item in a list, if necessary:
    $body  = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);
    $items = _ProcessListItems($body, $marker_any);

    return "<$tag>\n" . $items . "</$tag>\n";
}
function _ProcessListItems($list_str, $marker_any) {
    #
    # Process the contents of a single ordered or unordered list, splitting
    # it into individual list items.
    # Parameters: $list_str   - text of the list;
    #             $marker_any - regex fragment matching any item marker.
    # Returns:    the text with each item rewritten by
    #             _ProcessListItems_callback().
    #
    # $md_list_level tracks list nesting: it is incremented on entry and
    # decremented on exit, so zero means "not inside a list".
    #
    # This matters because outside a list, text such as
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # should stay a single paragraph even though its second line starts
    # with a digit-period-space sequence, whereas inside a list (or
    # sub-list) the same line starts a sub-list. It is a kludge: this part
    # of Markdown's syntax is hard to parse perfectly without resorting to
    # mind-reading. Perhaps the syntax should require sub-lists to start
    # with a starting cardinal number, e.g. "1." or "a.".
    #
    global $md_list_level;

    $md_list_level++;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_pattern = '{
        (\n)?                               # leading line = $1
        (^[ \t]*)                           # leading whitespace = $2
        ('.$marker_any.') [ \t]+            # list marker = $3
        ((?s:.+?)                           # list item text = $4
        (\n{1,2}))
        (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
        }xm';
    $list_str = preg_replace_callback($item_pattern,
        '_ProcessListItems_callback', $list_str);

    $md_list_level--;
    return $list_str;
}
function _ProcessListItems_callback($matches) {
    #
    # Turns one list item into <li>...</li>.
    # Parameter: $matches[1] = leading blank line (if any),
    #            $matches[4] = item text.
    #
    $body = $matches[4];

    # An item preceded by a blank line, or containing one, is run through
    # the block gamut; anything else only gets sub-lists and span markup.
    $has_blank_line = !empty($matches[1]) || preg_match('/\n{2,}/', $body);

    if ($has_blank_line) {
        $body = _RunBlockGamut(_Outdent($body));
    } else {
        # Recursion for sub-lists:
        $body = _DoLists(_Outdent($body));
        $body = preg_replace('/\n+$/', '', $body);
        $body = _RunSpanGamut($body);
    }

    return "<li>" . $body . "</li>\n";
}
830 function _DoCodeBlocks($text) {
832 # Process Markdown `<pre><code>` blocks.
834 global $md_tab_width;
835 $text = preg_replace_callback("{
836 (?:\\n\\n|\\A)
837 ( # $1 = the code block -- one or more lines, starting with a space/tab
839 (?:[ ]\{$md_tab_width} | \\t) # Lines must start with a tab or a tab-width of spaces
840 .*\\n+
843 ((?=^[ ]{0,$md_tab_width}\\S)|\\Z) # Lookahead for non-space at line-start, or end of doc
844 }xm",
845 '_DoCodeBlocks_callback', $text);
847 return $text;
function _DoCodeBlocks_callback($matches) {
    #
    # Wraps one indented code block ($matches[1]) in <pre><code>.
    # The block is outdented by one level and its special characters are
    # encoded by _EncodeCode().
    #
    $code = _EncodeCode(_Outdent($matches[1]));

    # trim leading newlines and trailing whitespace
    $code = preg_replace('/\A\n+/', '', $code);
    $code = preg_replace('/\s+\z/', '', $code);

    return "\n\n<pre><code>" . $code . "\n</code></pre>\n\n";
}
863 function _DoCodeSpans($text) {
865 # * Backtick quotes are used for <code></code> spans.
867 # * You can use multiple backticks as the delimiters if you want to
868 # include literal backticks in the code span. So, this input:
870 # Just type ``foo `bar` baz`` at the prompt.
872 # Will translate to:
874 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
876 # There's no arbitrary limit to the number of backticks you
877 # can use as delimiters. If you need three consecutive backticks
878 # in your code, use four for delimiters, etc.
880 # * You can use spaces to get literal backticks at the edges:
882 # ... type `` `bar` `` ...
884 # Turns to:
886 # ... type <code>`bar`</code> ...
888 $text = preg_replace_callback("@
889 (`+) # $1 = Opening run of `
890 (.+?) # $2 = The code block
891 (?<!`)
893 (?!`)
894 @xs",
895 '_DoCodeSpans_callback', $text);
897 return $text;
function _DoCodeSpans_callback($matches) {
    #
    # Wraps the content of one code span ($matches[2]) in <code>, after
    # stripping leading and trailing spaces/tabs and encoding it with
    # _EncodeCode().
    #
    $edges = array(
        '/^[ \t]*/',    # leading whitespace
        '/[ \t]*$/'     # trailing whitespace
    );
    $code = preg_replace($edges, '', $matches[2]);

    return "<code>" . _EncodeCode($code) . "</code>";
}
function _EncodeCode($_) {
    #
    # Encode/escape certain characters inside Markdown code runs.
    # In code these characters are literals and lose their special
    # Markdown meanings.
    #
    global $md_escape_table;

    # Ampersands first (HTML entities are not entities inside a code
    # span), then the angle brackets. str_replace() applies the pairs in
    # array order, so the '&' of "&lt;" / "&gt;" is not re-encoded.
    $html_chars    = array('&',     '<',    '>');
    $html_entities = array('&amp;', '&lt;', '&gt;');
    $_ = str_replace($html_chars, $html_entities, $_);

    # Now, escape characters that are magic in Markdown:
    $magic  = array_keys($md_escape_table);
    $hashes = array_values($md_escape_table);
    return str_replace($magic, $hashes, $_);
}
function _DoItalicsAndBold($text) {
    #
    # Converts **text** / __text__ to <strong> and *text* / _text_ to <em>.
    # preg_replace() applies the patterns in array order; <strong> must go
    # first, otherwise the single-character pattern would consume the
    # doubled delimiters.
    #
    $patterns = array(
        '{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }sx',
        '{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx'
    );
    $replacements = array(
        '<strong>\2</strong>',
        '<em>\2</em>'
    );

    return preg_replace($patterns, $replacements, $text);
}
944 function _DoBlockQuotes($text) {
945 $text = preg_replace_callback('/
946 ( # Wrap whole match in $1
948 ^[ \t]*>[ \t]? # ">" at the start of a line
949 .+\n # rest of the first line
950 (.+\n)* # subsequent consecutive lines
951 \n* # blanks
954 /xm',
955 '_DoBlockQuotes_callback', $text);
957 return $text;
function _DoBlockQuotes_callback($matches) {
    #
    # Converts one block quote ($matches[1]) to <blockquote> markup.
    #
    # trim one level of quoting
    $quoted = preg_replace('/^[ \t]*>[ \t]?/m', '', $matches[1]);
    # trim whitespace-only lines
    $quoted = preg_replace('/^[ \t]+$/m', '', $quoted);

    $quoted = _RunBlockGamut($quoted);   # recurse

    # Indent the generated markup ...
    $quoted = preg_replace('/^/m', " ", $quoted);
    # ... but these leading spaces screw with <pre> content, so they are
    # taken out again inside every <pre> block:
    $quoted = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        '_DoBlockQuotes_callback2', $quoted);

    return "<blockquote>\n$quoted\n</blockquote>\n\n";
}
function _DoBlockQuotes_callback2($matches) {
    #
    # Strips the leading indent from every line of one <pre> block
    # ($matches[1]) and returns the result.
    #
    return preg_replace('/^ /m', '', $matches[1]);
}
function _FormParagraphs($text) {
    #
    # Params:
    #   $text - string to process with html <p> tags
    # Returns:
    #   the chunks of $text, separated by blank lines, where a chunk that
    #   is a key of $md_html_blocks is replaced by the stored block and
    #   every other chunk is span-processed and wrapped in <p>...</p>.
    #
    global $md_html_blocks;

    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+/', '', $text);
    $text = preg_replace('/\n+\z/', '', $text);

    $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($chunks as $i => $chunk) {
        if (isset($md_html_blocks[$chunk])) {
            # Unhashify HTML blocks
            $chunks[$i] = $md_html_blocks[$chunk];
        } else {
            # Wrap <p> tags; leading spaces/tabs are swallowed by the tag.
            $inner = _RunSpanGamut($chunk);
            $chunks[$i] = preg_replace('/^([ \t]*)/', '<p>', $inner) . "</p>";
        }
    }

    return implode("\n\n", $chunks);
}
function _EncodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to be
    # encoded. The two patterns are applied in array order.
    #
    $patterns = array(
        # Ampersands that do not start an entity. Based entirely on
        # Nat Irons's Amputator MT plugin:
        #   http://bumppo.net/projects/amputator/
        '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
        # Naked <'s
        '{<(?![a-z/?\$!])}i'
    );
    $entities = array('&amp;', '&lt;');

    return preg_replace($patterns, $entities, $text);
}
function _EncodeBackslashEscapes($text) {
    #
    # Parameter: String.
    # Returns:   The string, after replacing every backslash escape
    #            sequence listed in $md_backslash_escape_table with the
    #            hash stored for it.
    #
    global $md_escape_table, $md_backslash_escape_table;

    # str_replace() works through the pairs in table order, so escaped
    # backslashes are processed first only if they come first in the table.
    $sequences = array_keys($md_backslash_escape_table);
    $hashes    = array_values($md_backslash_escape_table);

    return str_replace($sequences, $hashes, $text);
}
1044 function _DoAutoLinks($text) {
1045 $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
1046 '<a href="\1">\1</a>', $text);
1048 # Email addresses: <address@domain.foo>
1049 $text = preg_replace('{
1051 (?:mailto:)?
1053 [-.\w]+
1055 [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1058 }exi',
1059 "_EncodeEmailAddress(_UnescapeSpecialChars(_UnslashQuotes('\\1')))",
1060 $text);
1062 return $text;
function _EncodeEmailAddress($addr) {
    #
    # Input: an email address, e.g. "foo@example.com"
    #
    # Output: the email address as a mailto link, with each character
    #   of the address encoded as either a decimal or hex entity, in
    #   the hopes of foiling most address harvesting spam bots. E.g.:
    #
    #   <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
    #     x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
    #     &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
    #
    # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    # mailing list: <http://tinyurl.com/yu7ue>
    #

    # Encode everything except ':' (left alone to spot mailto: later).
    $encoded = preg_replace_callback('/([^\:])/',
        '_EncodeEmailAddress_callback', "mailto:" . $addr);

    $link = "<a href=\"$encoded\">$encoded</a>";

    # strip the mailto: from the visible part
    return preg_replace('/">.+?:/', '">', $link);
}
function _EncodeEmailAddress_callback($matches) {
    #
    # Encodes one character ($matches[1]) at random:
    # roughly 10% raw, 45% hex entity, 45% decimal entity.
    # '@' *must* be encoded, so it is never returned raw.
    #
    $char = $matches[1];
    $code = ord($char);
    $roll = rand(0, 100);

    if ($roll < 45) {
        return '&#x' . dechex($code) . ';';
    }
    if ($roll > 90 && $char != '@') {
        return $char;
    }
    return '&#' . $code . ';';
}
function _UnescapeSpecialChars($text) {
    #
    # Swap back in all the special characters we've hidden: every hash
    # value of $md_escape_table is replaced by the character it stands for.
    #
    global $md_escape_table;

    $hashes = array_values($md_escape_table);
    $chars  = array_keys($md_escape_table);

    return str_replace($hashes, $chars, $text);
}
1115 # _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
1116 # We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
    #
    # Parameter: String containing HTML markup.
    # Returns:   An array of the tokens comprising the input
    #            string. Each token is either a tag (possibly with nested
    #            tags contained therein, such as <a href="<MTFoo>">), or a
    #            run of text between tags. Each element of the array is a
    #            two-element array; the first is either 'tag' or 'text';
    #            the second is the actual value.
    #
    #            Empty runs of text (e.g. between two adjacent tags, or
    #            before a leading tag) produce no token. They used to be
    #            emitted as empty 'tag' tokens.
    #
    # Regular expression derived from the _tokenize() subroutine in
    # Brad Choate's MTRegex plugin.
    # <http://www.bradchoate.com/past/mtregex.php>
    #
    $match = '(?s:<!(?:--.*?--\s*)+>)|'.    # comment
             '(?s:<\?.*?\?>)|'.             # processing instruction
             '(?:</?[\w:$]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)'; # regular tags

    # With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly:
    # even offsets hold text, odd offsets hold the captured tag.
    $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $tokens = array();
    foreach ($parts as $offset => $part) {
        if ($offset % 2) {
            $tokens[] = array('tag', $part);
        } elseif ($part !== '') {
            $tokens[] = array('text', $part);
        }
    }

    return $tokens;
}
endif;
function _Outdent($text) {
#
#	Strip one level of indentation from the start of every line.
#	Parameter:  Text whose lines may begin with a tab or with spaces.
#	Returns:    The text with, on each line, either one leading tab or up
#	            to $md_tab_width leading spaces removed.
#
	global $md_tab_width;

	# /m makes ^ match at the start of every line, not just the first.
	$one_indent_level = "/^(\\t|[ ]{1,$md_tab_width})/m";
	$outdented = preg_replace($one_indent_level, "", $text);

	return $outdented;
}
function _Detab($text) {
#
#	Replace tabs with the appropriate number of spaces.
#	Parameter:  Text that may contain tab characters.
#	Returns:    The text with each tab expanded to the spaces needed to
#	            reach the next tab stop; stops are $md_tab_width columns
#	            apart and the column is measured with strlen().
#
#	Every input line is written back followed by a newline, so the result
#	always ends with "\n", even when the input did not.
#
	global $md_tab_width;

	$expanded = array();
	foreach (explode("\n", $text) as $row) {
		# Pieces of the row that were delimited by tab characters.
		$pieces = explode("\t", $row);

		# Begin with whatever precedes the first tab...
		$out = array_shift($pieces);

		# ...then pad to the next tab stop before appending each piece.
		foreach ($pieces as $piece) {
			$pad = $md_tab_width - strlen($out) % $md_tab_width;
			$out .= str_repeat(" ", $pad) . $piece;
		}
		$expanded[] = $out;
	}
	return implode("\n", $expanded) . "\n";
}
function _UnslashQuotes($text) {
#
#	Undo the automatic slashing of double quotes that happens when
#	preg_replace evaluates its replacement as an expression.
#	Parameter:  String.
#	Returns:    The string with every slash-double-quote (\") sequence
#	            replaced by a single double quote.
#
	$slashed_quote = '\"';
	$plain_quote   = '"';

	return str_replace($slashed_quote, $plain_quote, $text);
}
1207 PHP Markdown
1208 ============
1210 Description
1211 -----------
1213 This is a PHP translation of the original Markdown formatter written in
1214 Perl by John Gruber.
1216 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1217 easy-to-write structured text format into HTML. Markdown's text format
1218 is most similar to that of plain text email, and supports features such
1219 as headers, *emphasis*, code blocks, blockquotes, and links.
1221 Markdown's syntax is designed not as a generic markup language, but
1222 specifically to serve as a front-end to (X)HTML. You can use span-level
1223 HTML tags anywhere in a Markdown document, and you can use block level
1224 HTML tags (like <div> and <table>) as well.
1226 For more information about Markdown's syntax, see:
1228 <http://daringfireball.net/projects/markdown/>
1231 Bugs
1232 ----
1234 To file bug reports please send email to:
1236 <michel.fortin@michelf.com>
1238 Please include with your report: (1) the example input; (2) the output you
1239 expected; (3) the output Markdown actually produced.
1242 Version History
1243 ---------------
1245 See the readme file for detailed release notes for this version.
1247 1.0.1a - 15 Apr 2005
1249 1.0.1 - 17 Dec 2004
1251 1.0 - 21 Aug 2004
1254 Author & Contributors
1255 ---------------------
1257 Original Perl version by John Gruber
1258 <http://daringfireball.net/>
1260 PHP port and other contributions by Michel Fortin
1261 <http://www.michelf.com/>
1264 Copyright and License
1265 ---------------------
1267 Copyright (c) 2004-2005 Michel Fortin
1268 <http://www.michelf.com/>
1269 All rights reserved.
1271 Copyright (c) 2003-2004 John Gruber
1272 <http://daringfireball.net/>
1273 All rights reserved.
1275 Redistribution and use in source and binary forms, with or without
1276 modification, are permitted provided that the following conditions are
1277 met:
1279 * Redistributions of source code must retain the above copyright notice,
1280 this list of conditions and the following disclaimer.
1282 * Redistributions in binary form must reproduce the above copyright
1283 notice, this list of conditions and the following disclaimer in the
1284 documentation and/or other materials provided with the distribution.
1286 * Neither the name "Markdown" nor the names of its contributors may
1287 be used to endorse or promote products derived from this software
1288 without specific prior written permission.
1290 This software is provided by the copyright holders and contributors "as
1291 is" and any express or implied warranties, including, but not limited
1292 to, the implied warranties of merchantability and fitness for a
1293 particular purpose are disclaimed. In no event shall the copyright owner
1294 or contributors be liable for any direct, indirect, incidental, special,
1295 exemplary, or consequential damages (including, but not limited to,
1296 procurement of substitute goods or services; loss of use, data, or
1297 profits; or business interruption) however caused and on any theory of
1298 liability, whether in contract, strict liability, or tort (including
1299 negligence or otherwise) arising in any way out of the use of this
1300 software, even if advised of the possibility of such damage.