Updated the 19 build version to 20081106
[moodle.git] / lib / markdown.php
blob2ffa47205a0bb0039e6bcd9eb2920ce18103a45c
1 <?php
3 # Markdown Extra - A text-to-HTML conversion tool for web writers
5 # PHP Markdown & Extra
6 # Copyright (c) 2004-2007 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
define( 'MARKDOWN_VERSION',      "1.0.1j" ); # Tue 4 Sep 2007
define( 'MARKDOWNEXTRA_VERSION', "1.1.6" );  # Tue 4 Sep 2007

#
# Global default settings:
#
# Every setting below may be pre-defined by the including application.
# It is defined here only when no earlier definition exists, so an earlier
# value always wins and no "constant already defined" diagnostic is raised
# (the previous `@define` form still raised one and merely hid it).
#

# Change to ">" for HTML output
if (!defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX')) {
    define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
}

# Define the width of a tab for code blocks.
if (!defined('MARKDOWN_TAB_WIDTH')) {
    define( 'MARKDOWN_TAB_WIDTH', 4 );
}

# Optional title attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_TITLE')) {
    define( 'MARKDOWN_FN_LINK_TITLE', "" );
}
if (!defined('MARKDOWN_FN_BACKLINK_TITLE')) {
    define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
}

# Optional class attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_CLASS')) {
    define( 'MARKDOWN_FN_LINK_CLASS', "" );
}
if (!defined('MARKDOWN_FN_BACKLINK_CLASS')) {
    define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
}

#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
if (!defined('MARKDOWN_WP_POSTS')) {
    define( 'MARKDOWN_WP_POSTS', true );
}
if (!defined('MARKDOWN_WP_COMMENTS')) {
    define( 'MARKDOWN_WP_COMMENTS', true );
}
48 ### Standard Function Interface ###
# Name of the parser class instantiated by Markdown(). An application may
# define this constant beforehand to substitute another class; the guard
# keeps that earlier value without raising a redefinition diagnostic.
if (!defined('MARKDOWN_PARSER_CLASS')) {
    define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
}

function Markdown($text) {
#
# Initialize the parser and return the result of its transform method.
#
# The parser object is created on the first call and then reused for every
# later call (it is kept in a function-static variable).
#
    static $parser;
    if (!isset($parser)) {
        $parser_class = MARKDOWN_PARSER_CLASS;
        $parser = new $parser_class;
    }

    # Transform text using parser.
    return $parser->transform($text);
}
68 ### WordPress Plugin Interface ###
71 Plugin Name: Markdown Extra
72 Plugin URI: http://www.michelf.com/projects/php-markdown/
73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
74 Version: 1.1.6
75 Author: Michel Fortin
76 Author URI: http://www.michelf.com/
79 if (isset($wp_version)) {
80 # More details about how it works here:
81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
83 # Post content and excerpts
84 # - Remove WordPress paragraph generator.
85 # - Run Markdown on excerpt, then remove all tags.
86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
# Post and excerpt hooks; registered only when MARKDOWN_WP_POSTS is truthy.
87 if (MARKDOWN_WP_POSTS) {
# Unhook wpautop from post content, RSS content and excerpts.
88 remove_filter('the_content', 'wpautop');
89 remove_filter('the_content_rss', 'wpautop');
90 remove_filter('the_excerpt', 'wpautop');
# Hook Markdown at priority 6; excerpts are additionally passed to trim at 7.
91 add_filter('the_content', 'Markdown', 6);
92 add_filter('the_content_rss', 'Markdown', 6);
93 add_filter('get_the_excerpt', 'Markdown', 6);
94 add_filter('get_the_excerpt', 'trim', 7);
# Displayed excerpts get paragraph tags added; RSS excerpts get them stripped.
95 add_filter('the_excerpt', 'mdwp_add_p');
96 add_filter('the_excerpt_rss', 'mdwp_strip_p');
# Unhook balanceTags from the *_save_pre hooks and attach it to the display
# hooks instead (priority 50 for content, 9 for excerpts).
98 remove_filter('content_save_pre', 'balanceTags', 50);
99 remove_filter('excerpt_save_pre', 'balanceTags', 50);
100 add_filter('the_content', 'balanceTags', 50);
101 add_filter('get_the_excerpt', 'balanceTags', 9);
104 # Comments
105 # - Remove WordPress paragraph generator.
106 # - Remove WordPress auto-link generator.
107 # - Scramble important tags before passing them to the kses filter.
108 # - Run Markdown on excerpt then remove paragraph tags.
# Comment hooks; registered only when MARKDOWN_WP_COMMENTS is truthy.
109 if (MARKDOWN_WP_COMMENTS) {
# Unhook WordPress' own paragraph and auto-link filters from comment text.
110 remove_filter('comment_text', 'wpautop', 30);
111 remove_filter('comment_text', 'make_clickable');
# On pre_comment_content: Markdown at 6, tags swapped for placeholders at 8
# and swapped back at 12, so filters registered between those priorities see
# placeholders rather than the tags (presumably kses, per the note above --
# confirm against the WordPress version in use).
112 add_filter('pre_comment_content', 'Markdown', 6);
113 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
114 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
115 add_filter('get_comment_text', 'Markdown', 6);
116 add_filter('get_comment_excerpt', 'Markdown', 6);
117 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
# Two parallel 10-element lists used by mdwp_hide_tags()/mdwp_show_tags():
# entry N of $mdwp_placeholders stands in for entry N of $mdwp_hidden_tags.
# The placeholder literal is passed through str_rot13() before being split.
119 global $mdwp_hidden_tags, $mdwp_placeholders;
120 $mdwp_hidden_tags = explode(' ',
121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
122 $mdwp_placeholders = explode(' ', str_rot13(
123 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
124 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
function mdwp_add_p($text) {
#
# Wrap $text in paragraph tags unless it is empty or already begins with a
# block-level tag (p, ul, ol, dl, pre, blockquote; case-insensitive).
# Inside the wrapped text, every run of two or more newlines becomes a
# paragraph break.
#
    if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
        return $text;
    }

    $wrapped = '<p>' . $text . '</p>';
    return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
}
function mdwp_strip_p($t) {
#
# Remove every bare <p> and </p> tag (case-insensitive) from $t, keeping
# the text between them. Paragraph tags carrying attributes do not match.
#
    $without_p = preg_replace('{</?p>}i', '', $t);
    return $without_p;
}
function mdwp_hide_tags($text) {
#
# Replace each tag listed in the global $mdwp_hidden_tags with the
# placeholder at the same position in the global $mdwp_placeholders.
#
    global $mdwp_hidden_tags, $mdwp_placeholders;

    $scrambled = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
    return $scrambled;
}
function mdwp_show_tags($text) {
#
# Inverse of mdwp_hide_tags(): replace each placeholder listed in the
# global $mdwp_placeholders with the tag at the same position in the
# global $mdwp_hidden_tags.
#
    global $mdwp_hidden_tags, $mdwp_placeholders;

    $restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
    return $restored;
}
148 ### bBlog Plugin Info ###
function identify_modifier_markdown() {
#
# Return the bBlog plugin descriptor for this modifier as an associative
# array. Keys are inserted in the same order as before.
#
    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'PHP Markdown Extra';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = MARKDOWNEXTRA_VERSION;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';
    return $info;
}
164 ### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
#
# Smarty modifier entry point: hands $text to Markdown() and returns its
# result unchanged.
#
    $html = Markdown($text);
    return $html;
}
171 ### Textile Compatibility Mode ###
173 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
# Active only when the last 16 characters of this file's path spell
# "classTextile.php" (compared case-insensitively).
if (!strcasecmp(substr(__FILE__, -16), "classTextile.php")) {
    # PHP SmartyPants is optional; a missing file is silently ignored.
    @include_once 'smartypants.php';

    # Stand-in for the Textile class: same method names, Markdown inside.
    class Textile {
        # Runs Markdown only when both $lite and $encode are empty, then
        # SmartyPants whenever that function exists.
        function TextileThis($text, $lite='', $encode='') {
            if ($lite == '' && $encode == '') {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }

        # Restrictions are not supported: $noimage is ignored and the call
        # is forwarded to TextileThis().
        function TextileRestricted($text, $lite='', $noimage='') {
            return $this->TextileThis($text, $lite);
        }

        # Workaround to ensure compatibility with TextPattern 4.0.3:
        # returns its argument untouched.
        function blockLite($text) {
            return $text;
        }
    }
}
197 # Markdown Parser Class
200 class Markdown_Parser {
202 # Regex to match balanced [brackets].
203 # Needed to insert a maximum bracket depth while converting to PHP.
204 var $nested_brackets_depth = 6;
205 var $nested_brackets;
207 var $nested_url_parenthesis_depth = 4;
208 var $nested_url_parenthesis;
210 # Table of hash values for escaped characters:
211 var $escape_chars = '\`*_{}[]()>#+-.!';
213 # Change to ">" for HTML output.
214 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
215 var $tab_width = MARKDOWN_TAB_WIDTH;
217 # Change to `true` to disallow markup or entities.
218 var $no_markup = false;
219 var $no_entities = false;
function Markdown_Parser() {
#
# PHP 4 style constructor. Kept so that PHP 4 and any code calling this
# method by name keep working; the actual initialization lives in
# __construct(). The class name is spelled out so the call always reaches
# this class's own __construct().
#
    Markdown_Parser::__construct();
}

function __construct() {
#
# Constructor function. Initialize appropriate member variables.
#
# Declared under this name because PHP 8 no longer treats a method named
# after its class as a constructor; without it the patterns and the gamut
# ordering below would never be set up there.
#
    $this->_initDetab();

    # Build the bounded-depth pattern matching balanced [brackets].
    $this->nested_brackets =
        str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
        str_repeat('\])*', $this->nested_brackets_depth);

    # Same idea for balanced (parentheses) inside URLs.
    $this->nested_url_parenthesis =
        str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
        str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

    # Sort document, block, and span gamut in ascending priority order.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
243 # Internal hashes used during transformation.
244 var $urls = array();
245 var $titles = array();
246 var $html_hashes = array();
248 # Status flag to avoid invalid nesting.
249 var $in_anchor = false;
function transform($text) {
#
# Main function: run the whole conversion on $text and return the result
# followed by one newline. The order of the steps below is essential.
#
    # Clear the per-document hashes. Without this, link definitions and
    # hashed blocks from a previously converted text would leak into this
    # one (e.g. on an index page showing several articles).
    $this->urls        = array();
    $this->titles      = array();
    $this->html_hashes = array();

    # Standardize line endings (DOS and Mac to Unix) and make sure the
    # text ends with a couple of newlines.
    $text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

    # Convert all tabs to spaces, then turn block-level HTML blocks into
    # hash entries.
    $text = $this->hashHTMLBlocks($this->detab($text));

    # Empty out lines consisting only of spaces, so that later patterns
    # can match consecutive blank lines with a plain \n+.
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Run document gamut methods, in their stored order.
    foreach (array_keys($this->document_gamut) as $step) {
        $text = $this->$step($text);
    }

    return $text . "\n";
}
294 var $document_gamut = array(
295 # Strip link definitions, store in hashes.
296 "stripLinkDefinitions" => 20,
298 "runBasicBlockGamut" => 30,
302 function stripLinkDefinitions($text) {
304 # Strips link definitions from text, stores the URLs and titles in
305 # hash references.
307 $less_than_tab = $this->tab_width - 1;
309 # Link defs are in the form: ^[id]: url "optional title"
310 $text = preg_replace_callback('{
311 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
312 [ ]*
313 \n? # maybe *one* newline
314 [ ]*
315 <?(\S+?)>? # url = $2
316 [ ]*
317 \n? # maybe one newline
318 [ ]*
320 (?<=\s) # lookbehind for whitespace
321 ["(]
322 (.*?) # title = $3
323 [")]
324 [ ]*
325 )? # title is optional
326 (?:\n+|\Z)
327 }xm',
328 array(&$this, '_stripLinkDefinitions_callback'),
329 $text);
330 return $text;
function _stripLinkDefinitions_callback($matches) {
#
# Record one link definition: $matches[1] is the id (stored lower-cased),
# $matches[2] the URL and $matches[3] the optional title. Returns the
# empty string so the definition disappears from the text.
#
    $id = strtolower($matches[1]);

    $this->urls[$id] = $this->encodeAmpsAndAngles($matches[2]);
    if (isset($matches[3])) {
        # Double quotes would end the title attribute early.
        $this->titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
341 function hashHTMLBlocks($text) {
342 if ($this->no_markup) return $text;
344 $less_than_tab = $this->tab_width - 1;
346 # Hashify HTML blocks:
347 # We only want to do this for block-level HTML tags, such as headers,
348 # lists, and tables. That's because we still want to wrap <p>s around
349 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
350 # phrase emphasis, and spans. The list of tags we're looking for is
351 # hard-coded:
353 # * List "a" is made of tags which can be both inline or block-level.
354 # These will be treated block-level when the start tag is alone on
355 # its line, otherwise they're not matched here and will be taken as
356 # inline later.
357 # * List "b" is made of tags which are always block-level;
359 $block_tags_a = 'ins|del';
360 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
361 'script|noscript|form|fieldset|iframe|math';
363 # Regular expression for the content of a block tag.
364 $nested_tags_level = 4;
365 $attr = '
366 (?> # optional tag attributes
367 \s # starts with whitespace
369 [^>"/]+ # text outside quotes
371 /+(?!>) # slash not followed by ">"
373 "[^"]*" # text inside double quotes (tolerate ">")
375 \'[^\']*\' # text inside single quotes (tolerate ">")
379 $content =
380 str_repeat('
382 [^<]+ # content without tag
384 <\2 # nested opening tag
385 '.$attr.' # attributes
389 >', $nested_tags_level). # end of opening tag
390 '.*?'. # last level nested tag content
391 str_repeat('
392 </\2\s*> # closing nested tag
395 <(?!/\2\s*> # other tags with a different name
397 )*',
398 $nested_tags_level);
399 $content2 = str_replace('\2', '\3', $content);
401 # First, look for nested blocks, e.g.:
402 # <div>
403 # <div>
404 # tags for inner block must be indented.
405 # </div>
406 # </div>
408 # The outermost tags must start at the left margin for this to match, and
409 # the inner nested divs must be indented.
410 # We need to do this before the next, more liberal match, because the next
411 # match will start at the first `<div>` and stop at the first `</div>`.
412 $text = preg_replace_callback('{(?>
414 (?<=\n\n) # Starting after a blank line
415 | # or
416 \A\n? # the beginning of the doc
418 ( # save in $1
420 # Match from `\n<tag>` to `</tag>\n`, handling nested tags
421 # in between.
423 [ ]{0,'.$less_than_tab.'}
424 <('.$block_tags_b.')# start tag = $2
425 '.$attr.'> # attributes followed by > and \n
426 '.$content.' # content, support nesting
427 </\2> # the matching end tag
428 [ ]* # trailing spaces/tabs
429 (?=\n+|\Z) # followed by a newline or end of document
431 | # Special version for tags of group a.
433 [ ]{0,'.$less_than_tab.'}
434 <('.$block_tags_a.')# start tag = $3
435 '.$attr.'>[ ]*\n # attributes followed by >
436 '.$content2.' # content, support nesting
437 </\3> # the matching end tag
438 [ ]* # trailing spaces/tabs
439 (?=\n+|\Z) # followed by a newline or end of document
441 | # Special case just for <hr />. It was easier to make a special
442 # case than to make the other regex more complicated.
444 [ ]{0,'.$less_than_tab.'}
445 <(hr) # start tag = $2
446 \b # word break
447 ([^<>])*? #
448 /?> # the matching end tag
449 [ ]*
450 (?=\n{2,}|\Z) # followed by a blank line or end of document
452 | # Special case for standalone HTML comments:
454 [ ]{0,'.$less_than_tab.'}
455 (?s:
456 <!-- .*? -->
458 [ ]*
459 (?=\n{2,}|\Z) # followed by a blank line or end of document
461 | # PHP and ASP-style processor instructions (<? and <%)
463 [ ]{0,'.$less_than_tab.'}
464 (?s:
465 <([?%]) # $2
469 [ ]*
470 (?=\n{2,}|\Z) # followed by a blank line or end of document
473 )}Sxmi',
474 array(&$this, '_hashHTMLBlocks_callback'),
475 $text);
477 return $text;
function _hashHTMLBlocks_callback($matches) {
#
# Swap the matched HTML block ($matches[1]) for its block-level hash key,
# surrounded by blank lines.
#
    return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
}
function hashPart($text, $boundary = 'X') {
#
# Called whenever a function inserts an atomic element in the text stream.
# Stores $text in $this->html_hashes and returns the token standing in for
# it; unhash() turns such tokens back into their text.
#
# The token is $boundary, the byte 0x1A, a counter, then $boundary again.
# The counter is a function-static variable, so it keeps increasing across
# calls. formParagraphs() recognizes block-level elements by the "B"
# boundary; the original notes also reserve ":" for word separators and
# "S" for general span-level elements.
#
    static $i = 0;

    # Swap back any tag hash found in $text so we do not have to `unhash`
    # multiple times at the end.
    $text = $this->unhash($text);

    # Then hash the block.
    $i++;
    $key = $boundary . "\x1A" . $i . $boundary;
    $this->html_hashes[$key] = $text;

    return $key; # String that will replace the tag.
}
function hashBlock($text) {
#
# Shortcut function for hashPart with block-level boundaries.
#
    $block_boundary = 'B';
    return $this->hashPart($text, $block_boundary);
}
517 var $block_gamut = array(
519 # These are all the transformations that form block-level
520 # tags like paragraphs, headers, and list items.
522 "doHeaders" => 10,
523 "doHorizontalRules" => 20,
525 "doLists" => 40,
526 "doCodeBlocks" => 50,
527 "doBlockQuotes" => 60,
function runBlockGamut($text) {
#
# Run block gamut transformations.
#
# Raw HTML blocks are hashed first. This has to happen for each block and
# not only once at the beginning of the conversion: HTML nested in list
# items may have been indented, and the whole-document pass would have
# taken such indented blocks for code blocks.
#
    return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
}
function runBasicBlockGamut($text) {
#
# Run block gamut transformations, without hashing HTML blocks. This is
# useful when HTML blocks are known to be already hashed, like in the first
# whole-document pass.
#
    foreach (array_keys($this->block_gamut) as $step) {
        $text = $this->$step($text);
    }

    # Finally form paragraphs and restore hashed blocks.
    return $this->formParagraphs($text);
}
function doHorizontalRules($text) {
#
# Replace every horizontal-rule line with a hashed <hr> element.
#
# A rule is a line made of one marker character (-, * or _) repeated at
# least three times, with up to three leading spaces and up to two spaces
# between markers.
#
# The hyphen is listed first in the marker class so that it stands for
# itself. Written as [*-_] it formed a range running from "*" to "_",
# which also covers digits and uppercase letters, so a line such as "AAA"
# was turned into a rule.
#
    return preg_replace(
        '{
            ^[ ]{0,3}       # Leading space
            ([-*_])         # $1: First marker
            (?>             # Repeated marker group
                [ ]{0,2}    # Zero, one, or two spaces.
                \1          # Marker character
            ){2,}           # Group repeated at least twice
            [ ]*            # Trailing spaces
            $               # End of line.
        }mx',
        "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
        $text);
}
579 var $span_gamut = array(
581 # These are all the transformations that occur *within* block-level
582 # tags like paragraphs, headers, and list items.
584 # Process character escapes, code spans, and inline HTML
585 # in one shot.
586 "parseSpan" => -30,
588 # Process anchor and image tags. Images must come first,
589 # because ![foo][f] looks like an anchor.
590 "doImages" => 10,
591 "doAnchors" => 20,
593 # Make links out of things like `<http://example.com/>`
594 # Must come after doAnchors, because you can use < and >
595 # delimiters in inline links like [this](<url>).
596 "doAutoLinks" => 30,
597 "encodeAmpsAndAngles" => 40,
599 "doItalicsAndBold" => 50,
600 "doHardBreaks" => 60,
function runSpanGamut($text) {
#
# Run span gamut transformations: call every method named in
# $this->span_gamut, in stored order, feeding each one the previous result.
#
    foreach (array_keys($this->span_gamut) as $step) {
        $text = $this->$step($text);
    }
    return $text;
}
function doHardBreaks($text) {
#
# Do hard breaks: two or more spaces right before a newline are handed to
# _doHardBreaks_callback, which supplies the replacement.
#
    $hard_break = '/ {2,}\n/';
    return preg_replace_callback($hard_break,
        array(&$this, '_doHardBreaks_callback'), $text);
}
function _doHardBreaks_callback($matches) {
#
# Replacement for one hard break: a hashed <br> element (closed with the
# configured empty-element suffix) followed by a newline. $matches is not
# used.
#
    $br = "<br" . $this->empty_element_suffix . "\n";
    return $this->hashPart($br);
}
625 function doAnchors($text) {
627 # Turn Markdown link shortcuts into XHTML <a> tags.
629 if ($this->in_anchor) return $text;
630 $this->in_anchor = true;
633 # First, handle reference-style links: [link text] [id]
635 $text = preg_replace_callback('{
636 ( # wrap whole match in $1
638 ('.$this->nested_brackets.') # link text = $2
641 [ ]? # one optional space
642 (?:\n[ ]*)? # one optional newline followed by spaces
645 (.*?) # id = $3
648 }xs',
649 array(&$this, '_doAnchors_reference_callback'), $text);
652 # Next, inline-style links: [link text](url "optional title")
654 $text = preg_replace_callback('{
655 ( # wrap whole match in $1
657 ('.$this->nested_brackets.') # link text = $2
659 \( # literal paren
660 [ ]*
662 <(\S*)> # href = $3
664 ('.$this->nested_url_parenthesis.') # href = $4
666 [ ]*
667 ( # $5
668 ([\'"]) # quote char = $6
669 (.*?) # Title = $7
670 \6 # matching quote
671 [ ]* # ignore any spaces/tabs between closing quote and )
672 )? # title is optional
675 }xs',
676 array(&$this, '_DoAnchors_inline_callback'), $text);
679 # Last, handle reference-style shortcuts: [link text]
680 # These must come last in case you've also got [link test][1]
681 # or [link test](/foo)
683 // $text = preg_replace_callback('{
684 // ( # wrap whole match in $1
685 // \[
686 // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
687 // \]
688 // )
689 // }xs',
690 // array(&$this, '_doAnchors_reference_callback'), $text);
692 $this->in_anchor = false;
693 return $text;
function _doAnchors_reference_callback($matches) {
#
# Build the <a> element for a reference-style link.
#   $matches[1]  whole match, returned untouched when the id is unknown
#   $matches[2]  link text
#   $matches[3]  link id; when empty, the link text serves as the id
#
    $link_text = $matches[2];
    $link_id   = isset($matches[3]) ? $matches[3] : null;

    if ($link_id == "") {
        # for shortcut links like [this][] or [this].
        $link_id = $link_text;
    }

    # lower-case and turn embedded newlines into spaces
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    if (!isset($this->urls[$link_id])) {
        # No such definition: leave the source text as it was.
        return $matches[1];
    }

    $href = $this->encodeAmpsAndAngles($this->urls[$link_id]);
    $tag  = "<a href=\"$href\"";

    if (isset($this->titles[$link_id])) {
        $title_attr = $this->encodeAmpsAndAngles($this->titles[$link_id]);
        $tag .= " title=\"$title_attr\"";
    }

    $tag .= ">" . $this->runSpanGamut($link_text) . "</a>";
    return $this->hashPart($tag);
}
function _doAnchors_inline_callback($matches) {
#
# Build the <a> element for an inline link.
#   $matches[2]  link text
#   $matches[3]  URL written between angle brackets, or empty
#   $matches[4]  URL written bare (used when $matches[3] is empty)
#   $matches[7]  optional title
#
    $link_text = $this->runSpanGamut($matches[2]);

    $raw_url = ($matches[3] == '') ? $matches[4] : $matches[3];
    $href    = $this->encodeAmpsAndAngles($raw_url);

    $tag = "<a href=\"$href\"";
    if (isset($matches[7])) {
        $title_attr = str_replace('"', '&quot;', $matches[7]);
        $title_attr = $this->encodeAmpsAndAngles($title_attr);
        $tag .= " title=\"$title_attr\"";
    }

    # NOTE(review): the link text goes through the span gamut a second time
    # here, exactly as before. Kept to preserve output; confirm whether the
    # second pass is intended.
    $link_text = $this->runSpanGamut($link_text);
    $tag .= ">$link_text</a>";

    return $this->hashPart($tag);
}
751 function doImages($text) {
753 # Turn Markdown image shortcuts into <img> tags.
756 # First, handle reference-style labeled images: ![alt text][id]
758 $text = preg_replace_callback('{
759 ( # wrap whole match in $1
761 ('.$this->nested_brackets.') # alt text = $2
764 [ ]? # one optional space
765 (?:\n[ ]*)? # one optional newline followed by spaces
768 (.*?) # id = $3
772 }xs',
773 array(&$this, '_doImages_reference_callback'), $text);
776 # Next, handle inline images: ![alt text](url "optional title")
777 # Don't forget: encode * and _
779 $text = preg_replace_callback('{
780 ( # wrap whole match in $1
782 ('.$this->nested_brackets.') # alt text = $2
784 \s? # One optional whitespace character
785 \( # literal paren
786 [ ]*
788 <(\S*)> # src url = $3
790 ('.$this->nested_url_parenthesis.') # src url = $4
792 [ ]*
793 ( # $5
794 ([\'"]) # quote char = $6
795 (.*?) # title = $7
796 \6 # matching quote
797 [ ]*
798 )? # title is optional
801 }xs',
802 array(&$this, '_doImages_inline_callback'), $text);
804 return $text;
function _doImages_reference_callback($matches) {
#
# Build the <img> element for a reference-style image.
#   $matches[1]  whole match, returned untouched when the id is unknown
#   $matches[2]  alt text
#   $matches[3]  link id; when empty, the lower-cased alt text is the id
#
    $alt = $matches[2];
    $id  = strtolower($matches[3]);

    if ($id == "") {
        $id = strtolower($alt); # for shortcut links like ![this][].
    }

    if (!isset($this->urls[$id])) {
        # If there's no such link ID, leave intact:
        return $matches[1];
    }

    $alt = str_replace('"', '&quot;', $alt);
    $src = $this->urls[$id];

    $tag = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($this->titles[$id])) {
        $tag .= " title=\"" . $this->titles[$id] . "\"";
    }
    $tag .= $this->empty_element_suffix;

    return $this->hashPart($tag);
}
function _doImages_inline_callback($matches) {
#
# Build the <img> element for an inline image.
#   $matches[2]  alt text
#   $matches[3]  URL written between angle brackets, or empty
#   $matches[4]  URL written bare (used when $matches[3] is empty)
#   $matches[7]  optional title
#
    $src = ($matches[3] == '') ? $matches[4] : $matches[3];
    $alt = str_replace('"', '&quot;', $matches[2]);

    $tag = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($matches[7])) {
        # Double quotes would end the title attribute early.
        $tag .= " title=\"" . str_replace('"', '&quot;', $matches[7]) . "\"";
    }
    $tag .= $this->empty_element_suffix;

    return $this->hashPart($tag);
}
851 function doHeaders($text) {
852 # Setext-style headers:
853 # Header 1
854 # ========
856 # Header 2
857 # --------
859 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
860 array(&$this, '_doHeaders_callback_setext'), $text);
862 # atx-style headers:
863 # # Header 1
864 # ## Header 2
865 # ## Header 2 with closing hashes ##
866 # ...
867 # ###### Header 6
869 $text = preg_replace_callback('{
870 ^(\#{1,6}) # $1 = string of #\'s
871 [ ]*
872 (.+?) # $2 = Header text
873 [ ]*
874 \#* # optional closing #\'s (not counted)
876 }xm',
877 array(&$this, '_doHeaders_callback_atx'), $text);
879 return $text;
function _doHeaders_callback_setext($matches) {
#
# Build a hashed header block from a Setext-style header.
#   $matches[1]  header text (run through the span gamut)
#   $matches[2]  underline; one starting with "=" gives <h1>, anything
#                else gives <h2>
#
    # Square-bracket offset: the curly-brace form ($str{0}) is rejected by
    # PHP 8 and would stop the whole file from loading there.
    $level = $matches[2][0] == '=' ? 1 : 2;
    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Build a hashed header block from an atx-style header. The header level
# is the length of $matches[1]; $matches[2] is the header text, which is
# run through the span gamut.
#
    $depth   = strlen($matches[1]);
    $content = $this->runSpanGamut($matches[2]);
    $header  = "<h" . $depth . ">" . $content . "</h" . $depth . ">";

    return "\n" . $this->hashBlock($header) . "\n\n";
}
893 function doLists($text) {
895 # Form HTML ordered (numbered) and unordered (bulleted) lists.
897 $less_than_tab = $this->tab_width - 1;
899 # Re-usable patterns to match list item bullets and number markers:
900 $marker_ul = '[*+-]';
901 $marker_ol = '\d+[.]';
902 $marker_any = "(?:$marker_ul|$marker_ol)";
904 $markers = array($marker_ul, $marker_ol);
906 foreach ($markers as $marker) {
907 # Re-usable pattern to match any entire ul or ol list:
908 $whole_list = '
909 ( # $1 = whole list
910 ( # $2
911 [ ]{0,'.$less_than_tab.'}
912 ('.$marker.') # $3 = first list item marker
913 [ ]+
915 (?s:.+?)
916 ( # $4
919 \n{2,}
920 (?=\S)
921 (?! # Negative lookahead for another list item marker
922 [ ]*
923 '.$marker.'[ ]+
927 '; // mx
929 # We use a different prefix before nested lists than top-level lists.
930 # See extended comment in _ProcessListItems().
932 if ($this->list_level) {
933 $text = preg_replace_callback('{
935 '.$whole_list.'
936 }mx',
937 array(&$this, '_doLists_callback'), $text);
939 else {
940 $text = preg_replace_callback('{
941 (?:(?<=\n)\n|\A\n?) # Must eat the newline
942 '.$whole_list.'
943 }mx',
944 array(&$this, '_doLists_callback'), $text);
948 return $text;
function _doLists_callback($matches) {
#
# Turn one matched list into a hashed <ul> or <ol> block.
#   $matches[1]  whole list
#   $matches[3]  first item marker; it decides the list type and which
#                marker pattern is used to split the list into items
#
    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul = '[*+-]';
    $marker_ol = '\d+[.]';

    if (preg_match("/$marker_ul/", $matches[3])) {
        $list_type   = "ul";
        $item_marker = $marker_ul;
    }
    else {
        $list_type   = "ol";
        $item_marker = $marker_ol;
    }

    $items = $this->processListItems($matches[1] . "\n", $item_marker);
    $block = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");

    return "\n". $block ."\n\n";
}
968 var $list_level = 0;
function processListItems($list_str, $marker_any) {
#
# Process the contents of a single ordered or unordered list, splitting it
# into individual list items. $marker_any is the regex fragment matching
# an item marker of this list.
#
# $this->list_level counts how deep inside lists we currently are: it is
# incremented on entry here and decremented on exit, so zero means "not in
# a list". doLists() needs this because the same text reads differently
# inside and outside a list:
#
#       I recommend upgrading to version
#       8. Oops, now this line is treated
#       as a sub-list.
#
# Outside a list this is a single paragraph, even though the second line
# starts with a digit-period-space sequence. Inside a list (or sub-list)
# that line starts a sub-list. It is a kludge, for a part of Markdown's
# syntax that is hard to parse perfectly.
#
    $this->list_level++;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_pattern = '{
        (\n)?                           # leading line = $1
        (^[ ]*)                         # leading whitespace = $2
        ('.$marker_any.') [ ]+          # list marker = $3
        ((?s:.+?))                      # list item text = $4
        (?:(\n+(?=\n))|\n)              # tailing blank line = $5
        (?= \n* (\z | \2 ('.$marker_any.') [ ]+))
        }xm';
    $items = preg_replace_callback($item_pattern,
        array(&$this, '_processListItems_callback'), $list_str);

    $this->list_level--;
    return $items;
}
function _processListItems_callback($matches) {
#
# Convert one list item ($matches[4]) into an <li> element.
#
# The item is treated as block content when a blank line comes before it
# ($matches[1]), after it ($matches[5]) or inside it; otherwise it is
# treated as span content that may still contain sub-lists.
#
    $item = $matches[4];

    $has_blank_line = !empty($matches[1])
        || !empty($matches[5])
        || preg_match('/\n{2,}/', $item);

    if ($has_blank_line) {
        $item = $this->runBlockGamut($this->outdent($item)."\n");
    }
    else {
        # Recursion for sub-lists:
        $item = $this->doLists($this->outdent($item));
        $item = preg_replace('/\n+$/', '', $item);
        $item = $this->runSpanGamut($item);
    }

    return "<li>" . $item . "</li>\n";
}
1036 function doCodeBlocks($text) {
1038 # Process Markdown `<pre><code>` blocks.
1040 $text = preg_replace_callback('{
1041 (?:\n\n|\A)
1042 ( # $1 = the code block -- one or more lines, starting with a space/tab
1044 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
1045 .*\n+
1048 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1049 }xm',
1050 array(&$this, '_doCodeBlocks_callback'), $text);
1052 return $text;
function _doCodeBlocks_callback($matches) {
#
# Turn one matched code block ($matches[1]) into a hashed
# <pre><code> block, surrounded by blank lines.
#
    # Remove one level of indentation, then escape HTML special characters
    # (quotes are left alone).
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

    # trim leading newlines and trailing newlines
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $block = "<pre><code>" . $code . "\n</code></pre>";
    return "\n\n".$this->hashBlock($block)."\n\n";
}
function makeCodeSpan($code) {
#
# Create a code span markup for $code: the text is trimmed, HTML special
# characters are escaped (quotes are left alone) and the resulting <code>
# element is hashed. Per the original note, called from handleSpanToken.
#
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    return $this->hashPart("<code>" . $escaped . "</code>");
}
1077 function doItalicsAndBold($text) {
1078 # <strong> must go first:
1079 $text = preg_replace_callback('{
1080 ( # $1: Marker
1081 (?<!\*\*) \* | # (not preceded by two chars of
1082 (?<!__) _ # the same marker)
1085 (?=\S) # Not followed by whitespace
1086 (?!\1\1) # or two others marker chars.
1087 ( # $2: Content
1089 [^*_]+? # Anthing not em markers.
1091 # Balence any regular emphasis inside.
1092 \1 (?=\S) .+? (?<=\S) \1
1094 . # Allow unbalenced * and _.
1097 (?<=\S) \1\1 # End mark not preceded by whitespace.
1098 }sx',
1099 array(&$this, '_doItalicAndBold_strong_callback'), $text);
1100 # Then <em>:
1101 $text = preg_replace_callback(
1102 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
1103 array(&$this, '_doItalicAndBold_em_callback'), $text);
1105 return $text;
function _doItalicAndBold_em_callback($matches) {
#
# Wrap the emphasized text ($matches[2]), after running it through the
# span gamut, in a hashed <em> element.
#
    $inner = $this->runSpanGamut($matches[2]);
    return $this->hashPart("<em>" . $inner . "</em>");
}
function _doItalicAndBold_strong_callback($matches) {
#
# Wrap the strongly emphasized text ($matches[2]), after running it
# through the span gamut, in a hashed <strong> element.
#
    $inner = $this->runSpanGamut($matches[2]);
    return $this->hashPart("<strong>" . $inner . "</strong>");
}
1119 function doBlockQuotes($text) {
1120 $text = preg_replace_callback('/
1121 ( # Wrap whole match in $1
1123 ^[ ]*>[ ]? # ">" at the start of a line
1124 .+\n # rest of the first line
1125 (.+\n)* # subsequent consecutive lines
1126 \n* # blanks
1129 /xm',
1130 array(&$this, '_doBlockQuotes_callback'), $text);
1132 return $text;
function _doBlockQuotes_callback($matches) {
#
# Turn one matched block quote ($matches[1]) into a hashed <blockquote>
# block.
#
    # trim one level of quoting - trim whitespace-only lines
    $quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

    # Recurse: the quote may itself contain any block-level construct.
    $quoted = $this->runBlockGamut($quoted);

    # Indent every line of the quoted content.
    $quoted = preg_replace('/^/m', " ", $quoted);

    # These leading spaces cause problem with <pre> content,
    # so we need to fix that:
    $quoted = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array(&$this, '_DoBlockQuotes_callback2'), $quoted);

    $block = "<blockquote>\n$quoted\n</blockquote>";
    return "\n". $this->hashBlock($block)."\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Undo the block-quote indentation inside a <pre> element: strip the
# indent from the start of every line of $matches[1] and return the result.
#
    return preg_replace('/^ /m', '', $matches[1]);
}
function formParagraphs($text) {
#
#   Params:
#       $text - string to process with html <p> tags
#
# Splits $text on blank lines. A chunk consisting solely of a block-level
# hash key is replaced by the stored block; any other chunk is a
# paragraph: it goes through the span gamut, gets wrapped in <p>...</p>
# and is unhashed. Chunks are joined back with one blank line in between.
#
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($grafs as $idx => $chunk) {
        if (preg_match('/^B\x1A[0-9]+B$/', $chunk)) {
            # Is a block: restore it as stored, without paragraph tags.
            $grafs[$idx] = $this->html_hashes[$chunk];
            continue;
        }

        # Is a paragraph. Leading spaces are replaced by the opening tag.
        $chunk = $this->runSpanGamut($chunk);
        $chunk = preg_replace('/^([ ]*)/', "<p>", $chunk);
        $grafs[$idx] = $this->unhash($chunk . "</p>");
    }

    return implode("\n\n", $grafs);
}
function encodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to be
    # encoded.
    #
    # When $this->no_entities is set, every "&" and "<" is encoded.
    # Otherwise "&" is left alone when it starts something that looks
    # like an entity, and "<" is left alone when the next character could
    # start a tag.
    #
    if ($this->no_entities) {
        $text = str_replace('&', '&amp;', $text);
        return str_replace('<', '&lt;', $text);
    }

    # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    $amp_encoded = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
                                '&amp;', $text);

    # Encode naked <'s
    return preg_replace('{<(?![a-z/?\$!%])}i', '&lt;', $amp_encoded);
}
function doAutoLinks($text) {
    #
    # Convert <http://...>-style URLs and <address@domain.foo>-style
    # e-mail addresses into links via the two _doAutoLinks_* callbacks.
    #
    # URLs: <scheme:...> with scheme http, https, ftp or dict.
    $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}',
        array(&$this, '_doAutoLinks_url_callback'), $text);

    # Email addresses: <address@domain.foo>
    # $1 must capture the bare address: the callback reads $matches[1].
    $text = preg_replace_callback('{
        <
        (?:mailto:)?
        (
            [-.\w\x80-\xFF]+
            \@
            [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
        )
        >
        }xi',
        array(&$this, '_doAutoLinks_email_callback'), $text);

    return $text;
}
function _doAutoLinks_url_callback($matches) {
    #
    # Build an <a> element for an automatic URL link. The URL
    # ($matches[1]) is entity-encoded and used both as href and as
    # link text.
    #
    $href = $this->encodeAmpsAndAngles($matches[1]);
    return $this->hashPart("<a href=\"$href\">$href</a>");
}
function _doAutoLinks_email_callback($matches) {
    #
    # Build an obfuscated mailto link for an automatic e-mail link;
    # $matches[1] is the address.
    #
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
function encodeEmailAddress($addr) {
    #
    # Input: an email address, e.g. "foo@example.com"
    #
    # Output: the email address as a mailto link, with most characters
    # of the address encoded as either a decimal or hex entity, in
    # the hopes of foiling most address harvesting spam bots. E.g.:
    #
    #   <a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
    #   &#x40;...">&#x66;o&#111;&#x40;...</a>
    #
    # Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
    # With some optimizations by Milian Wolff.
    #
    $addr = "mailto:" . $addr;
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

    foreach ($chars as $key => $char) {
        $ord = ord($char);

        # Ignore non-ascii chars.
        if ($ord >= 128) continue;

        $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.

        # roughly 10% raw, 45% hex, 45% dec
        # '@' *must* be encoded, so it never takes the "raw" path.
        if ($r > 90 && $char != '@') continue;

        $chars[$key] = ($r < 45)
            ? '&#x'.dechex($ord).';'
            : '&#'.$ord.';';
    }

    $href = implode('', $chars);
    $label = implode('', array_slice($chars, 7)); # text without `mailto:`

    return "<a href=\"$href\">$label</a>";
}
function parseSpan($str) {
    #
    # Take the string $str and parse it into tokens, hashing embedded HTML,
    # escaped characters and handling code spans.
    #
    # Returns the string with every recognised token replaced by whatever
    # handleSpanToken returned for it.
    #
    $output = '';

    # The whole alternation sits in one capture group so that preg_split
    # (with PREG_SPLIT_DELIM_CAPTURE) returns the token as $parts[1].
    $regex = '{
            (
                \\\\['.preg_quote($this->escape_chars).']
            |
                (?<![`\\\\])
                `+                      # code span marker
        '.( $this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ').'
            )
            }xs';

    while (1) {
        #
        # Each loop iteration searches for either the next tag, the next
        # opening code span marker, or the next escaped character.
        # Each token is then passed to handleSpanToken.
        #
        $parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Create token from text preceding tag.
        if ($parts[0] != "") {
            $output .= $parts[0];
        }

        # Check if we reach the end.
        if (isset($parts[1])) {
            # handleSpanToken takes the remaining text by reference and
            # may consume part of it (code spans).
            $output .= $this->handleSpanToken($parts[1], $parts[2]);
            $str = $parts[2];
        }
        else {
            break;
        }
    }

    return $output;
}
function handleSpanToken($token, &$str) {
    #
    # Handle $token provided by parseSpan by determining its nature and
    # returning the corresponding value that should replace it.
    #
    # Params:
    #    $token - the matched token (escape sequence, backtick run, or tag)
    #    $str   - text following the token, passed by reference; for a
    #             code span the span's content and end marker are removed
    #             from it.
    #
    # Square-bracket offsets are used because the curly-brace form
    # ($token{0}) is no longer accepted by PHP 8.
    #
    switch ($token[0]) {
        case "\\":
            # Backslash escape: emit the escaped character as an entity.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text.
            if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            return $this->hashPart($token);
    }
}
function outdent($text) {
    #
    # Remove one level of line-leading tabs or spaces: a single tab, or
    # up to $this->tab_width spaces, at the start of every line.
    #
    $one_level = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
    return preg_replace($one_level, '', $text);
}
# String length function for detab. `_initDetab` will create a function to
# handle UTF-8 if the default function does not exist.
1407 var $utf8_strlen = 'mb_strlen';
function detab($text) {
    #
    # Replace tabs with the appropriate amount of space.
    #
    # Only lines that actually contain a tab are touched; each of them is
    # rebuilt by _detab_callback.
    #
    return preg_replace_callback('/^.*\t.*$/m',
        array(&$this, '_detab_callback'), $text);
}
function _detab_callback($matches) {
    #
    # Expand the tabs of one line ($matches[0]). The line is split at
    # every tab and put back together with enough spaces to reach the
    # next multiple of $this->tab_width.
    #
    $strlen = $this->utf8_strlen; # strlen function for UTF-8.

    $blocks = explode("\t", $matches[0]);

    # The first block starts the line; the rest are appended after padding.
    $line = array_shift($blocks);
    foreach ($blocks as $block) {
        $amount = $this->tab_width -
            $strlen($line, 'UTF-8') % $this->tab_width;
        $line .= str_repeat(" ", $amount) . $block;
    }
    return $line;
}
function _initDetab() {
    #
    # Check for the availability of the function in the `utf8_strlen`
    # property (initially `mb_strlen`). If the function is not available,
    # create a function that will loosely count the number of UTF-8
    # characters with a regular expression.
    #
    # NOTE(review): create_function() no longer exists in PHP 8; this
    # fallback only runs when the configured function is missing.
    #
    if (!function_exists($this->utf8_strlen)) {
        $this->utf8_strlen = create_function('$text', 'return preg_match_all(
            "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
            $text, $m);');
    }
}
function unhash($text) {
    #
    # Swap back in all the tags hashed by _HashHTMLBlocks: every
    # "<char>\x1A<digits><char>" key found in $text is replaced through
    # _unhash_callback.
    #
    $hash_key = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback($hash_key,
        array(&$this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
    # Look up the matched hash key in the html_hashes table.
    $key = $matches[0];
    return $this->html_hashes[$key];
}
1468 # Markdown Extra Parser Class
1471 class MarkdownExtra_Parser extends Markdown_Parser {
1473 # Prefix for footnote ids.
1474 var $fn_id_prefix = "";
1476 # Optional title attribute for footnote links and backlinks.
1477 var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
1478 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1480 # Optional class attribute for footnote links and backlinks.
1481 var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
1482 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
function MarkdownExtra_Parser() {
    #
    # Constructor function. Initialize the parser object.
    #
    # Add extra escapable characters before parent constructor
    # initialize the table.
    $this->escape_chars .= ':|';

    # Extra document, block, and span transformations, keyed by method
    # name with their priority as value. Parent constructor will do the
    # sorting.
    $extra_document = array(
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
        );
    $extra_block = array(
        "doTables"   => 15,
        "doDefLists" => 45,
        );
    $extra_span = array(
        "doFootnotes"     => 5,
        "doAbbreviations" => 70,
        );

    # Array union: entries already present in a gamut are kept as they are.
    $this->document_gamut += $extra_document;
    $this->block_gamut    += $extra_block;
    $this->span_gamut     += $extra_span;

    parent::Markdown_Parser();
}
1513 # Extra hashes used during extra transformations.
1514 var $footnotes = array();
1515 var $footnotes_ordered = array();
1516 var $abbr_desciptions = array();
1517 var $abbr_matches = array();
1519 # Status flag to avoid invalid nesting.
1520 var $in_footnote = false;
function transform($text) {
    #
    # Reset the Extra-specific tables, then run the parent transform.
    #
    # If these are not cleared, you get conflicts from other articles
    # when generating a page which contains more than one article (e.g.
    # an index page that shows the N most recent articles).
    #
    $tables = array('footnotes', 'footnotes_ordered',
                    'abbr_desciptions', 'abbr_matches');
    foreach ($tables as $table) {
        $this->$table = array();
    }

    return parent::transform($text);
}
1541 ### HTML Block Parser ###
1543 # Tags that are always treated as block tags:
1544 var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
# Tags treated as block tags only if the opening tag is alone on its line:
1547 var $context_block_tags = 'script|noscript|math|ins|del';
1549 # Tags where markdown="1" default to span mode:
1550 var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1552 # Tags which must not have their contents modified, no matter where
1553 # they appear:
1554 var $clean_tags = 'script|math';
1556 # Tags that do not need to be closed.
1557 var $auto_close_tags = 'hr|img';
function hashHTMLBlocks($text) {
    #
    # Hashify HTML Blocks and "clean tags".
    #
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables. That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as
    # anchors, phrase emphasis, and spans. The list of tags we're looking
    # for is hard-coded.
    #
    # The work is done by _hashHTMLBlocks_inMarkdown, which calls
    # _hashHTMLBlocks_inHTML when it encounters block tags. When the
    # markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
    # calls back _hashHTMLBlocks_inMarkdown to handle the Markdown syntax
    # within the tag. The two functions are mutually recursive.
    #
    # Only the processed text (first element of the returned pair) is
    # of interest here.
    $result = $this->_hashHTMLBlocks_inMarkdown($text);
    return $result[0];
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag = '', $span = false)
{
    #
    # Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
    #
    # *   $indent is the number of spaces to be ignored when checking for
    #     code blocks. This is important because if we don't take the
    #     indent into account, something like this (which looks right)
    #     won't work as expected:
    #
    #     <div>
    #         <div markdown="1">
    #         Hello World.  <-- Is this a Markdown code block or text?
    #         </div>  <-- Is this a Markdown code block or a real tag?
    #     <div>
    #
    #     If you don't like this, just don't indent the tag on which
    #     you apply the markdown="1" attribute.
    #
    # *   If $enclosing_tag is not empty, stops at the first unmatched
    #     closing tag with that name. Nested tags supported.
    #
    # *   If $span is true, text inside must be treated as span. So any
    #     double newline will be replaced by a single newline so that it
    #     does not create paragraphs.
    #
    # Returns an array of that form: ( processed text , remaining text )
    #
    # String offsets use [] because the {} form is gone in PHP 8.
    #
    if ($text === '') return array('', '');

    # Regex to check for the presence of newlines around a block tag.
    $newline_match_before = '/(?:^\n?|\n\n)*$/';
    $newline_match_after =
        '{
            ^                       # Start of text following the tag.
            (?:[ ]*<!--.*?-->)?     # Optional comment.
            [ ]*\n                  # Must be followed by newline.
        }xs';

    # Regex to match any tag.
    $block_tag_match =
        '{
            (                   # $2: Capture whole tag.
                </?                 # Any opening or closing tag.
                    (?:             # Tag name.
                        '.$this->block_tags.'           |
                        '.$this->context_block_tags.'   |
                        '.$this->clean_tags.'           |
                        (?!\s)'.$enclosing_tag.'
                    )
                    \s*             # Whitespace.
                    (?>
                        ".*?"       |   # Double quotes (can contain `>`)
                        \'.*?\'     |   # Single quotes (can contain `>`)
                        .+?             # Anything but quotes and `>`.
                    )*?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';


    $depth = 0;     # Current depth inside the tag tree.
    $parsed = "";   # Parsed text that will be returned.

    #
    # Loop through every tag until we find the closing tag of the parent
    # or loop until reaching the end of text if no parent tag specified.
    #
    do {
        #
        # Split the text using the first $tag_match pattern found.
        # Text before pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($block_tag_match, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        # If in Markdown span mode, add an empty-string span-level hash
        # after each newline to prevent triggering any block element.
        if ($span) {
            $void = $this->hashPart("", ':');
            $newline = "$void\n";
            $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
        }

        $parsed .= $parts[0]; # Text before current tag.

        # If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; # Tag to handle.
        $text = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Tag inside code block or span
        #
        if (# Find current paragraph
            preg_match('/(?>^\n?|\n\n)((?>.+\n?)*?)$/', $parsed, $matches) &&
            (
                # Then match in it either a code block...
                preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
                            '(?!\n)$/', $matches[1], $x) ||
                # ...or unbalanced code span markers. (the regex matches balanced)
                !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
                             $matches[1])
            ))
        {
            # Tag is in code block or span and may not be a tag at all. So we
            # simply skip the first char (should be a `<`).
            $parsed .= $tag[0];
            $text = substr($tag, 1) . $text; # Put back $tag minus first char.
        }
        #
        # Check for: Opening Block level tag or
        #            Opening Content Block tag (like ins and del)
        #               used as a block tag (tag is alone on its line).
        #
        else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||
            (   preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&
                preg_match($newline_match_before, $parsed) &&
                preg_match($newline_match_after, $text) )
            )
        {
            # Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            # Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        #
        # Check for: Clean tag (like script, math)
        #            HTML Comments, processing instructions.
        #
        else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Need to parse tag and following text using the HTML parser.
            # (don't check for markdown attribute)
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $parsed .= $block_text;
        }
        #
        # Check for: Tag with same name as enclosing tag.
        #
        else if ($enclosing_tag !== '' &&
            # Same name as enclosing tag.
            preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
        {
            #
            # Increase/decrease nested tag count. A self-closing tag
            # (ending in "/>") leaves the depth unchanged.
            #
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                #
                # Going out of parent element. Clean up and break so we
                # return to the calling function.
                #
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    #
    # Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
    #
    # *   Calls $hash_method to convert any blocks.
    # *   Stops when the first opening tag closes.
    # *   $md_attr indicates if the use of the `markdown="1"` attribute is
    #     allowed. (it is not inside clean tags)
    #
    # Returns an array of that form: ( processed text , remaining text )
    #
    # String offsets use [] because the {} form is gone in PHP 8.
    #
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_match = '
        {
            \s*         # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?:
                (["\'])     # $1: quote delimiter
                (.*?)       # $2: attribute value
                \1          # matching delimiter
            |
                ([^\s>]*)   # $3: unquoted attribute value
            )
            ()              # $4: make $3 always defined (avoid warnings)
        }xs';

    # Regex to match any tag.
    $tag_match = '{
            (                   # $2: Capture whole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    \s*             # Whitespace.
                    (?>
                        ".*?"       |   # Double quotes (can contain `>`)
                        \'.*?\'     |   # Single quotes (can contain `>`)
                        .+?             # Anything but quotes and `>`.
                    )*?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $original_text = $text;     # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag. Text that starts with a comment or
    # processing instruction has no tag name; default to an empty name so
    # the variable is always defined.
    #
    $base_tag_name = '';
    if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
        $base_tag_name = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text using the first $tag_match pattern found.
        # Text before pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        #
        if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name match base tag's.
            #
            if (preg_match("{^</?$base_tag_name\b}", $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternatives are grouped so that the anchors apply to
            # each of them: only exactly "1", "block" or "span" qualifies.
            #
            if ($md_attr &&
                preg_match($markdown_attr_match, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_match, '', $tag);

                # Check if text inside this tag must be parsed in span mode.
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                    preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);

                # Calculate indent before tag.
                preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
                $indent = strlen($matches[1]);

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                                                    $tag_name, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
function hashClean($text) {
    #
    # Hash a "clean" tag. This is hashPart() with the boundary
    # character 'C', the kind of key formParagraphs leaves unwrapped.
    #
    $boundary = 'C';
    return $this->hashPart($text, $boundary);
}
function doHeaders($text) {
    #
    # Redefined to add id attribute support.
    #
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2}
    #     --------
    #
    $text = preg_replace_callback(
        '{
            (^.+?)                              # $1: Header text
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?    # $2: Id attribute
            [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
        }mx',
        array(&$this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3}
    #   ...
    #   ###### Header 6   {#header2}
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
            [ ]*
            \n+
        }xm',
        array(&$this, '_doHeaders_callback_atx'), $text);

    return $text;
}
function _doHeaders_attr($attr) {
    #
    # Build the id attribute string for a header tag: ' id="..."' with a
    # leading space, or an empty string when $attr is empty.
    #
    return empty($attr) ? "" : " id=\"$attr\"";
}
function _doHeaders_callback_setext($matches) {
    #
    # Build a hashed <h1>/<h2> from a setext-style header match.
    #   $matches[1] - header text
    #   $matches[2] - optional id
    #   $matches[3] - underline; "=" means level 1, "-" means level 2
    #
    # Square-bracket offset: the {} string offset form is gone in PHP 8.
    $level = $matches[3][0] == '=' ? 1 : 2;
    # The reference assignment avoids a notice when no id was captured.
    $attr  = $this->_doHeaders_attr($id =& $matches[2]);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
    #
    # Build a hashed <hN> from an atx-style header match.
    #   $matches[1] - the run of "#" (its length is the header level)
    #   $matches[2] - header text
    #   $matches[3] - optional id
    #
    $level = strlen($matches[1]);
    # The reference assignment avoids a notice when no id was captured.
    $attr = $this->_doHeaders_attr($id =& $matches[3]);
    $content = $this->runSpanGamut($matches[2]);

    $block = "<h$level$attr>" . $content . "</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function doTables($text) {
    #
    # Form HTML tables.
    #
    $less_than_tab = $this->tab_width - 1;
    #
    # Find tables with leading pipe.
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|]                         # Optional leading pipe (present)
            (.+) \n                     # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

            (                           # $3: Cells
                (?>
                    [ ]*                # Allowed whitespace.
                    [|] .* \n           # Row content.
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            (\S.*[|].*) \n              # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

            (                           # $3: Cells
                (?>
                    .* [|] .* \n        # Row content
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_DoTable_callback'), $text);

    return $text;
}
function _doTable_leadingPipe_callback($matches) {
    #
    # Adapter for tables written with a leading pipe: strip that pipe
    # from every body row, then delegate to _doTable_callback with the
    # same (whole match, head, underline, content) layout.
    #
    $rows = preg_replace('/^ *[|]/m', '', $matches[3]);

    $normalized = array($matches[0], $matches[1], $matches[2], $rows);
    return $this->_doTable_callback($normalized);
}
function _doTable_callback($matches) {
    #
    # Build a hashed <table> from a matched Markdown table.
    #   $matches[1] - header row
    #   $matches[2] - header underline (carries the column alignment)
    #   $matches[3] - body rows, one per line
    #
    $head       = $matches[1];
    $underline  = $matches[2];
    $content    = $matches[3];

    # Remove any trailing pipes for each line.
    $head       = preg_replace('/[|] *$/m', '', $head);
    $underline  = preg_replace('/[|] *$/m', '', $underline);
    $content    = preg_replace('/[|] *$/m', '', $content);

    # Reading alignment from header underline.
    $attr = array();
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s))       $attr[$n] = ' align="right"';
        else if (preg_match('/^ *:-+: *$/', $s)) $attr[$n] = ' align="center"';
        else if (preg_match('/^ *:-+ *$/', $s))  $attr[$n] = ' align="left"';
        else                                     $attr[$n] = '';
    }

    # Parsing span elements, including code spans, character escapes,
    # and inline HTML tags, so that pipes inside those gets ignored.
    $head       = $this->parseSpan($head);
    $headers    = preg_split('/ *[|] */', $head);
    $col_count  = count($headers);

    # Write column headers.
    $text = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header) {
        # The header may have more cells than the underline; such columns
        # simply get no alignment attribute.
        $cell_attr = isset($attr[$n]) ? $attr[$n] : '';
        $text .= " <th$cell_attr>".$this->runSpanGamut(trim($header))."</th>\n";
    }
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    # Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
        # Parsing span elements, including code spans, character escapes,
        # and inline HTML tags, so that pipes inside those gets ignored.
        $row = $this->parseSpan($row);

        # Split row by cell; every row gets exactly $col_count cells.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        $text .= "<tr>\n";
        foreach ($row_cells as $n => $cell) {
            $cell_attr = isset($attr[$n]) ? $attr[$n] : '';
            $text .= " <td$cell_attr>".$this->runSpanGamut(trim($cell))."</td>\n";
        }
        $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
}
function doDefLists($text) {
    #
    # Form HTML definition lists.
    #
    # Each whole list found at the start of the text or after a blank
    # line is handed to _doDefLists_callback.
    #
    $less_than_tab = $this->tab_width - 1;

    # Re-usable pattern to match any entire dl list:
    $whole_list = '(?>
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$less_than_tab.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$less_than_tab.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    $text = preg_replace_callback('{
            (?:(?<=\n\n)|\A\n?)
            '.$whole_list.'
        }mx',
        array(&$this, '_doDefLists_callback'), $text);

    return $text;
}
function _doDefLists_callback($matches) {
    #
    # Convert one whole definition list ($matches[1]) into a hashed
    # <dl> block.
    #
    $items = trim($this->processDefListItems($matches[1]));
    $html = "<dl>\n" . $items . "\n</dl>";
    return $this->hashBlock($html) . "\n\n";
}
function processDefListItems($list_str) {
    #
    # Process the contents of a single definition list, splitting it
    # into individual term and definition list items.
    #
    # Terms are converted first; the definition pass relies on the
    # resulting <dt> tags to know where a definition ends.
    #
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    $list_str = preg_replace_callback('{
        (?:\n\n+|\A\n?)                 # leading line
        (                               # definition terms = $1
            [ ]{0,'.$less_than_tab.'}   # leading whitespace
            (?![:][ ]|[ ])              # negative lookahead for a definition
                                        #   mark (colon) or more whitespace.
            (?: \S.* \n)+?              # actual term (not whitespace).
        )
        (?=\n?[ ]{0,3}:[ ])             # lookahead for following line feed
                                        #   with a definition mark.
        }xm',
        array(&$this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions.
    $list_str = preg_replace_callback('{
        \n(\n+)?                        # leading line = $1
        [ ]{0,'.$less_than_tab.'}       # whitespace before colon
        [:][ ]+                         # definition mark (colon)
        ((?s:.+?))                      # definition text = $2
        (?= \n+                         # stop at next definition mark,
            (?:                         # next term or end of text
                [ ]{0,'.$less_than_tab.'} [:][ ]    |
                <dt> | \z
            )
        )
        }xm',
        array(&$this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
function _processDefListItems_callback_dt($matches) {
    #
    # Turn a group of term lines ($matches[1], one term per line) into
    # one <dt> element per line.
    #
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }
    return $html . "\n";
}
function _processDefListItems_callback_dd($matches) {
    #
    # Turn one definition into a <dd> element.
    #   $matches[1] - blank line(s) preceding the definition, if any
    #   $matches[2] - definition text
    #
    $def = $matches[2];

    # A definition preceded by a blank line, or containing one, is
    # processed as block content; otherwise as a single span.
    $is_block = $matches[1] || preg_match('/\n{2,}/', $def);

    if ($is_block) {
        $def = "\n" . $this->runBlockGamut($this->outdent($def . "\n\n")) . "\n";
    }
    else {
        $def = $this->runSpanGamut($this->outdent(rtrim($def)));
    }

    return "\n<dd>" . $def . "</dd>\n";
}
function doItalicsAndBold($text) {
    #
    # Redefined to change emphasis by underscore behaviour so that it does
    # not work in the middle of a word.
    #
    # Both passes use one pattern for "_" markers (with alphanumeric
    # guards on either side) and one for "*" markers.
    #
    # <strong> must go first:
    $text = preg_replace_callback(array(
        '{
            (                       # $1: Marker
                (?<![a-zA-Z0-9])    # Not preceded by alphanum
                (?<!__)             #   or by two marker chars.
                __
            )
            (?=\S)                  # Not followed by whitespace
            (?!__)                  #   or two others marker chars.
            (                       # $2: Content
                (?>
                    [^_]+?          # Anything not em markers.
                |
                                    # Balance any regular _ emphasis inside.
                    (?<![a-zA-Z0-9]) _ (?=\S) (.+?)
                    (?<=\S) _ (?![a-zA-Z0-9])
                |
                    _+              # Allow unbalanced as last resort.
                )+?
            )
            (?<=\S) __              # End mark not preceded by whitespace.
            (?![a-zA-Z0-9])         # Not followed by alphanum
            (?!__)                  #   or two others marker chars.
        }sx',
        '{
            ( (?<!\*\*) \*\* )      # $1: Marker (not preceded by two *)
            (?=\S)                  # Not followed by whitespace
            (?!\1)                  #   or two others marker chars.
            (                       # $2: Content
                (?>
                    [^*]+?          # Anything not em markers.
                |
                                    # Balance any regular * emphasis inside.
                    \* (?=\S) (.+?) (?<=\S) \*
                |
                    \*              # Allow unbalanced as last resort.
                )+?
            )
            (?<=\S) \*\*            # End mark not preceded by whitespace.
        }sx',
        ),
        array(&$this, '_doItalicAndBold_strong_callback'), $text);
    # Then <em>:
    $text = preg_replace_callback(array(
        '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
        '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',
        ),
        array(&$this, '_doItalicAndBold_em_callback'), $text);

    return $text;
}
function formParagraphs($text) {
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Splits $text into chunks separated by two or more newlines, runs the
#   span gamut on each chunk and wraps it in <p>...</p> unless it matches
#   the block-hash / clean-tag-hash pattern. The chunks are joined with a
#   blank line and the result is passed through unhash().
#
    # Strip leading and trailing lines:
    $stripped = preg_replace('/\A\n+|\n+\z/', '', $text);

    $chunks = preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $chunk) {
        $html = trim($this->runSpanGamut($chunk));

        # Clean tag hashes & block tag hashes are left alone; everything
        # else becomes a paragraph.
        if (preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html)) {
            $rendered[] = $html;
        } else {
            $rendered[] = "<p>" . $html . "</p>";
        }
    }

    # Join the chunks, then remove any tag hashes still present.
    return $this->unhash(implode("\n\n", $rendered));
}
2349 ### Footnotes
2351 function stripFootnotes($text) {
# Params:
#   $text - string to scan for footnote definitions
# Returns $text after every match of the pattern below has been replaced
# by the return value of _stripFootnotes_callback.
# NOTE(review): the pattern as listed here appears to be missing lines
# (group $2 and the (?: ... ) group are never closed); confirm against the
# upstream source before editing the regex.
2353 # Strips footnote definitions from text; each match is handed to
2354 # _stripFootnotes_callback ($1 = note id, $2 = footnote text).
2356 $less_than_tab = $this->tab_width - 1;
2358 # Footnote defs are in the form: [^id]: footnote text
2359 $text = preg_replace_callback('{
2360 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
2361 [ ]*
2362 \n? # maybe *one* newline
2363 ( # text = $2 (no blank lines allowed)
2364 (?:
2365 .+ # actual text
2367 \n # newlines but
2368 (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2369 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2370 # by non-indented content
2373 }xm',
2374 array(&$this, '_stripFootnotes_callback'),
2375 $text);
2376 return $text;
function _stripFootnotes_callback($matches) {
#
# Callback: record one footnote definition.
#    $matches[1] - note id (stored with $this->fn_id_prefix prepended)
#    $matches[2] - footnote text (stored outdented)
#
# Returns an empty string, so the definition disappears from the text.
#
    $key = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);
    $this->footnotes[$key] = $body;

    return '';
}
function doFootnotes($text) {
#
# Replace footnote references [^id] in $text with the special text-token
# F\x1Afn:id\x1A: so they can be turned into links at a later stage.
# Nothing is replaced while $this->in_footnote or $this->in_anchor is set.
#
    if ($this->in_footnote || $this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
function appendFootnotes($text) {
#
# Append footnote list to text.
#
# Every footnote token (F\x1Afn:id\x1A:) in $text is first replaced
# through _appendFootnotes_callback, which fills $this->footnotes_ordered.
# If any footnote was collected, a <div class="footnotes"> containing an
# ordered list is appended; each item ends with a backlink to its
# reference.
#
# Params:
#    $text - string to process
# Returns the processed text.
#
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array(&$this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
        $text .= "<ol>\n\n";

        # Attributes shared by every backlink.
        $attr = " rev=\"footnote\"";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAmpsAndAngles($class);
            $class = str_replace('"', '&quot;', $class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAmpsAndAngles($title);
            $title = str_replace('"', '&quot;', $title);
            $attr .= " title=\"$title\"";
        }
        $num = 0;

        # doFootnotes() leaves references alone while this flag is set.
        $this->in_footnote = true;

        foreach ($this->footnotes_ordered as $note_id => $footnote) {
            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");

            # "%%" in the class/title settings becomes the footnote number.
            $attr2 = str_replace("%%", ++$num, $attr);

            # The id originates from document text, so escape it the same
            # way as the class/title values before it goes inside a
            # double-quoted attribute.
            $id_attr = $this->encodeAmpsAndAngles((string) $note_id);
            $id_attr = str_replace('"', '&quot;', $id_attr);

            # Add backlink to last paragraph; create new paragraph if needed.
            $backlink = "<a href=\"#fnref:$id_attr\"$attr2>&#8617;</a>";
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$id_attr\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $this->in_footnote = false;

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
function _appendFootnotes_callback($matches) {
#
# Callback: turn one footnote token into its superscript reference link.
#    $matches[1] - footnote id as written in the document
#
# Returns the <sup><a>..</a></sup> marker when a footnote with that id is
# still pending in $this->footnotes; otherwise gives back the literal
# "[^id]" text.
#
    $node_id = $this->fn_id_prefix . $matches[1];

    # Create footnote marker only if it has a corresponding footnote *and*
    # the footnote hasn't been used by another marker.
    if (isset($this->footnotes[$node_id])) {
        # Transfer footnote content to the ordered list.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        unset($this->footnotes[$node_id]);

        $num = count($this->footnotes_ordered);
        $attr = " rel=\"footnote\"";
        if ($this->fn_link_class != "") {
            $class = $this->fn_link_class;
            $class = $this->encodeAmpsAndAngles($class);
            $class = str_replace('"', '&quot;', $class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_link_title != "") {
            $title = $this->fn_link_title;
            $title = $this->encodeAmpsAndAngles($title);
            $title = str_replace('"', '&quot;', $title);
            $attr .= " title=\"$title\"";
        }
        # "%%" in the class/title settings becomes the footnote number.
        $attr = str_replace("%%", $num, $attr);

        # The id originates from document text, so escape it the same way
        # as the class/title values before it goes inside a double-quoted
        # attribute. The array lookups above keep using the raw id.
        $id_attr = $this->encodeAmpsAndAngles($node_id);
        $id_attr = str_replace('"', '&quot;', $id_attr);

        return
            "<sup id=\"fnref:$id_attr\">".
            "<a href=\"#fn:$id_attr\"$attr>$num</a>".
            "</sup>";
    }

    return "[^".$matches[1]."]";
}
2490 ### Abbreviations ###
function stripAbbreviations($text) {
#
# Strips abbreviation definitions from text; each match is handed to
# _stripAbbreviations_callback ($1 = abbreviation, $2 = description).
#
# Abbreviation defs are in the form: *[id]: description
#
    $max_indent = $this->tab_width - 1;

    $pattern = '{
        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
        (.*) # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback(
        $pattern,
        array(&$this, '_stripAbbreviations_callback'),
        $text);
}
function _stripAbbreviations_callback($matches) {
#
# Callback: record one abbreviation definition.
#    $matches[1] - abbreviation; its regex-quoted form is appended to
#                  $this->abbr_matches
#    $matches[2] - description; stored trimmed in $this->abbr_desciptions
#                  under the abbreviation
#
# Returns an empty string, so the definition disappears from the text.
#
    $word = $matches[1];

    $this->abbr_matches[] = preg_quote($word);
    $this->abbr_desciptions[$word] = trim($matches[2]);

    return '';
}
function doAbbreviations($text) {
#
# Find defined abbreviations in text and pass each occurrence to
# _doAbbreviations_callback. Text is returned untouched when
# $this->abbr_matches is empty.
#
    if (!$this->abbr_matches) {
        return $text;
    }

    // The /x modifier cannot be used here because abbr_matches may
    // contain spaces.
    $alternatives = implode('|', $this->abbr_matches);
    $pattern = '{' . '(?<![\w\x1A])' . '(?:' . $alternatives . ')' . '(?![\w\x1A])' . '}';

    return preg_replace_callback(
        $pattern,
        array(&$this, '_doAbbreviations_callback'),
        $text);
}
function _doAbbreviations_callback($matches) {
#
# Callback: wrap one matched abbreviation in an <abbr> element.
#    $matches[0] - the abbreviation as found in the text
#
# Returns the hashed <abbr> element (with a title attribute when the
# stored description is not an empty string), or the matched text
# unchanged when no description entry exists for it.
#
    $abbr = $matches[0];

    if (!isset($this->abbr_desciptions[$abbr])) {
        return $abbr;
    }

    $desc = $this->abbr_desciptions[$abbr];

    # Compare against '' explicitly: empty() would also discard a
    # description consisting of "0".
    if ($desc === '') {
        return $this->hashPart("<abbr>$abbr</abbr>");
    }

    # The description goes inside a double-quoted attribute, so double
    # quotes must be escaped as well (ENT_COMPAT rather than ENT_NOQUOTES).
    $desc = htmlspecialchars($desc, ENT_COMPAT);
    return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
2552 PHP Markdown Extra
2553 ==================
2555 Description
2556 -----------
2558 This is a PHP port of the original Markdown formatter written in Perl
2559 by John Gruber. This special "Extra" version of PHP Markdown features
2560 further enhancements to the syntax for making additional constructs
2561 such as tables and definition lists.
2563 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2564 easy-to-write structured text format into HTML. Markdown's text format
2565 is most similar to that of plain text email, and supports features such
2566 as headers, *emphasis*, code blocks, blockquotes, and links.
2568 Markdown's syntax is designed not as a generic markup language, but
2569 specifically to serve as a front-end to (X)HTML. You can use span-level
2570 HTML tags anywhere in a Markdown document, and you can use block level
2571 HTML tags (like <div> and <table>) as well.
2573 For more information about Markdown's syntax, see:
2575 <http://daringfireball.net/projects/markdown/>
2578 Bugs
2579 ----
2581 To file bug reports please send email to:
2583 <michel.fortin@michelf.com>
2585 Please include with your report: (1) the example input; (2) the output you
2586 expected; (3) the output Markdown actually produced.
2589 Version History
2590 ---------------
2592 See the readme file for detailed release notes for this version.
2595 Copyright and License
2596 ---------------------
2598 PHP Markdown & Extra
2599 Copyright (c) 2004-2007 Michel Fortin
2600 <http://www.michelf.com/>
2601 All rights reserved.
2603 Based on Markdown
2604 Copyright (c) 2003-2006 John Gruber
2605 <http://daringfireball.net/>
2606 All rights reserved.
2608 Redistribution and use in source and binary forms, with or without
2609 modification, are permitted provided that the following conditions are
2610 met:
2612 * Redistributions of source code must retain the above copyright notice,
2613 this list of conditions and the following disclaimer.
2615 * Redistributions in binary form must reproduce the above copyright
2616 notice, this list of conditions and the following disclaimer in the
2617 documentation and/or other materials provided with the distribution.
2619 * Neither the name "Markdown" nor the names of its contributors may
2620 be used to endorse or promote products derived from this software
2621 without specific prior written permission.
2623 This software is provided by the copyright holders and contributors "as
2624 is" and any express or implied warranties, including, but not limited
2625 to, the implied warranties of merchantability and fitness for a
2626 particular purpose are disclaimed. In no event shall the copyright owner
2627 or contributors be liable for any direct, indirect, incidental, special,
2628 exemplary, or consequential damages (including, but not limited to,
2629 procurement of substitute goods or services; loss of use, data, or
2630 profits; or business interruption) however caused and on any theory of
2631 liability, whether in contract, strict liability, or tort (including
2632 negligence or otherwise) arising in any way out of the use of this
2633 software, even if advised of the possibility of such damage.