3 # Markdown Extra - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004-2015 Michel Fortin
7 # <https://michelf.ca/projects/php-markdown/>
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
17 # Markdown Extra Parser Class
20 class MarkdownExtra
extends \Michelf\Markdown
{
22 ### Configuration Variables ###
24 # Prefix for footnote ids.
25 public $fn_id_prefix = "";
27 # Optional title attribute for footnote links and backlinks.
28 public $fn_link_title = "";
29 public $fn_backlink_title = "";
31 # Optional class attribute for footnote links and backlinks.
32 public $fn_link_class = "footnote-ref";
33 public $fn_backlink_class = "footnote-backref";
35 # Class name for table cell alignment (%% replaced left/center/right)
36 # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
37 # If empty, the align attribute is used instead of a class name.
38 public $table_align_class_tmpl = '';
40 # Optional class prefix for fenced code block.
41 public $code_class_prefix = "";
42 # Class attribute for code blocks goes on the `code` tag;
43 # setting this to true will put attributes on the `pre` tag instead.
44 public $code_attr_on_pre = false;
46 # Predefined abbreviations.
47 public $predef_abbr = array();
49 ### Parser Implementation ###
public function __construct() {
#
# Constructor function. Initialize the parser object.
#
# Registers the extra escapable characters and the Extra-specific
# document, block and span transformations, turns on enhanced ordered
# lists, then defers to the parent constructor.
#
    # Extra escapable characters; they have to be in place before the
    # parent constructor initializes the escape table.
    $this->escape_chars = $this->escape_chars . ':|';

    # Additional transformations for each gamut (method name => priority).
    # Sorting is left to the parent constructor.
    $extra_document_gamut = array(
        "doFencedCodeBlocks" => 5,
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
    );
    $extra_block_gamut = array(
        "doFencedCodeBlocks" => 5,
        "doTables"           => 15,
        "doDefLists"         => 45,
    );
    $extra_span_gamut = array(
        "doFootnotes"     => 5,
        "doAbbreviations" => 70,
    );

    $this->document_gamut += $extra_document_gamut;
    $this->block_gamut    += $extra_block_gamut;
    $this->span_gamut     += $extra_span_gamut;

    $this->enhanced_ordered_list = true;

    parent::__construct();
}
82 # Extra variables used during extra transformations.
83 protected $footnotes = array();
84 protected $footnotes_ordered = array();
85 protected $footnotes_ref_count = array();
86 protected $footnotes_numbers = array();
87 protected $abbr_desciptions = array();
88 protected $abbr_word_re = '';
90 # Give the current footnote number.
91 protected $footnote_counter = 1;
protected function setup() {
#
# Setting up Extra-specific variables.
#
# Resets the footnote and abbreviation state, then seeds the abbreviation
# pattern and description table from $this->predef_abbr.
#
    parent::setup();

    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->footnotes_ref_count = array();
    $this->footnotes_numbers = array();
    $this->abbr_desciptions = array();
    $this->abbr_word_re = '';
    $this->footnote_counter = 1;

    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
        # Compare with '' explicitly: a pattern that so far consists of
        # just "0" is falsy and would otherwise lose its '|' separator.
        if ($this->abbr_word_re !== '')
            $this->abbr_word_re .= '|';
        $this->abbr_word_re .= preg_quote($abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    }
}
protected function teardown() {
#
# Clearing Extra-specific variables.
#
# Empties the footnote and abbreviation state kept on the parser, then
# lets the parent class do its own cleanup.
#
    $array_members = array(
        'footnotes',
        'footnotes_ordered',
        'footnotes_ref_count',
        'footnotes_numbers',
        'abbr_desciptions',
    );
    foreach ($array_members as $member) {
        $this->$member = array();
    }
    $this->abbr_word_re = '';

    parent::teardown();
}
131 ### Extra Attribute Parser ###
133 # Expression to use to catch attributes (includes the braces)
134 protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
135 # Expression to use when parsing in a context when no capture is desired
136 protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null) {
#
# Parse attributes caught by the $this->id_class_attr_catch_re expression
# and return the HTML-formatted list of attributes.
#
# Supported components are .class, #id and key=value pairs (the latter are
# only emitted when $this->no_markup is off).
#
# In addition, this method also supports supplying a default Id value,
# which will be used to populate the id attribute in case no #id component
# was given.
#
# Returns the attribute string with a leading space before each attribute,
# or "" when there is nothing to output. $tag_name is not used here.
#
    if (empty($attr) && !$defaultIdValue) return "";

    # Split on components. The cast keeps a null $attr (no attribute block
    # matched, but a default id supplied) from reaching preg_match_all().
    preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', (string) $attr, $matches);
    $elements = $matches[0];

    # handle classes and ids (only first id taken into account)
    $classes = array();
    $attributes = array();
    $id = false;
    foreach ($elements as $element) {
        # Square-bracket offsets: the {0} form is a parse error on PHP 8.
        if ($element[0] == '.') {
            $classes[] = substr($element, 1);
        } else if ($element[0] == '#') {
            if ($id === false) $id = substr($element, 1);
        } else if (strpos($element, '=') > 0) {
            $parts = explode('=', $element, 2);
            $attributes[] = $parts[0] . '="' . $parts[1] . '"';
        }
    }

    if (!$id) $id = $defaultIdValue;

    # compose attributes as string
    $attr_str = "";
    if (!empty($id)) {
        $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
    }
    if (!empty($classes)) {
        $attr_str .= ' class="'. implode(" ", $classes) . '"';
    }
    if (!$this->no_markup && !empty($attributes)) {
        $attr_str .= ' '.implode(" ", $attributes);
    }
    return $attr_str;
}
186 protected function stripLinkDefinitions($text) {
188 # Strips link definitions from text and stores their URLs, titles and extra attributes, keyed by link id.
191 $less_than_tab = $this->tab_width
- 1;
193 # Link defs are in the form: ^[id]: url "optional title"
194 $text = preg_replace_callback('{
195 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
197 \n? # maybe *one* newline
205 \n? # maybe one newline
208 (?<=\s) # lookbehind for whitespace
213 )? # title is optional
214 (?:[ ]* '.$this->id_class_attr_catch_re
.' )? # $5 = extra id & class attr
217 array($this, '_stripLinkDefinitions_callback'),
protected function _stripLinkDefinitions_callback($matches) {
#
# Record one link definition (URL, title and extra attributes) under its
# lower-cased id and remove the definition from the text.
#
    $key = strtolower($matches[1]);

    # The URL is captured by group 2 or, when that one is empty, by group 3.
    if ($matches[2] == '') {
        $this->urls[$key] = $matches[3];
    } else {
        $this->urls[$key] = $matches[2];
    }

    # Title and attribute groups may be absent from the match; a missing
    # title is stored as null so that isset() reports it as not set.
    $this->titles[$key] = isset($matches[4]) ? $matches[4] : null;
    $extra_attr = isset($matches[5]) ? $matches[5] : null;
    $this->ref_attr[$key] = $this->doExtraAttributes("", $extra_attr);

    # String that will replace the block
    return '';
}
231 ### HTML Block Parser ###
233 # Tags that are always treated as block tags:
234 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
236 # Tags treated as block tags only if the opening tag is alone on its line:
237 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
239 # Tags where markdown="1" default to span mode:
240 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
242 # Tags which must not have their contents modified, no matter where they appear:
244 protected $clean_tags_re = 'script|style|math|svg';
246 # Tags that do not need to be closed.
247 protected $auto_close_tags_re = 'hr|img|param|source|track';
protected function hashHTMLBlocks($text) {
#
# Hashify HTML Blocks and "clean tags".
#
# Only block-level HTML tags are hashed, so that text wrapped in
# non-block-level tags still gets its <p> wrapper later on.
#
# The work is done by _hashHTMLBlocks_inMarkdown, which calls
# _hashHTMLBlocks_inHTML when it encounters block tags; that one calls
# back into _hashHTMLBlocks_inMarkdown for tags carrying the markdown
# attribute. The two functions recurse into each other.
#
# Returns $text untouched when $this->no_markup is set.
#
    if (!$this->no_markup) {
        # Call the HTML-in-Markdown hasher; only the processed text
        # (first element of the returned pair) is of interest here.
        $result = $this->_hashHTMLBlocks_inMarkdown($text);
        $text = $result[0];
    }
    return $text;
}
protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag_re = '', $span = false)
{
#
# Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
#
# *   $indent is the number of spaces to be ignored when checking for code
#     blocks: an indented code block needs $indent+4 leading spaces and a
#     fence marker may have at most $indent+3. This keeps content that is
#     indented along with its enclosing tag from being taken for code.
#
# *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
#     tag with that name. Nested tags supported.
#
# *   If $span is true, text inside must be treated as span: an empty
#     span-level hash is placed around every newline so that no block
#     element gets triggered.
#
# Returns an array of that form: ( processed text , remaining text )
#
    if ($text === '') return array('', '');

    # Regex to check for the presence of newlines around a block tag.
    $newline_before_re = '/(?:^\n?|\n\n)*$/';
    $newline_after_re =
        '{
            ^                       # Start of text following the tag.
            (?>[ ]*<!--.*?-->)?     # Optional comment.
            [ ]*\n                  # Must be followed by newline.
        }xs';

    # Regex to match any tag.
    $block_tag_re =
        '{
            (                   # $2: Capture whole tag.
                </?                 # Any opening or closing tag.
                    (?>             # Tag name.
                        '.$this->block_tags_re.'            |
                        '.$this->context_block_tags_re.'    |
                        '.$this->clean_tags_re.'            |
                        (?!\s)'.$enclosing_tag_re.'
                    )
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            '. ( !$span ? ' # If not in span.
            |
                # Indented code block
                (?: ^[ ]*\n | ^ | \n[ ]*\n )
                [ ]{'.($indent+4).'}[^\n]* \n
                (?>
                    (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
                )*
            |
                # Fenced code block marker
                (?<= ^ | \n )
                [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
                [ ]*
                (?:
                \.?[-_:a-zA-Z0-9]+ # standalone class name
                |
                    '.$this->id_class_attr_nocatch_re.' # extra attributes
                )?
                [ ]*
                (?= \n )
            ' : '' ). ' # End (if not is span).
            |
                # Code span marker
                # Note, this regex needs to go after backtick fenced
                # code blocks but it should also be kept outside of the
                # "if not in span" condition adding backticks to the parser
                `+
            )
        }xs';

    $depth = 0;     # Current depth inside the tag tree.
    $parsed = "";   # Parsed text that will be returned.

    #
    # Loop through every tag until we find the closing tag of the parent
    # or loop until reaching the end of text if no parent tag specified.
    #
    do {
        #
        # Split the text using the first $block_tag_re match found.
        # Text before the match will be first in the array, text after
        # it will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($block_tag_re, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        # If in Markdown span mode, add an empty-string span-level hash
        # after each newline to prevent triggering any block element.
        if ($span) {
            $void = $this->hashPart("", ':');
            $newline = "$void\n";
            $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
        }

        $parsed .= $parts[0]; # Text before current tag.

        # If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; # Tag to handle.
        $text = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Fenced code block marker.
        # Note: need to recheck the whole tag to disambiguate backtick
        # fences from code spans
        #
        if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
            # Fenced code block marker: find matching end marker.
            $fence_indent = strlen($capture[1]); # use captured indent in re
            $fence_re = $capture[2]; # use captured fence in re
            if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
                $matches))
            {
                # End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                # No end marker: just skip it.
                $parsed .= $tag;
            }
        }
        #
        # Check for: Indented code block.
        # (Square-bracket offsets are used throughout: the {n} offset
        # form is a parse error on PHP 8.)
        #
        else if ($tag[0] == "\n" || $tag[0] == " ") {
            # Indented code block: pass it unchanged, will be handled
            # later.
            $parsed .= $tag;
        }
        #
        # Check for: Code span marker
        # Note: need to check this after backtick fenced code blocks
        #
        else if ($tag[0] == "`") {
            # Find corresponding end marker.
            $tag_re = preg_quote($tag);
            if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
                $text, $matches))
            {
                # End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                # Unmatched marker: just skip it.
                $parsed .= $tag;
            }
        }
        #
        # Check for: Opening Block level tag or
        #            Opening Context Block tag (like ins and del)
        #               used as a block tag (tag is alone on its line).
        #
        else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
            (   preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
                preg_match($newline_before_re, $parsed) &&
                preg_match($newline_after_re, $text)    )
            )
        {
            # Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            # Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        #
        # Check for: Clean tag (like script, math)
        #            HTML Comments, processing instructions.
        #
        else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Need to parse tag and following text using the HTML parser.
            # (don't check for markdown attribute)
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $parsed .= $block_text;
        }
        #
        # Check for: Tag with same name as enclosing tag.
        #
        else if ($enclosing_tag_re !== '' &&
            # Same name as enclosing tag.
            preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
        {
            #
            # Increase/decrease nested tag count. A tag ending in "/>"
            # is self-closing and leaves the depth unchanged.
            #
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                #
                # Going out of parent element. Clean up and break so we
                # return to the calling function.
                #
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
#
# *   Calls $hash_method to convert any blocks.
# *   Stops when the first opening tag closes.
# *   $md_attr indicates if the use of the `markdown="1"` attribute is
#     allowed. (it is not inside clean tags)
#
# Returns an array of that form: ( processed text , remaining text )
#
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s*         # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\'])     # $1: quote delimiter
                (.*?)       # $2: attribute value
                \1          # matching delimiter
            |
                ([^\s>]*)   # $3: unquoted attribute value
            )
            ()              # $4: make $3 always defined (avoid warnings)
        }xs';

    # Regex to match any tag.
    $tag_re = '{
            (                   # $2: Capture whole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $original_text = $text;     # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag.
    # (This pattern makes $base_tag_name_re safe without quoting.)
    # It stays empty when the text opens with a comment or a processing
    # instruction, which has no tag name.
    #
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text using the first $tag_re match found.
        # Text before the match will be first in the array, text after
        # it will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        # (Square-bracket offsets are used throughout: the {n} offset
        # form is a parse error on PHP 8.)
        #
        if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name match base tag's.
            #
            if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternatives are grouped so that both anchors apply to
            # each of them: only the exact values 1, block and span count.
            #
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                # Check if text inside this tag must be parsed in span mode.
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                    preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

                # Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                # (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over with a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
protected function hashClean($text) {
#
# Hash a "clean" tag. Whenever a function inserts such a tag in $text, it
# passes through this function so that it is automatically escaped,
# blocking invalid nested overlap.
#
# Delegates to hashPart() with the 'C' boundary.
#
    $boundary = 'C';
    return $this->hashPart($text, $boundary);
}
704 protected function doAnchors($text) {
706 # Turn Markdown link shortcuts into XHTML <a> tags.
708 if ($this->in_anchor
) return $text;
709 $this->in_anchor
= true;
712 # First, handle reference-style links: [link text] [id]
714 $text = preg_replace_callback('{
715 ( # wrap whole match in $1
717 ('.$this->nested_brackets_re
.') # link text = $2
720 [ ]? # one optional space
721 (?:\n[ ]*)? # one optional newline followed by spaces
728 array($this, '_doAnchors_reference_callback'), $text);
731 # Next, inline-style links: [link text](url "optional title")
733 $text = preg_replace_callback('{
734 ( # wrap whole match in $1
736 ('.$this->nested_brackets_re
.') # link text = $2
743 ('.$this->nested_url_parenthesis_re
.') # href = $4
747 ([\'"]) # quote char = $6
750 [ \n]* # ignore any spaces/tabs between closing quote and )
751 )? # title is optional
753 (?:[ ]? '.$this->id_class_attr_catch_re
.' )? # $8 = id/class attributes
756 array($this, '_doAnchors_inline_callback'), $text);
759 # Last, handle reference-style shortcuts: [link text]
760 # These must come last in case you've also got [link text][1]
761 # or [link text](/foo)
763 $text = preg_replace_callback('{
764 ( # wrap whole match in $1
766 ([^\[\]]+) # link text = $2; can\'t contain [ or ]
770 array($this, '_doAnchors_reference_callback'), $text);
772 $this->in_anchor
= false;
protected function _doAnchors_reference_callback($matches) {
#
# Build the <a> tag for a reference-style link, or hand back the matched
# text unchanged when the link id has no stored definition.
#
    # The id group is absent for shortcut links like [this]; an empty or
    # missing id means the link text doubles as the id ([this][]).
    $id = isset($matches[3]) ? $matches[3] : '';
    if ($id == "") {
        $id = $matches[2];
    }

    # lower-case and turn embedded newlines into spaces
    $id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

    # No such link id: leave the source text intact.
    if (!isset($this->urls[$id])) {
        return $matches[1];
    }

    $href = $this->encodeURLAttribute($this->urls[$id]);
    $html = "<a href=\"$href\"";

    if (isset($this->titles[$id])) {
        $html .= " title=\"" . $this->encodeAttribute($this->titles[$id]) . "\"";
    }
    if (isset($this->ref_attr[$id])) {
        $html .= $this->ref_attr[$id];
    }

    $html .= ">" . $this->runSpanGamut($matches[2]) . "</a>";

    return $this->hashPart($html);
}
protected function _doAnchors_inline_callback($matches) {
#
# Build the <a> tag for an inline-style link:
# [link text](url "optional title"){#id .class}
#
    $label = $this->runSpanGamut($matches[2]);

    # The URL is captured by group 3 or, when that one is empty, by group 4.
    $href = $matches[4];
    if ($matches[3] != '') {
        $href = $matches[3];
    }

    # Title and attribute groups may be absent from the match.
    $title = isset($matches[7]) ? $matches[7] : null;
    $extra = isset($matches[8]) ? $matches[8] : null;
    $attr  = $this->doExtraAttributes("a", $extra);

    // if the URL was of the form <s p a c e s> it got caught by the HTML
    // tag parser and hashed. Need to reverse the process before using the URL.
    $plain = $this->unhash($href);
    if ($plain != $href) {
        $href = preg_replace('/^<(.*)>$/', '\1', $plain);
    }

    $href = $this->encodeURLAttribute($href);

    $html = "<a href=\"$href\"";
    if (isset($title)) {
        $html .= " title=\"" . $this->encodeAttribute($title) . "\"";
    }
    $html .= $attr;

    # NOTE(review): the link text goes through the span gamut a second
    # time here; kept as is to preserve the existing output.
    $label = $this->runSpanGamut($label);
    $html .= ">$label</a>";

    return $this->hashPart($html);
}
840 protected function doImages($text) {
842 # Turn Markdown image shortcuts into <img> tags.
845 # First, handle reference-style labeled images: ![alt text][id]
847 $text = preg_replace_callback('{
848 ( # wrap whole match in $1
850 ('.$this->nested_brackets_re
.') # alt text = $2
853 [ ]? # one optional space
854 (?:\n[ ]*)? # one optional newline followed by spaces
862 array($this, '_doImages_reference_callback'), $text);
865 # Next, handle inline images: ![alt text](url "optional title")
866 # Don't forget: encode * and _
868 $text = preg_replace_callback('{
869 ( # wrap whole match in $1
871 ('.$this->nested_brackets_re
.') # alt text = $2
873 \s? # One optional whitespace character
877 <(\S*)> # src url = $3
879 ('.$this->nested_url_parenthesis_re
.') # src url = $4
883 ([\'"]) # quote char = $6
887 )? # title is optional
889 (?:[ ]? '.$this->id_class_attr_catch_re
.' )? # $8 = id/class attributes
892 array($this, '_doImages_inline_callback'), $text);
protected function _doImages_reference_callback($matches) {
#
# Build the <img> tag for a reference-style image, or hand back the
# matched text unchanged when the link id has no stored definition.
#
    $alt = $matches[2];
    $id  = strtolower($matches[3]);

    # for shortcut links like ![this][].
    if ($id == "") {
        $id = strtolower($alt);
    }

    $alt = $this->encodeAttribute($alt);

    # If there's no such link ID, leave intact:
    if (!isset($this->urls[$id])) {
        return $matches[1];
    }

    $src  = $this->encodeURLAttribute($this->urls[$id]);
    $html = "<img src=\"$src\" alt=\"$alt\"";

    if (isset($this->titles[$id])) {
        $html .= " title=\"" . $this->encodeAttribute($this->titles[$id]) . "\"";
    }
    if (isset($this->ref_attr[$id])) {
        $html .= $this->ref_attr[$id];
    }
    $html .= $this->empty_element_suffix;

    return $this->hashPart($html);
}
protected function _doImages_inline_callback($matches) {
#
# Build the <img> tag for an inline image:
# ![alt text](url "optional title"){#id .class}
#
    # The URL is captured by group 3 or, when that one is empty, by group 4.
    $src = $matches[4];
    if ($matches[3] != '') {
        $src = $matches[3];
    }

    # Title and attribute groups may be absent from the match.
    $title = isset($matches[7]) ? $matches[7] : null;
    $extra = isset($matches[8]) ? $matches[8] : null;
    $attr  = $this->doExtraAttributes("img", $extra);

    $alt = $this->encodeAttribute($matches[2]);
    $src = $this->encodeURLAttribute($src);

    $html = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($title)) {
        $html .= " title=\"" . $this->encodeAttribute($title) . "\"";
    }
    $html .= $attr . $this->empty_element_suffix;

    return $this->hashPart($html);
}
947 protected function doHeaders($text) {
949 # Redefined to add id and class attribute support.
951 # Setext-style headers:
952 # Header 1 {#header1}
955 # Header 2 {#header2 .class1 .class2}
958 $text = preg_replace_callback(
960 (^.+?) # $1: Header text
961 (?:[ ]+ '.$this->id_class_attr_catch_re
.' )? # $3 = id/class attributes
962 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
964 array($this, '_doHeaders_callback_setext'), $text);
967 # # Header 1 {#header1}
968 # ## Header 2 {#header2}
969 # ## Header 2 with closing hashes ## {#header3.class1.class2}
971 # ###### Header 6 {.class2}
973 $text = preg_replace_callback('{
974 ^(\#{1,6}) # $1 = string of #\'s
976 (.+?) # $2 = Header text
978 \#* # optional closing #\'s (not counted)
979 (?:[ ]+ '.$this->id_class_attr_catch_re
.' )? # $3 = id/class attributes
983 array($this, '_doHeaders_callback_atx'), $text);
protected function _doHeaders_callback_setext($matches) {
#
# Turn one setext-style header match into a hashed <h1>/<h2> block.
#
# $matches[1] is the header text, $matches[2] the optional id/class
# attribute string and $matches[3] the underline ("=" gives level 1,
# "-" gives level 2).
#
    # A single "-" under a line starting with "- " is left untouched
    # rather than being turned into a header.
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
        return $matches[0];

    # Square-bracket offset: the {0} form is a parse error on PHP 8.
    $level = $matches[3][0] == '=' ? 1 : 2;

    # A default id is only generated when a header id callback is set.
    $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;

    $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
protected function _doHeaders_callback_atx($matches) {
#
# Turn one atx-style header match into a hashed <hN> block.
#
# $matches[1] is the run of leading #'s (its length is the level),
# $matches[2] the header text, $matches[3] the optional id/class
# attribute string.
#
    $depth = strlen($matches[1]);
    $tag   = "h$depth";

    # A default id is only generated when a header id callback is set.
    $fallback_id = null;
    if (is_callable($this->header_id_func)) {
        $fallback_id = call_user_func($this->header_id_func, $matches[2]);
    }

    $raw_attr = isset($matches[3]) ? $matches[3] : null;
    $attr     = $this->doExtraAttributes($tag, $raw_attr, $fallback_id);

    $html = "<$tag$attr>" . $this->runSpanGamut($matches[2]) . "</$tag>";
    return "\n" . $this->hashBlock($html) . "\n\n";
}
1009 protected function doTables($text) {
1013 $less_than_tab = $this->tab_width
- 1;
1015 # Find tables with leading pipe.
1017 # | Header 1 | Header 2
1018 # | -------- | --------
1022 $text = preg_replace_callback('
1025 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1026 [|] # Optional leading pipe (present)
1027 (.+) \n # $1: Header row (at least one pipe)
1029 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1030 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
1034 [ ]* # Allowed whitespace.
1035 [|] .* \n # Row content.
1038 (?=\n|\Z) # Stop at final double newline.
1040 array($this, '_doTable_leadingPipe_callback'), $text);
1043 # Find tables without leading pipe.
1045 # Header 1 | Header 2
1046 # -------- | --------
1050 $text = preg_replace_callback('
1053 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1054 (\S.*[|].*) \n # $1: Header row (at least one pipe)
1056 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
1057 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
1061 .* [|] .* \n # Row content
1064 (?=\n|\Z) # Stop at final double newline.
1066 array($this, '_DoTable_callback'), $text);
protected function _doTable_leadingPipe_callback($matches) {
#
# Handle a table written with leading pipes: strip the leading pipe from
# every content row, then let _doTable_callback build the table.
#
    # Remove leading pipe for each row.
    $rows = preg_replace('/^ *[|]/m', '', $matches[3]);

    # Same layout as a regular match: whole match, header row, underline,
    # content rows.
    $normalized = array($matches[0], $matches[1], $matches[2], $rows);
    return $this->_doTable_callback($normalized);
}
protected function _doTable_makeAlignAttr($alignname)
{
#
# Return the attribute (with a leading space) that aligns a table cell.
#
# With a non-empty $this->table_align_class_tmpl a class attribute is
# produced, every %% in the template being replaced by $alignname;
# otherwise the align attribute is used.
#
    $template = $this->table_align_class_tmpl;
    if (!empty($template)) {
        $classname = str_replace('%%', $alignname, $template);
        return " class=\"$classname\"";
    }
    return " align=\"$alignname\"";
}
protected function _doTable_callback($matches) {
#
# Build the hashed <table> block for one table match.
#
# $matches[1] is the header row, $matches[2] the header underline and
# $matches[3] the content rows (one per line).
#
    # Remove any trailing pipes for each line.
    $trailing_pipe_re = '/[|] *$/m';
    $head      = preg_replace($trailing_pipe_re, '', $matches[1]);
    $underline = preg_replace($trailing_pipe_re, '', $matches[2]);
    $content   = preg_replace($trailing_pipe_re, '', $matches[3]);

    # Reading alignment from header underline. Patterns are tried in
    # this order; a column matching none gets no attribute.
    $align_patterns = array(
        'right'  => '/^ *-+: *$/',
        'center' => '/^ *:-+: *$/',
        'left'   => '/^ *:-+ *$/',
    );
    $attr = array();
    foreach (preg_split('/ *[|] */', $underline) as $n => $separator) {
        $attr[$n] = '';
        foreach ($align_patterns as $alignname => $pattern) {
            if (preg_match($pattern, $separator)) {
                $attr[$n] = $this->_doTable_makeAlignAttr($alignname);
                break;
            }
        }
    }

    # Parsing span elements, including code spans, character escapes,
    # and inline HTML tags, so that pipes inside those get ignored.
    $headers   = preg_split('/ *[|] */', $this->parseSpan($head));
    $col_count = count($headers);
    $attr      = array_pad($attr, $col_count, '');

    # Write column headers.
    $html = "<table>\n" . "<thead>\n" . "<tr>\n";
    foreach ($headers as $n => $header) {
        $html .= " <th" . $attr[$n] . ">" . $this->runSpanGamut(trim($header)) . "</th>\n";
    }
    $html .= "</tr>\n" . "</thead>\n";

    # Write one <tr> per content row.
    $html .= "<tbody>\n";
    foreach (explode("\n", trim($content, "\n")) as $row) {
        # Same span parsing as for the header row, so that pipes inside
        # code spans, escapes and inline HTML do not split cells.
        $row = $this->parseSpan($row);

        # Split row by cell, never producing more cells than there are
        # header columns and padding short rows with empty cells.
        $cells = preg_split('/ *[|] */', $row, $col_count);
        $cells = array_pad($cells, $col_count, '');

        $html .= "<tr>\n";
        foreach ($cells as $n => $cell) {
            $html .= " <td" . $attr[$n] . ">" . $this->runSpanGamut(trim($cell)) . "</td>\n";
        }
        $html .= "</tr>\n";
    }
    $html .= "</tbody>\n" . "</table>";

    return $this->hashBlock($html) . "\n";
}
1152 protected function doDefLists($text) {
1154 # Form HTML definition lists.
1156 $less_than_tab = $this->tab_width
- 1;
1158 # Re-usable pattern to match any entire dl list:
1159 $whole_list_re = '(?>
1162 [ ]{0,'.$less_than_tab.'}
1163 ((?>.*\S.*\n)+) # $3 = defined term
1165 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1173 (?! # Negative lookahead for another term
1174 [ ]{0,'.$less_than_tab.'}
1175 (?: \S.*\n )+? # defined term
1177 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1179 (?! # Negative lookahead for another definition
1180 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1186 $text = preg_replace_callback('{
1190 array($this, '_doDefLists_callback'), $text);
protected function _doDefLists_callback($matches) {
#
# Turn one matched definition list ($matches[1]) into a hashed <dl> block.
#
    # Convert the terms and definitions, dropping surrounding whitespace
    # from the generated items.
    $items = trim($this->processDefListItems($matches[1]));

    $html = "<dl>\n" . $items . "\n</dl>";
    return $this->hashBlock($html) . "\n\n";
}
protected function processDefListItems($list_str) {
    #
    # Process the contents of a single definition list, splitting it
    # into individual term and definition list items.
    #
    # $list_str - raw text of one definition list.
    #
    # Returns the text after terms have been rewritten by
    # _processDefListItems_callback_dt and definitions by
    # _processDefListItems_callback_dd.
    #
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    # The lookahead for the definition mark uses the same indentation
    # limit as every other marker in this function (it used to be a fixed
    # 3, which only agrees with a tab width of 4).
    $list_str = preg_replace_callback('{
        (?>\A\n?|\n\n+)                 # leading line
        (                               # definition terms = $1
            [ ]{0,'.$less_than_tab.'}   # leading whitespace
            (?!\:[ ]|[ ])               # negative lookahead for a definition
                                        # mark (colon) or more whitespace.
            (?> \S.* \n)+?              # actual term (not whitespace).
        )
        (?=\n?[ ]{0,'.$less_than_tab.'}:[ ]) # lookahead for following line feed
                                        # with a definition mark.
        }xm',
        array($this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions.
    # This runs after the term pass, so "<dt>" already marks the start of
    # the next term.
    $list_str = preg_replace_callback('{
        \n(\n+)?                        # leading line = $1
        (                               # marker space = $2
            [ ]{0,'.$less_than_tab.'}   # whitespace before colon
            \:[ ]+                      # definition mark (colon)
        )
        ((?s:.+?))                      # definition text = $3
        (?= \n+                         # stop at next definition mark,
            (?:                         # next term or end of text
                [ ]{0,'.$less_than_tab.'} \:[ ] |
                <dt> | \z
            )
        )
        }xm',
        array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
protected function _processDefListItems_callback_dt($matches) {
    #
    # preg_replace_callback handler for the term block matched in
    # processDefListItems.
    #
    # $matches[1] - one or more terms, one per line.
    #
    # Each line is trimmed, run through the span gamut and emitted as its
    # own <dt> element preceded by a newline; a final newline closes the
    # returned string.
    #
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }

    return $html . "\n";
}
protected function _processDefListItems_callback_dd($matches) {
    #
    # preg_replace_callback handler for one definition matched in
    # processDefListItems.
    #
    # $matches[1] - blank line(s) preceding the marker, if any
    # $matches[2] - the marker: leading spaces, colon, following spaces
    # $matches[3] - the definition text
    #
    # Returns the definition wrapped in <dd>...</dd>.
    #
    $blank_before = $matches[1];
    $marker       = $matches[2];
    $body         = $matches[3];

    $needs_blocks = $blank_before || preg_match('/\n{2,}/', $body);

    if (!$needs_blocks) {
        # Compact definition: inline (span-level) processing only.
        $body  = rtrim($body);
        $inner = $this->runSpanGamut($this->outdent($body));
        return "\n<dd>" . $inner . "</dd>\n";
    }

    # Preceded by a blank line, or containing one: process as blocks.
    # Replace marker with the appropriate whitespace indentation
    $padded = str_repeat(' ', strlen($marker)) . $body;
    $blocks = $this->runBlockGamut($this->outdent($padded . "\n\n"));

    return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
}
protected function doFencedCodeBlocks($text) {
    #
    # Adding the fenced code block syntax to regular Markdown:
    #
    # ~~~
    # Code block
    # ~~~
    #
    # $text - text to scan.
    #
    # A fence is a run of 3 or more tildes or backticks. It may be
    # followed by a standalone class name or by an extra-attributes block,
    # and the block ends at a line repeating exactly the opening marker.
    # Each block is replaced by the result of _doFencedCodeBlocks_callback;
    # the resulting text is returned.
    #
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
            )
            [ ]*
            (?:
                \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
            |
                '.$this->id_class_attr_catch_re.' # 3: Extra attributes
            )?
            [ ]* \n # Whitespace and newline following marker.

            # 4: Content
            (
                (?>
                    (?!\1 [ ]* \n) # Not a closing marker.
                    .*\n+
                )+
            )

            # Closing marker.
            \1 [ ]* (?= \n )
        }xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
protected function _doFencedCodeBlocks_callback($matches) {
    #
    # preg_replace_callback handler for one fenced code block.
    #
    # $matches[2] - standalone class name (may be absent)
    # $matches[3] - extra attributes (may be absent)
    # $matches[4] - content of the block
    #
    # Returns the hashed <pre><code>...</code></pre> block surrounded by
    # blank lines.
    #
    # Taken by reference so that an optional group that did not take part
    # in the match reads as null instead of raising an undefined-index
    # notice.
    $classname =& $matches[2];
    $attrs     =& $matches[3];
    $codeblock = $matches[4];
    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    # Newlines at the very start of the block are turned into <br> tags.
    $codeblock = preg_replace_callback('/^\n+/',
        array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    if ($classname != "") {
        # Square-bracket offset: the curly-brace form ($classname{0}) is
        # deprecated since PHP 7.4 and a parse error in PHP 8.
        if ($classname[0] == '.')
            $classname = substr($classname, 1);
        $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
    } else {
        $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
    }

    # Attributes go on <code> unless code_attr_on_pre is set.
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
    $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
protected function _doFencedCodeBlocks_newlines($matches) {
    #
    # preg_replace_callback handler for the leading newlines of a fenced
    # code block.
    #
    # $matches[0] - the run of newline characters.
    #
    # Returns one "<br" + empty_element_suffix tag per character matched.
    #
    $count  = strlen($matches[0]);
    $br_tag = "<br$this->empty_element_suffix";

    return str_repeat($br_tag, $count);
}
#
# Redefining emphasis markers so that emphasis by underscore does not
# work in the middle of a word.
#
# In each list the '' entry matches a marker that is not followed by
# whitespace (optionally after one of . , : ;); its underscore form must
# not be preceded by a letter, digit or underscore. The other entries
# match a marker that is not preceded by whitespace or by the marker
# character; their underscore forms must not be followed by a letter,
# digit or underscore.
# NOTE(review): presumably '' is the opening pattern and the keyed entries
# are the matching closing patterns -- confirm against the parent class.
#
protected $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
    '*' => '(?<![\s*])\*(?!\*)',
    '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
);

# Same rules for the doubled markers.
protected $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
    '**' => '(?<![\s*])\*\*(?!\*)',
    '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
);

# Same rules for the tripled markers.
protected $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
    '***' => '(?<![\s*])\*\*\*(?!\*)',
    '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
);
protected function formParagraphs($text) {
    #
    # Params:
    #   $text - string to process with html <p> tags
    #
    # Splits $text on blank lines, runs the span gamut on every chunk,
    # wraps each chunk in <p> unless it is a hash token, joins the chunks
    # with blank lines and returns the result after unhashing.
    #
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $idx => $chunk) {
        $html = trim($this->runSpanGamut($chunk));

        # Clean tag hashes & block tag hashes are left alone; anything
        # else is enclosed in a paragraph.
        if (!preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html)) {
            $html = "<p>$html</p>";
        }

        $rendered[$idx] = $html;
    }

    # Join grafs in one text, then remove any tag hashes still present.
    return $this->unhash(implode("\n\n", $rendered));
}
protected function stripFootnotes($text) {
    #
    # Strips footnote definitions from text. Each definition found is
    # handed to _stripFootnotes_callback, and the match is replaced by
    # whatever that callback returns.
    #
    # $text - text to scan.
    # Returns the text after replacement.
    #
    $less_than_tab = $this->tab_width - 1;

    # Footnote defs are in the form: [^id]: footnote text
    # The text may continue on following lines. It stops before another
    # footnote or link definition marker, and before a blank line that
    # is followed by non-indented content. "Non-indented" uses the same
    # tab_width - 1 limit as the marker itself (it used to be a fixed 3,
    # which only agrees with a tab width of 4).
    $text = preg_replace_callback('{
        ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
          [ ]*
          \n?                   # maybe *one* newline
        (                       # text = $2 (no blank lines allowed)
            (?:
                .+              # actual text
            |
                \n              # newlines but
                (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
                (?!\n+[ ]{0,'.$less_than_tab.'}\S)# ensure line is not blank and followed
                                # by non-indented content
            )*
        )
        }xm',
        array($this, '_stripFootnotes_callback'),
        $text);

    return $text;
}
protected function _stripFootnotes_callback($matches) {
    #
    # preg_replace_callback handler for one footnote definition.
    #
    # $matches[1] - note id as written in the text
    # $matches[2] - footnote text
    #
    # Stores the outdented text in $this->footnotes under the id prefixed
    # with fn_id_prefix.
    #
    $key = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);
    $this->footnotes[$key] = $body;

    # String that will replace the block
    return '';
}
protected function doFootnotes($text) {
    #
    # Replace footnote references in $text [^id] with a special text-token
    # which will be replaced by the actual footnote marker in appendFootnotes.
    #
    # While in_anchor is set the text is returned untouched.
    #
    if ($this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
protected function appendFootnotes($text) {
    #
    # Append footnote list to text.
    #
    # $text - document text containing the footnote tokens produced by
    #         doFootnotes.
    #
    # The tokens are first turned into markers by
    # _appendFootnotes_callback. If that queued any footnote in
    # footnotes_ordered, a <div class="footnotes"> holding an ordered list
    # is appended, one <li> per footnote, each ending with its backlink(s).
    # Returns the resulting text.
    #
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". $this->empty_element_suffix ."\n";
        $text .= "<ol>\n\n";

        # Attribute template shared by every backlink. It may contain the
        # "%%" placeholder, which stands for the footnote number.
        $attr = "";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr .= " title=\"$title\"";
        }
        $num = 0;

        # The list is consumed from the front instead of iterated, because
        # parsing a footnote can queue further footnotes.
        while (!empty($this->footnotes_ordered)) {
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);
            $ref_count = $this->footnotes_ref_count[$note_id];
            unset($this->footnotes_ref_count[$note_id]);
            unset($this->footnotes[$note_id]);

            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array($this, '_appendFootnotes_callback'), $footnote);

            # Fill the placeholder in a per-footnote copy. Writing the
            # result back into $attr would consume "%%" on the first
            # footnote and stamp its number on every later backlink.
            ++$num;
            $note_attr = str_replace("%%", (string) $num, $attr);
            $note_id = $this->encodeAttribute($note_id);

            # Prepare backlink, multiple backlinks if multiple references
            $backlink = "<a href=\"#fnref:$note_id\"$note_attr>↩</a>";
            for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$note_attr>↩</a>";
            }
            # Add backlink to last paragraph; create new paragraph if needed.
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . " $backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }

    return $text;
}
protected function _appendFootnotes_callback($matches) {
    #
    # preg_replace_callback handler for one footnote token.
    #
    # $matches[1] - note id as written in the text.
    #
    # Returns a <sup> element holding a link to the footnote. When no
    # footnote is stored under that id, returns the literal "[^id]"
    # reference instead.
    #
    $node_id = $this->fn_id_prefix . $matches[1];

    # Create footnote marker only if it has a corresponding footnote.
    if (!isset($this->footnotes[$node_id])) {
        return "[^".$matches[1]."]";
    }

    $num =& $this->footnotes_numbers[$node_id];
    if (isset($num)) {
        # Already numbered: count one more reference. The new count is
        # part of this marker's id.
        $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
    } else {
        # First reference: transfer footnote content to the ordered list
        # and give it its number.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        $this->footnotes_ref_count[$node_id] = 1;
        $num = $this->footnote_counter++;
        $ref_count_mark = '';
    }

    $attr = "";
    if ($this->fn_link_class != "") {
        $attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
    }
    if ($this->fn_link_title != "") {
        $attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
    }

    # "%%" in the configured attributes stands for the footnote number.
    $attr = str_replace("%%", $num, $attr);
    $node_id = $this->encodeAttribute($node_id);

    return
        "<sup id=\"fnref$ref_count_mark:$node_id\">".
        "<a href=\"#fn:$node_id\"$attr>$num</a>".
        "</sup>";
}
1556 ### Abbreviations ###
protected function stripAbbreviations($text) {
    #
    # Strips abbreviation definitions from text. Each definition found is
    # handed to _stripAbbreviations_callback, and the match is replaced by
    # whatever that callback returns.
    #
    # $text - text to scan.
    # Returns the text after replacement.
    #
    $max_indent = $this->tab_width - 1;

    # Abbreviation defs are in the form: *[abbr]: description
    $pattern = '{
        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
        (.*) # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback($pattern,
        array($this, '_stripAbbreviations_callback'),
        $text);
}
protected function _stripAbbreviations_callback($matches) {
    #
    # preg_replace_callback handler for one abbreviation definition.
    #
    # $matches[1] - the abbreviation
    # $matches[2] - its description
    #
    # Adds the regex-quoted abbreviation to the abbr_word_re alternation
    # and stores the trimmed description under the abbreviation.
    #
    $word = $matches[1];
    $description = trim($matches[2]);

    # Entries are separated by "|" once the alternation has content.
    $separator = $this->abbr_word_re ? '|' : '';
    $this->abbr_word_re .= $separator . preg_quote($word);

    $this->abbr_desciptions[$word] = $description;

    # String that will replace the block
    return '';
}
protected function doAbbreviations($text) {
    #
    # Find defined abbreviations in text and wrap them in <abbr> elements.
    #
    # $text - text to scan.
    #
    # Returns $text unchanged while abbr_word_re is empty; otherwise each
    # occurrence is replaced by the result of _doAbbreviations_callback.
    #
    if (!$this->abbr_word_re) {
        return $text;
    }

    // cannot use the /x modifier because abbr_word_re may
    // contain significant spaces:
    // An occurrence only counts when it is not adjacent to a word
    // character or to the \x1A byte.
    $pattern = '{'.
        '(?<![\w\x1A])'.
        '(?:'.$this->abbr_word_re.')'.
        '(?![\w\x1A])'.
        '}';

    return preg_replace_callback($pattern,
        array($this, '_doAbbreviations_callback'), $text);
}
1600 protected function _doAbbreviations_callback($matches) {
1601 $abbr = $matches[0];
1602 if (isset($this->abbr_desciptions
[$abbr])) {
1603 $desc = $this->abbr_desciptions
[$abbr];
1605 return $this->hashPart("<abbr>$abbr</abbr>");
1607 $desc = $this->encodeAttribute($desc);
1608 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");