MDL-61928 formslib: shortforms should work with non-editable forms
[moodle.git] / lib / markdown / MarkdownExtra.php
blobac6b1b4f270ad35e9489d70b5a2d2dc69faab644
1 <?php
2 /**
3 * Markdown Extra - A text-to-HTML conversion tool for web writers
5 * @package php-markdown
6 * @author Michel Fortin <michel.fortin@michelf.com>
7 * @copyright 2004-2016 Michel Fortin <https://michelf.com/projects/php-markdown/>
8 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9 */
11 namespace Michelf;
13 /**
14 * Markdown Extra Parser Class
16 class MarkdownExtra extends \Michelf\Markdown {
17 /**
18 * Configuration variables
21 /**
22 * Prefix for footnote ids.
23 * @var string
25 public $fn_id_prefix = "";
27 /**
28 * Optional title attribute for footnote links and backlinks.
29 * @var string
31 public $fn_link_title = "";
32 public $fn_backlink_title = "";
34 /**
35 * Optional class attribute for footnote links and backlinks.
36 * @var string
38 public $fn_link_class = "footnote-ref";
39 public $fn_backlink_class = "footnote-backref";
41 /**
42 * Content to be displayed within footnote backlinks. The default is '↩';
43 * the U+FE0E on the end is a Unicode variant selector used to prevent iOS
44 * from displaying the arrow character as an emoji.
45 * @var string
47 public $fn_backlink_html = '&#8617;&#xFE0E;';
49 /**
50 * Class name for table cell alignment (%% replaced left/center/right)
51 * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
52 * If empty, the align attribute is used instead of a class name.
53 * @var string
55 public $table_align_class_tmpl = '';
57 /**
58 * Optional class prefix for fenced code block.
59 * @var string
61 public $code_class_prefix = "";
63 /**
64 * Class attribute for code blocks goes on the `code` tag;
65 * setting this to true will put attributes on the `pre` tag instead.
66 * @var boolean
68 public $code_attr_on_pre = false;
70 /**
71 * Predefined abbreviations.
72 * @var array
74 public $predef_abbr = array();
76 /**
77 * Parser implementation
/**
 * Constructor. Registers the Markdown Extra escape characters and
 * transformations, then hands over to the parent constructor.
 * @return void
 */
public function __construct() {
    // These characters must be appended before the parent constructor
    // initializes the escape table.
    $this->escape_chars .= ':|';

    // Extra transformations, keyed by the gamut they are merged into.
    // The parent constructor does the sorting.
    $extra_gamuts = array(
        'document_gamut' => array(
            "doFencedCodeBlocks" => 5,
            "stripFootnotes"     => 15,
            "stripAbbreviations" => 25,
            "appendFootnotes"    => 50,
        ),
        'block_gamut' => array(
            "doFencedCodeBlocks" => 5,
            "doTables"           => 15,
            "doDefLists"         => 45,
        ),
        'span_gamut' => array(
            "doFootnotes"        => 5,
            "doAbbreviations"    => 70,
        ),
    );
    foreach ($extra_gamuts as $gamut_name => $transformations) {
        // Array union: entries already present in the gamut are kept.
        $this->$gamut_name += $transformations;
    }

    $this->enhanced_ordered_list = true;
    parent::__construct();
}
113 * Extra variables used during extra transformations.
114 * @var array
116 protected $footnotes = array();
117 protected $footnotes_ordered = array();
118 protected $footnotes_ref_count = array();
119 protected $footnotes_numbers = array();
120 protected $abbr_desciptions = array();
121 /** @var string */
122 protected $abbr_word_re = '';
125 * Give the current footnote number.
126 * @var integer
128 protected $footnote_counter = 1;
/**
 * Setting up Extra-specific variables.
 *
 * Resets the footnote and abbreviation state, then seeds the abbreviation
 * regex and description table from $this->predef_abbr.
 * @return void
 */
protected function setup() {
    parent::setup();

    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->footnotes_ref_count = array();
    $this->footnotes_numbers = array();
    $this->abbr_desciptions = array();
    $this->abbr_word_re = '';
    $this->footnote_counter = 1;

    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
        // Compare against the empty string explicitly: a plain truthiness
        // test treats an accumulated pattern of "0" as empty and would glue
        // the next word on without the "|" separator.
        if ($this->abbr_word_re !== '') {
            $this->abbr_word_re .= '|';
        }
        $this->abbr_word_re .= preg_quote($abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    }
}
/**
 * Clearing Extra-specific variables once parsing is done.
 * @return void
 */
protected function teardown() {
    // Every array-valued working variable goes back to an empty array.
    $array_properties = array(
        'footnotes',
        'footnotes_ordered',
        'footnotes_ref_count',
        'footnotes_numbers',
        'abbr_desciptions',
    );
    foreach ($array_properties as $property) {
        $this->$property = array();
    }
    $this->abbr_word_re = '';

    parent::teardown();
}
168 * Extra attribute parser
172 * Expression to use to catch attributes (includes the braces)
173 * @var string
175 protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
178 * Expression to use when parsing in a context when no capture is desired
179 * @var string
181 protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
/**
 * Parse attributes caught by the $this->id_class_attr_catch_re expression
 * and return the HTML-formatted list of attributes.
 *
 * Supported components are `.class`, `#id` (only the first one counts) and
 * `key=value`. The key=value pairs are only emitted when $this->no_markup
 * is false.
 *
 * In addition, this method also supports supplying a default Id value,
 * which will be used to populate the id attribute in case it was not
 * overridden.
 * @param  string $tag_name       Name of the tag (not used by this implementation)
 * @param  string $attr           Attribute text without the surrounding braces
 * @param  mixed  $defaultIdValue Id used when $attr carries no usable #id
 * @param  array  $classes        Classes to emit ahead of the parsed ones
 * @return string Attribute string with a leading space, or "" if nothing to emit
 */
protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
    if (empty($attr) && !$defaultIdValue && empty($classes)) return "";

    // Split on components
    preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
    $elements = $matches[0];

    // Handle classes and IDs (only first ID taken into account)
    $attributes = array();
    $id = false;
    foreach ($elements as $element) {
        // Square-bracket offsets: the `$str{0}` form was deprecated in
        // PHP 7.4 and is a parse error from PHP 8.0 on.
        $marker = $element[0];
        if ($marker == '.') {
            $classes[] = substr($element, 1);
        } else if ($marker == '#') {
            if ($id === false) $id = substr($element, 1);
        } else if (strpos($element, '=') > 0) {
            $parts = explode('=', $element, 2);
            $attributes[] = $parts[0] . '="' . $parts[1] . '"';
        }
    }

    // Fall back to the default only when no id was given at all; an
    // explicit id of "0" is kept rather than treated as missing.
    if ($id === false || $id === '') $id = $defaultIdValue;

    // Compose attributes as string
    $attr_str = "";
    if ($id !== null && $id !== false && $id !== '') {
        $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
    }
    if (!empty($classes)) {
        $attr_str .= ' class="'. implode(" ", $classes) . '"';
    }
    if (!$this->no_markup && !empty($attributes)) {
        $attr_str .= ' '.implode(" ", $attributes);
    }
    return $attr_str;
}
/**
 * Remove link definitions from the text. Each definition found is handed
 * to _stripLinkDefinitions_callback, which records it, and is replaced by
 * whatever that callback returns.
 * @param  string $text
 * @return string
 */
protected function stripLinkDefinitions($text) {
    $max_indent = $this->tab_width - 1;

    // Link defs are in the form: ^[id]: url "optional title"
    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
          [ ]*
          \n?               # maybe *one* newline
          [ ]*
        (?:
          <(.+?)>           # url = $2
        |
          (\S+?)            # url = $3
        )
          [ ]*
          \n?               # maybe one newline
          [ ]*
        (?:
            (?<=\s)         # lookbehind for whitespace
            ["(]
            (.*?)           # title = $4
            [")]
            [ ]*
        )?  # title is optional
        (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
        (?:\n+|\Z)
        }xm';

    return preg_replace_callback(
        $definition_re,
        array($this, '_stripLinkDefinitions_callback'),
        $text
    );
}
/**
 * Record one link definition (URL, title, extra attributes) under its
 * lower-cased id.
 * @param  array $matches Match groups from stripLinkDefinitions
 * @return string Always '', so the definition disappears from the text
 */
protected function _stripLinkDefinitions_callback($matches) {
    $link_id = strtolower($matches[1]);

    // Trailing groups that did not take part in the match are absent from
    // $matches; treat them as null.
    $title = isset($matches[4]) ? $matches[4] : null;
    $extra = isset($matches[5]) ? $matches[5] : null;

    // The URL is in group 2 (angle-bracket form) or group 3 (bare form).
    if ($matches[2] == '') {
        $this->urls[$link_id] = $matches[3];
    } else {
        $this->urls[$link_id] = $matches[2];
    }
    $this->titles[$link_id] = $title;
    $this->ref_attr[$link_id] = $this->doExtraAttributes("", $extra);

    return ''; // String that will replace the block
}
289 * HTML block parser
293 * Tags that are always treated as block tags
294 * @var string
296 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
299 * Tags treated as block tags only if the opening tag is alone on its line
300 * @var string
302 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
305 * Tags where markdown="1" default to span mode:
306 * @var string
308 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
311 * Tags which must not have their contents modified, no matter where
312 * they appear
313 * @var string
315 protected $clean_tags_re = 'script|style|math|svg';
318 * Tags that do not need to be closed.
319 * @var string
321 protected $auto_close_tags_re = 'hr|img|param|source|track';
/**
 * Hashify HTML Blocks and "clean tags".
 *
 * We only want to do this for block-level HTML tags, such as headers,
 * lists, and tables. That's because we still want to wrap <p>s around
 * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 * phrase emphasis, and spans. The list of tags we're looking for is
 * hard-coded.
 *
 * The work is delegated to _hashHTMLBlocks_inMarkdown, which calls
 * _hashHTMLBlocks_inHTML when it encounters block tags. When the
 * markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
 * calls back into _hashHTMLBlocks_inMarkdown to handle the Markdown syntax
 * within the tag. The two functions are mutually recursive.
 * @param  string $text
 * @return string The text unchanged when $this->no_markup is set
 */
protected function hashHTMLBlocks($text) {
    if (!$this->no_markup) {
        // Only the processed text matters here; the "remaining text"
        // element of the returned pair is discarded.
        $result = $this->_hashHTMLBlocks_inMarkdown($text);
        $text = $result[0];
    }

    return $text;
}
/**
 * Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
 *
 * *   $indent is the number of space to be ignored when checking for code
 *     blocks. This is important because if we don't take the indent into
 *     account, something like this (which looks right) won't work as expected:
 *
 *     <div>
 *         <div markdown="1">
 *         Hello World.  <-- Is this a Markdown code block or text?
 *         </div>  <-- Is this a Markdown code block or a real tag?
 *     <div>
 *
 *     If you don't like this, just don't indent the tag on which
 *     you apply the markdown="1" attribute.
 *
 * *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
 *     tag with that name. Nested tags supported.
 *
 * *   If $span is true, text inside must be treated as span. So any double
 *     newline will be replaced by a single newline so that it does not create
 *     paragraphs.
 *
 * Returns an array of that form: ( processed text , remaining text )
 *
 * @param  string  $text
 * @param  integer $indent
 * @param  string  $enclosing_tag_re
 * @param  boolean $span
 * @return array
 */
protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag_re = '', $span = false)
{
    if ($text === '') return array('', '');

    // Regex to check for the presence of newlines around a block tag.
    $newline_before_re = '/(?:^\n?|\n\n)*$/';
    $newline_after_re =
        '{
            ^                       # Start of text following the tag.
            (?>[ ]*<!--.*?-->)?     # Optional comment.
            [ ]*\n                  # Must be followed by newline.
        }xs';

    // Regex to match any tag.
    $block_tag_re =
        '{
            (                   # $2: Capture whole tag.
                </?                 # Any opening or closing tag.
                    (?>             # Tag name.
                        ' . $this->block_tags_re . '            |
                        ' . $this->context_block_tags_re . '    |
                        ' . $this->clean_tags_re . '            |
                        (?!\s)'.$enclosing_tag_re . '
                    )
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            ' . ( !$span ? ' # If not in span.
            |
                # Indented code block
                (?: ^[ ]*\n | ^ | \n[ ]*\n )
                [ ]{' . ($indent + 4) . '}[^\n]* \n
                (?>
                    (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n
                )*
            |
                # Fenced code block marker
                (?<= ^ | \n )
                [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
                [ ]*
                (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
                [ ]*
                (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
                [ ]*
                (?= \n )
            ' : '' ) . ' # End (if not is span).
            |
                # Code span marker
                # Note, this regex needs to go after backtick fenced
                # code blocks but it should also be kept outside of the
                # "if not in span" condition adding backticks to the parser
                `+
            )
        }xs';

    $depth = 0;     // Current depth inside the tag tree.
    $parsed = "";   // Parsed text that will be returned.

    // Loop through every tag until we find the closing tag of the parent
    // or loop until reaching the end of text if no parent tag specified.
    do {
        // Split the text using the first $block_tag_re match found.
        // Text before pattern will be first in the array, text after
        // pattern will be at the end, and between will be any catches made
        // by the pattern.
        $parts = preg_split($block_tag_re, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        // If in Markdown span mode, add a empty-string span-level hash
        // after each newline to prevent triggering any block element.
        if ($span) {
            $void = $this->hashPart("", ':');
            $newline = "\n$void";
            $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
        }

        $parsed .= $parts[0]; // Text before current tag.

        // If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; // Tag to handle.
        $text = $parts[2]; // Remaining text after current tag.

        // NOTE: all character offsets below use `$tag[n]`; the `$tag{n}`
        // form was deprecated in PHP 7.4 and is a parse error in PHP 8.0.

        // Check for: Fenced code block marker.
        // Note: need to recheck the whole tag to disambiguate backtick
        // fences from code spans
        if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) {
            // Fenced code block marker: find matching end marker.
            $fence_indent = strlen($capture[1]); // use captured indent in re
            $fence_re = $capture[2]; // use captured fence in re
            if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
                $matches))
            {
                // End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                // No end marker: just skip it.
                $parsed .= $tag;
            }
        }
        // Check for: Indented code block.
        else if ($tag[0] == "\n" || $tag[0] == " ") {
            // Indented code block: pass it unchanged, will be handled
            // later.
            $parsed .= $tag;
        }
        // Check for: Code span marker
        // Note: need to check this after backtick fenced code blocks
        else if ($tag[0] == "`") {
            // Find corresponding end marker.
            $tag_re = preg_quote($tag);
            if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
                $text, $matches))
            {
                // End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                // Unmatched marker: just skip it.
                $parsed .= $tag;
            }
        }
        // Check for: Opening Block level tag or
        //            Opening Context Block tag (like ins and del)
        //               used as a block tag (tag is alone on it's line).
        else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) ||
            (   preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) &&
                preg_match($newline_before_re, $parsed) &&
                preg_match($newline_after_re, $text)    )
            )
        {
            // Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            // Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        // Check for: Clean tag (like script, math)
        //            HTML Comments, processing instructions.
        else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            // Need to parse tag and following text using the HTML parser.
            // (don't check for markdown attribute)
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $parsed .= $block_text;
        }
        // Check for: Tag with same name as enclosing tag.
        else if ($enclosing_tag_re !== '' &&
            // Same name as enclosing tag.
            preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag))
        {
            // Increase/decrease nested tag count. A tag ending in "/>" is
            // self-closing and leaves the depth unchanged.
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                // Going out of parent element. Clean up and break so we
                // return to the calling function.
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
/**
 * Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
 *
 * *   Calls $hash_method to convert any blocks.
 * *   Stops when the first opening tag closes.
 * *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
 *     (it is not inside clean tags)
 *
 * Returns an array of that form: ( processed text , remaining text )
 * @param  string $text
 * @param  string $hash_method Name of the method of $this used to hash blocks
 * @param  string $md_attr
 * @return array
 */
protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    if ($text === '') return array('', '');

    // Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s*         # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\'])     # $1: quote delimiter
                (.*?)       # $2: attribute value
                \1          # matching delimiter
            |
                ([^\s>]*)   # $3: unquoted attribute value
            )
            ()              # $4: make $3 always defined (avoid warnings)
        }xs';

    // Regex to match any tag.
    $tag_re = '{
            (                   # $2: Capture whole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $original_text = $text;     // Save original text in case of failure.

    $depth      = 0;    // Current depth inside the tag tree.
    $block_text = "";   // Temporary text holder for current text.
    $parsed     = "";   // Parsed text that will be returned.

    // Get the name of the starting tag.
    // (This pattern makes $base_tag_name_re safe without quoting.)
    // Initialized first so it is defined even when $text does not start
    // with a named tag (e.g. a comment or processing instruction).
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    // Loop through every tag until we find the corresponding closing tag.
    do {
        // Split the text using the first $tag_re match found.
        // Text before pattern will be first in the array, text after
        // pattern will be at the end, and between will be any catches made
        // by the pattern.
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            // End of $text reached with unbalanced tag(s).
            // In that case, we return original text unchanged and pass the
            // first character as filtered to prevent an infinite loop in the
            // parent function.
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; // Text before current tag.
        $tag         = $parts[1]; // Tag to handle.
        $text        = $parts[2]; // Remaining text after current tag.

        // NOTE: character offsets use `$tag[n]`; the `$tag{n}` form was
        // deprecated in PHP 7.4 and is a parse error in PHP 8.0.

        // Check for: Auto-close tag (like <hr/>)
        //            Comments and Processing Instructions.
        if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            // Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            // Increase/decrease nested tag count. Only do so if
            // the tag's name match base tag's.
            if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            // Check for `markdown="1"` attribute and handle it.
            // The alternation is grouped so that the anchors apply to all
            // three values; ungrouped, `/^1|block|span$/` also accepted
            // values such as "10", "blockquote" or "wingspan".
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                // Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                // Check if text inside this tag must be parsed in span mode.
                // (The mode is stored on the instance, as before.)
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' || ($this->mode != 'block' &&
                    preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag));

                // Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                // End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                // Get enclosing tag name for the ParseMarkdown function.
                // (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                // Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                // Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                // Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                // Start over with a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    // Hash last block text that wasn't processed inside the loop.
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
/**
 * Hash a "clean" tag: the text is passed to hashPart with the 'C'
 * boundary so that it is automatically escaped, blocking invalid nested
 * overlap.
 * @param  string $text
 * @return string
 */
protected function hashClean($text) {
    $hashed = $this->hashPart($text, 'C');
    return $hashed;
}
/**
 * Turn Markdown link shortcuts into XHTML <a> tags.
 *
 * Does nothing when already inside an anchor ($this->in_anchor); otherwise
 * the flag is raised for the duration of the call.
 * @param  string $text
 * @return string
 */
protected function doAnchors($text) {
    if ($this->in_anchor) {
        return $text;
    }
    $this->in_anchor = true;

    // Reference-style links: [link text] [id]
    $reference_re = '{
        (                   # wrap whole match in $1
          \[
            (' . $this->nested_brackets_re . ') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]
        )
        }xs';

    // Inline-style links: [link text](url "optional title")
    $inline_re = '{
        (               # wrap whole match in $1
          \[
            (' . $this->nested_brackets_re . ') # link text = $2
          \]
          \(            # literal paren
            [ \n]*
            (?:
                <(.+?)> # href = $3
            |
                (' . $this->nested_url_parenthesis_re . ')  # href = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # Title = $7
              \6        # matching quote
              [ \n]*    # ignore any spaces/tabs between closing quote and )
            )?          # title is optional
          \)
          (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?   # $8 = id/class attributes
        )
        }xs';

    // Reference-style shortcuts: [link text]
    $shortcut_re = '{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';

    // Order matters: the shortcut form must come last in case you've also
    // got [link text][1] or [link text](/foo).
    $text = preg_replace_callback($reference_re,
        array($this, '_doAnchors_reference_callback'), $text);
    $text = preg_replace_callback($inline_re,
        array($this, '_doAnchors_inline_callback'), $text);
    $text = preg_replace_callback($shortcut_re,
        array($this, '_doAnchors_reference_callback'), $text);

    $this->in_anchor = false;
    return $text;
}
/**
 * Callback for reference anchors ([text][id], [text][] and [text]).
 * @param  array $matches
 * @return string Hashed <a> element, or the untouched match when the id
 *                has no recorded URL
 */
protected function _doAnchors_reference_callback($matches) {
    $link_text = $matches[2];

    // Group 3 is absent for the [this] shortcut and empty for [this][];
    // in both cases the link text doubles as the id.
    $link_id = isset($matches[3]) ? $matches[3] : null;
    if ($link_id == "") {
        $link_id = $link_text;
    }

    // lower-case and turn embedded newlines into spaces
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    if (!isset($this->urls[$link_id])) {
        // Unknown id: leave the source text as it was.
        return $matches[1];
    }

    $url = $this->encodeURLAttribute($this->urls[$link_id]);
    $result = "<a href=\"$url\"";

    if (isset($this->titles[$link_id])) {
        $title = $this->encodeAttribute($this->titles[$link_id]);
        $result .= " title=\"$title\"";
    }
    if (isset($this->ref_attr[$link_id])) {
        $result .= $this->ref_attr[$link_id];
    }

    $link_text = $this->runSpanGamut($link_text);
    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
/**
 * Callback for inline anchors: [text](url "title"){attributes}
 * @param  array $matches
 * @return string Hashed <a> element
 */
protected function _doAnchors_inline_callback($matches) {
    $link_text = $this->runSpanGamut($matches[2]);

    // The href is in group 3 (angle-bracket form) or group 4 (bare form).
    $url = $matches[3];
    if ($url == '') {
        $url = $matches[4];
    }

    // Trailing groups that did not match are absent from $matches.
    $extra = isset($matches[8]) ? $matches[8] : null;
    $attr = $this->doExtraAttributes("a", $extra);

    // if the URL was of the form <s p a c e s> it got caught by the HTML
    // tag parser and hashed. Need to reverse the process before using the URL.
    $unhashed = $this->unhash($url);
    if ($unhashed != $url) {
        $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
    }

    $url = $this->encodeURLAttribute($url);

    $result = "<a href=\"$url\"";
    if (isset($matches[7])) {
        $title = $this->encodeAttribute($matches[7]);
        $result .= " title=\"$title\"";
    }
    $result .= $attr;

    // NOTE(review): the link text goes through the span gamut a second
    // time here; kept as-is to preserve the existing output.
    $link_text = $this->runSpanGamut($link_text);
    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
/**
 * Turn Markdown image shortcuts into <img> tags.
 * @param  string $text
 * @return string
 */
protected function doImages($text) {
    // Reference-style labeled images: ![alt text][id]
    $reference_re = '{
        (               # wrap whole match in $1
          !\[
            (' . $this->nested_brackets_re . ')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';

    // Inline images: ![alt text](url "optional title")
    // Don't forget: encode * and _
    $inline_re = '{
        (               # wrap whole match in $1
          !\[
            (' . $this->nested_brackets_re . ')     # alt text = $2
          \]
          \s?           # One optional whitespace character
          \(            # literal paren
            [ \n]*
            (?:
                <(\S*)> # src url = $3
            |
                (' . $this->nested_url_parenthesis_re . ')  # src url = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # title = $7
              \6        # matching quote
              [ \n]*
            )?          # title is optional
          \)
          (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?   # $8 = id/class attributes
        )
        }xs';

    // Reference-style images are handled first, inline images second.
    $text = preg_replace_callback($reference_re,
        array($this, '_doImages_reference_callback'), $text);

    return preg_replace_callback($inline_re,
        array($this, '_doImages_inline_callback'), $text);
}
/**
 * Callback for referenced images: ![alt text][id]
 * @param  array $matches
 * @return string Hashed <img> element, or the untouched match when the id
 *                has no recorded URL
 */
protected function _doImages_reference_callback($matches) {
    $raw_alt = $matches[2];

    $link_id = strtolower($matches[3]);
    if ($link_id == "") {
        // for shortcut links like ![this][].
        $link_id = strtolower($raw_alt);
    }

    $alt_text = $this->encodeAttribute($raw_alt);

    if (!isset($this->urls[$link_id])) {
        // If there's no such link ID, leave intact:
        return $matches[1];
    }

    $url = $this->encodeURLAttribute($this->urls[$link_id]);
    $result = "<img src=\"$url\" alt=\"$alt_text\"";

    if (isset($this->titles[$link_id])) {
        $title = $this->encodeAttribute($this->titles[$link_id]);
        $result .= " title=\"$title\"";
    }
    if (isset($this->ref_attr[$link_id])) {
        $result .= $this->ref_attr[$link_id];
    }
    $result .= $this->empty_element_suffix;

    return $this->hashPart($result);
}
/**
 * Callback for inline images: ![alt text](url "title"){attributes}
 * @param  array $matches
 * @return string Hashed <img> element
 */
protected function _doImages_inline_callback($matches) {
    // The src is in group 3 (angle-bracket form) or group 4 (bare form).
    $url = $matches[3];
    if ($url == '') {
        $url = $matches[4];
    }

    // Trailing groups that did not match are absent from $matches.
    $extra = isset($matches[8]) ? $matches[8] : null;
    $attr = $this->doExtraAttributes("img", $extra);

    $alt_text = $this->encodeAttribute($matches[2]);
    $url = $this->encodeURLAttribute($url);

    $pieces = array("<img src=\"$url\" alt=\"$alt_text\"");
    if (isset($matches[7])) {
        $title = $this->encodeAttribute($matches[7]);
        $pieces[] = " title=\"$title\"";
    }
    $pieces[] = $attr;
    $pieces[] = $this->empty_element_suffix;

    return $this->hashPart(implode('', $pieces));
}
/**
 * Process markdown headers. Redefined to add ID and class attribute support.
 * @param  string $text
 * @return string
 */
protected function doHeaders($text) {
    // Setext-style headers:
    //    Header 1  {#header1}
    //    ========
    //
    //    Header 2  {#header2 .class1 .class2}
    //    --------
    //
    // The capture-group comments below match what
    // _doHeaders_callback_setext reads: $1 text, $2 attributes, $3 footer.
    $text = preg_replace_callback(
        '{
            (^.+?)                              # $1: Header text
            (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?    # $2 = id/class attributes
            [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
        }mx',
        array($this, '_doHeaders_callback_setext'), $text);

    // atx-style headers:
    //    # Header 1        {#header1}
    //    ## Header 2       {#header2}
    //    ## Header 2 with closing hashes ##  {#header3.class1.class2}
    //    ...
    //    ###### Header 6   {.class2}
    //
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?    # $3 = id/class attributes
            [ ]*
            \n+
        }xm',
        array($this, '_doHeaders_callback_atx'), $text);

    return $text;
}
/**
 * Callback for setext headers.
 * @param  array $matches $1 header text, $2 extra attributes, $3 underline
 * @return string Hashed <h1>/<h2> block, or the untouched match when the
 *                text is really a list item over a single dash
 */
protected function _doHeaders_callback_setext($matches) {
    // "- item" followed by a lone "-" is left alone rather than turned
    // into a header.
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) {
        return $matches[0];
    }

    // Square-bracket offset: the `$str{0}` form was deprecated in PHP 7.4
    // and is a parse error from PHP 8.0 on.
    $level = $matches[3][0] == '=' ? 1 : 2;

    $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;

    $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
    $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
/**
 * Callback for atx headers.
 * @param  array $matches $1 leading #'s, $2 header text, $3 extra attributes
 * @return string Hashed header block surrounded by newlines
 */
protected function _doHeaders_callback_atx($matches) {
    // The header level is the number of leading # characters.
    $level = strlen($matches[1]);
    $header_text = $matches[2];

    $defaultId = null;
    if (is_callable($this->header_id_func)) {
        $defaultId = call_user_func($this->header_id_func, $header_text);
    }

    // Group 3 is absent from $matches when no {attributes} were given.
    $extra = isset($matches[3]) ? $matches[3] : null;
    $attr = $this->doExtraAttributes("h$level", $extra, $defaultId);

    $block = "<h$level$attr>" . $this->runSpanGamut($header_text) . "</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
/**
 * Form HTML tables.
 *
 * Tables written with a leading pipe on each row are handled first, then
 * tables written without one.
 * @param  string $text
 * @return string
 */
protected function doTables($text) {
    $less_than_tab = $this->tab_width - 1;

    // Find tables with leading pipe.
    //
    //    | Header 1 | Header 2
    //    | -------- | --------
    //    | Cell 1   | Cell 2
    //    | Cell 3   | Cell 4
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
            [|]                         # Optional leading pipe (present)
            (.+) \n                     # $1: Header row (at least one pipe)

            [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

            (                           # $3: Cells
                (?>
                    [ ]*                # Allowed whitespace.
                    [|] .* \n           # Row content.
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array($this, '_doTable_leadingPipe_callback'), $text);

    // Find tables without leading pipe.
    //
    //    Header 1 | Header 2
    //    -------- | --------
    //    Cell 1   | Cell 2
    //    Cell 3   | Cell 4
    //
    // The callback name is spelled exactly like the method definition
    // (`_doTable_callback`) instead of relying on PHP resolving method
    // names case-insensitively.
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
            (\S.*[|].*) \n              # $1: Header row (at least one pipe)

            [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

            (                           # $3: Cells
                (?>
                    .* [|] .* \n        # Row content
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array($this, '_doTable_callback'), $text);

    return $text;
}
/**
 * Callback for tables written with a leading pipe on each row.
 *
 * Strips the leading pipe from every line of the cell content and then
 * delegates to the generic table callback.
 * @param array $matches
 * @return string
 */
protected function _doTable_leadingPipe_callback($matches) {
    // Remove the pipe (and any spaces before it) at the start of each
    // content line; header and underline are passed through untouched.
    $body = preg_replace('/^ *[|]/m', '', $matches[3]);

    $normalized = array($matches[0], $matches[1], $matches[2], $body);
    return $this->_doTable_callback($normalized);
}
/**
 * Make the alignment attribute for a table cell.
 *
 * When a class template is configured, "%%" in the template is replaced by
 * the alignment name and a class attribute is produced; otherwise a plain
 * align attribute is produced.
 * @param string $alignname
 * @return string
 */
protected function _doTable_makeAlignAttr($alignname) {
    if (!empty($this->table_align_class_tmpl)) {
        $css_class = str_replace('%%', $alignname, $this->table_align_class_tmpl);
        return ' class="' . $css_class . '"';
    }

    return ' align="' . $alignname . '"';
}
/**
 * Callback for processing tables.
 *
 * Expects the header row in $matches[1], the header underline in
 * $matches[2] and the body rows in $matches[3], and returns the hashed
 * table markup followed by a newline.
 * @param array $matches
 * @return string
 */
protected function _doTable_callback($matches) {
    // Remove any trailing pipe at the end of each line.
    $trailing_pipe_re = '/[|] *$/m';
    $head_src = preg_replace($trailing_pipe_re, '', $matches[1]);
    $underline_src = preg_replace($trailing_pipe_re, '', $matches[2]);
    $body_src = preg_replace($trailing_pipe_re, '', $matches[3]);

    $cell_split_re = '/ *[|] */';

    // Read the alignment of each column from the colons in the header
    // underline.
    $attr = array();
    foreach (preg_split($cell_split_re, $underline_src) as $col => $marker) {
        if (preg_match('/^ *-+: *$/', $marker)) {
            $attr[$col] = $this->_doTable_makeAlignAttr('right');
        } elseif (preg_match('/^ *:-+: *$/', $marker)) {
            $attr[$col] = $this->_doTable_makeAlignAttr('center');
        } elseif (preg_match('/^ *:-+ *$/', $marker)) {
            $attr[$col] = $this->_doTable_makeAlignAttr('left');
        } else {
            $attr[$col] = '';
        }
    }

    // Span elements (code spans, character escapes, inline HTML tags) are
    // parsed before splitting so that pipes inside them are ignored.
    $headers = preg_split($cell_split_re, $this->parseSpan($head_src));
    $col_count = count($headers);
    // The header row decides the column count; pad the alignments to it.
    $attr = array_pad($attr, $col_count, '');

    // Column headers.
    $html = "<table>\n" . "<thead>\n" . "<tr>\n";
    foreach ($headers as $col => $header) {
        $html .= " <th" . $attr[$col] . ">" . $this->runSpanGamut(trim($header)) . "</th>\n";
    }
    $html .= "</tr>\n" . "</thead>\n";

    // Body rows, one per line of content.
    $html .= "<tbody>\n";
    foreach (explode("\n", trim($body_src, "\n")) as $line) {
        // Same span pre-parse as for the header row.
        $line = $this->parseSpan($line);

        // Split into at most $col_count cells, padding short rows.
        $cells = preg_split($cell_split_re, $line, $col_count);
        $cells = array_pad($cells, $col_count, '');

        $html .= "<tr>\n";
        foreach ($cells as $col => $cell) {
            $html .= " <td" . $attr[$col] . ">" . $this->runSpanGamut(trim($cell)) . "</td>\n";
        }
        $html .= "</tr>\n";
    }
    $html .= "</tbody>\n" . "</table>";

    return $this->hashBlock($html) . "\n";
}
/**
 * Form HTML definition lists.
 *
 * Finds every complete definition list that starts at the beginning of the
 * text or after a blank line and replaces it through
 * _doDefLists_callback().
 * @param string $text
 * @return string
 */
protected function doDefLists($text) {
    $max_indent = $this->tab_width - 1;

    // Pattern matching one entire definition list (used with the m and x
    // modifiers below).
    $whole_list_re = '(?>
        ( # $1 = whole list
          ( # $2
            [ ]{0,' . $max_indent . '}
            ((?>.*\S.*\n)+) # $3 = defined term
            \n?
            [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          ( # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?! # Negative lookahead for another term
                [ ]{0,' . $max_indent . '}
                (?: \S.*\n )+? # defined term
                \n?
                [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
              )
              (?! # Negative lookahead for another definition
                [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    $anchored_list_re = '{
            (?>\A\n?|(?<=\n\n))
            ' . $whole_list_re . '
        }mx';

    return preg_replace_callback(
        $anchored_list_re,
        array($this, '_doDefLists_callback'),
        $text
    );
}
/**
 * Callback for processing definition lists.
 *
 * Converts the matched list ($matches[1]) into term and definition items,
 * wraps them in a dl element and hashes the result as a block.
 * @param array $matches
 * @return string
 */
protected function _doDefLists_callback($matches) {
    // Turn the raw list text into its dt/dd items, without the surrounding
    // whitespace left by the item callbacks.
    $items = trim($this->processDefListItems($matches[1]));

    $html = "<dl>\n" . $items . "\n</dl>";
    return $this->hashBlock($html) . "\n\n";
}
/**
 * Process the contents of a single definition list, splitting it
 * into individual term and definition list items.
 *
 * Terms are converted first, then definitions; the definition pass relies
 * on the "<dt>" markup produced by the term pass to know where a
 * definition ends.
 * @param string $list_str
 * @return string
 */
protected function processDefListItems($list_str) {

    $less_than_tab = $this->tab_width - 1;

    // Trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    // Process definition terms.
    // The lookahead for the definition mark uses the same indentation
    // bound ($less_than_tab) as the definition pattern below, instead of a
    // hard-coded 3, so both stay in agreement when tab_width is not 4.
    $list_str = preg_replace_callback('{
        (?>\A\n?|\n\n+) # leading line
        ( # definition terms = $1
            [ ]{0,' . $less_than_tab . '} # leading whitespace
            (?!\:[ ]|[ ]) # negative lookahead for a definition
            # mark (colon) or more whitespace.
            (?> \S.* \n)+? # actual term (not whitespace).
        )
        (?=\n?[ ]{0,' . $less_than_tab . '}:[ ]) # lookahead for following line feed
        # with a definition mark.
        }xm',
        array($this, '_processDefListItems_callback_dt'), $list_str);

    // Process actual definitions.
    $list_str = preg_replace_callback('{
        \n(\n+)? # leading line = $1
        ( # marker space = $2
            [ ]{0,' . $less_than_tab . '} # whitespace before colon
            \:[ ]+ # definition mark (colon)
        )
        ((?s:.+?)) # definition text = $3
        (?= \n+ # stop at next definition mark,
            (?: # next term or end of text
                [ ]{0,' . $less_than_tab . '} \:[ ] |
                <dt> | \z
            )
        )
        }xm',
        array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
/**
 * Callback for <dt> elements in definition lists.
 *
 * Every line of the matched term block ($matches[1]) becomes its own
 * dt element.
 * @param array $matches
 * @return string
 */
protected function _processDefListItems_callback_dt($matches) {
    $rendered = array();
    foreach (explode("\n", trim($matches[1])) as $line) {
        $rendered[] = $this->runSpanGamut(trim($line));
    }

    // explode() always yields at least one entry, so the join below always
    // produces at least one complete dt element.
    return "\n<dt>" . implode("</dt>\n<dt>", $rendered) . "</dt>\n";
}
/**
 * Callback for <dd> elements in definition lists.
 *
 * $matches[1] holds the blank line(s) before the definition mark, if any,
 * $matches[2] the mark together with its surrounding spaces, and
 * $matches[3] the definition text.
 * @param array $matches
 * @return string
 */
protected function _processDefListItems_callback_dd($matches) {
    $blank_before = $matches[1];
    $marker = $matches[2];
    $body = $matches[3];

    $is_block = $blank_before || preg_match('/\n{2,}/', $body);

    if (!$is_block) {
        // Compact definition: span-level processing only.
        $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    // Definition preceded by a blank line, or containing one: process it
    // as block-level content. The marker is replaced by the same number of
    // spaces so that the first line is indented like the following ones.
    $padded = str_repeat(' ', strlen($marker)) . $body;
    $block = $this->runBlockGamut($this->outdent($padded . "\n\n"));
    return "\n<dd>" . "\n" . $block . "\n" . "</dd>\n";
}
/**
 * Adding the fenced code block syntax to regular Markdown:
 *
 * ~~~
 * Code block
 * ~~~
 *
 * The opening marker is three or more tildes or backticks, optionally
 * followed by a class name and/or an extra-attribute block; the block ends
 * at a line repeating the opening marker.
 * @param string $text
 * @return string
 */
protected function doFencedCodeBlocks($text) {
    // The pattern does not depend on the tab width, so no indentation
    // bound is computed here.
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
            )
            [ ]*
            (?:
                \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
            )?
            [ ]*
            (?:
                ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
            )?
            [ ]* \n # Whitespace and newline following marker.

            # 4: Content
            (
                (?>
                    (?!\1 [ ]* \n) # Not a closing marker.
                    .*\n+
                )+
            )

            # Closing marker.
            \1 [ ]* (?= \n )
        }xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
/**
 * Callback to process fenced code blocks.
 *
 * $matches[2] is the optional standalone class name, $matches[3] the
 * optional extra-attribute block and $matches[4] the code itself.
 * @param array $matches
 * @return string
 */
protected function _doFencedCodeBlocks_callback($matches) {
    $classname =& $matches[2];
    $attrs     =& $matches[3];
    $codeblock = $matches[4];

    // A user-supplied content function replaces the default HTML escaping.
    if ($this->code_block_content_func) {
        $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
    } else {
        $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    }

    // Leading blank lines would be lost inside <pre>; turn them into
    // explicit line breaks.
    $codeblock = preg_replace_callback('/^\n+/',
        array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    $classes = array();
    // Loose comparison on purpose: it also rejects a null class name.
    if ($classname != "") {
        // Square-bracket offset: the curly-brace form ($classname{0}) is
        // deprecated since PHP 7.4 and a fatal error since PHP 8.0.
        if ($classname[0] == '.') {
            $classname = substr($classname, 1);
        }
        $classes[] = $this->code_class_prefix . $classname;
    }

    // Attributes go either on <pre> or on <code>, never on both.
    $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
    $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
/**
 * Replace new lines in fenced code blocks.
 *
 * Produces one line-break element per character of the matched run.
 * @param array $matches
 * @return string
 */
protected function _doFencedCodeBlocks_newlines($matches) {
    $line_break = '<br' . $this->empty_element_suffix;
    $count = strlen($matches[0]);

    return str_repeat($line_break, $count);
}
/**
 * Redefining emphasis markers so that emphasis by underscore does not
 * work in the middle of a word.
 *
 * Patterns for single-marker emphasis (* and _). Keys are the marker
 * strings; NOTE(review): the '' entry presumably matches an opening marker
 * and the others the matching closing marker - confirm against the parent
 * class, which consumes these tables.
 * @var array
 */
protected $em_relist = array(
    '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
    '*' => '(?<![\s*])\*(?!\*)',
    '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
);

/**
 * Same as $em_relist, for the double markers (** and __).
 * @var array
 */
protected $strong_relist = array(
    '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
    '**' => '(?<![\s*])\*\*(?!\*)',
    '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
);

/**
 * Same as $em_relist, for the triple markers (*** and ___).
 * @var array
 */
protected $em_strong_relist = array(
    '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
    '***' => '(?<![\s*])\*\*\*(?!\*)',
    '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
);
/**
 * Parse text into paragraphs.
 *
 * The text is split on runs of two or more newlines; each piece goes
 * through the span gamut and, unless it consists of a block or clean-tag
 * hash token, is wrapped in a p element when $wrap_in_p is true.
 * @param string $text String to process in paragraphs
 * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
 * @return string HTML output
 */
protected function formParagraphs($text, $wrap_in_p = true) {
    // Drop leading and trailing newlines before splitting.
    $stripped = preg_replace('/\A\n+|\n+\z/', '', $text);
    $chunks = preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $chunk) {
        $html = trim($this->runSpanGamut($chunk));

        // Clean tag hashes and block tag hashes are left alone; anything
        // else becomes a paragraph when wrapping is requested.
        if ($wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html)) {
            $html = "<p>$html</p>";
        }
        $rendered[] = $html;
    }

    // Join the pieces, then replace any hash tokens still present with
    // the content they stand for.
    return $this->unhash(implode("\n\n", $rendered));
}
/**
 * Footnotes - strips footnote definitions from text and stores their
 * content, keyed by note id, through _stripFootnotes_callback().
 * @param string $text
 * @return string
 */
protected function stripFootnotes($text) {
    $max_indent = $this->tab_width - 1;

    // Footnote definitions are in the form: [^id]: text of the note
    $definition_re = '{
        ^[ ]{0,' . $max_indent . '}\[\^(.+?)\][ ]?: # note_id = $1
          [ ]*
          \n? # maybe *one* newline
        ( # text = $2 (no blank lines allowed)
            (?:
                .+ # actual text
            |
                \n # newlines but
                (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                # by non-indented content
            )*
        )
        }xm';

    return preg_replace_callback(
        $definition_re,
        array($this, '_stripFootnotes_callback'),
        $text
    );
}
/**
 * Callback for stripping footnotes.
 *
 * Stores the outdented note text under the prefixed note id and removes
 * the definition from the document.
 * @param array $matches
 * @return string
 */
protected function _stripFootnotes_callback($matches) {
    $key = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);
    $this->footnotes[$key] = $body;

    // The whole definition is replaced by nothing.
    return '';
}
/**
 * Replace footnote references in $text [^id] with a special text-token
 * which will be replaced by the actual footnote marker in appendFootnotes.
 *
 * Nothing is replaced while inside an anchor.
 * @param string $text
 * @return string
 */
protected function doFootnotes($text) {
    if ($this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
/**
 * Append footnote list to text.
 *
 * Footnote tokens in the text are first turned into markers; every
 * footnote referenced that way is then rendered, in order of first
 * reference, inside a trailing "footnotes" div. Footnotes referenced from
 * within another footnote are picked up while the list is being built.
 * @param string $text
 * @return string
 */
protected function appendFootnotes($text) {
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr" . $this->empty_element_suffix . "\n";
        $text .= "<ol>\n\n";

        // Backlink attribute template, built once. It may contain the
        // "%%" placeholder, which stands for the footnote number.
        $attr_tmpl = "";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr_tmpl .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr_tmpl .= " title=\"$title\"";
        }
        $backlink_text = $this->fn_backlink_html;
        $num = 0;

        // A while loop rather than foreach: rendering a footnote can add
        // further entries to footnotes_ordered.
        while (!empty($this->footnotes_ordered)) {
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);
            $ref_count = $this->footnotes_ref_count[$note_id];
            unset($this->footnotes_ref_count[$note_id]);
            unset($this->footnotes[$note_id]);

            $footnote .= "\n"; // Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array($this, '_appendFootnotes_callback'), $footnote);

            // Fill the placeholder from the untouched template each time,
            // so that every footnote gets its own number and not the
            // number of the first footnote.
            $attr = str_replace("%%", (string) ++$num, $attr_tmpl);
            $note_id = $this->encodeAttribute($note_id);

            // Prepare backlink, multiple backlinks if multiple references
            $backlink = "<a href=\"#fnref:$note_id\"$attr>$backlink_text</a>";
            for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>$backlink_text</a>";
            }
            // Add backlink to last paragraph; create new paragraph if needed.
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
/**
 * Callback for appending footnotes.
 *
 * Turns one footnote token into its superscript marker. A token without a
 * stored footnote is turned back into its literal "[^id]" form.
 * @param array $matches
 * @return string
 */
protected function _appendFootnotes_callback($matches) {
    $node_id = $this->fn_id_prefix . $matches[1];

    // No corresponding footnote: give the reference text back unchanged.
    if (!isset($this->footnotes[$node_id])) {
        return "[^" . $matches[1] . "]";
    }

    // Reference binding creates the entry when absent, so the number
    // assigned below is stored for later references to the same note.
    $num =& $this->footnotes_numbers[$node_id];
    if (isset($num)) {
        // Already numbered: count one more reference; the new count
        // becomes part of the marker id.
        $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
    } else {
        // First reference: move the content to the ordered list and
        // assign the next number.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        $this->footnotes_ref_count[$node_id] = 1;
        $num = $this->footnote_counter++;
        $ref_count_mark = '';
    }

    $link_attr = "";
    if ($this->fn_link_class != "") {
        $link_attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
    }
    if ($this->fn_link_title != "") {
        $link_attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
    }

    // "%%" in the configured attributes stands for the footnote number.
    $link_attr = str_replace("%%", $num, $link_attr);
    $node_id = $this->encodeAttribute($node_id);

    $anchor = "<a href=\"#fn:$node_id\"$link_attr>$num</a>";
    return "<sup id=\"fnref$ref_count_mark:$node_id\">" . $anchor . "</sup>";
}
/**
 * Abbreviations - strips abbreviation definitions from text and records
 * them through _stripAbbreviations_callback().
 * @param string $text
 * @return string
 */
protected function stripAbbreviations($text) {
    $max_indent = $this->tab_width - 1;

    // Abbreviation definitions are in the form: *[abbr]: description
    $definition_re = '{
        ^[ ]{0,' . $max_indent . '}\*\[(.+?)\][ ]?: # abbr_id = $1
        (.*) # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback(
        $definition_re,
        array($this, '_stripAbbreviations_callback'),
        $text
    );
}
/**
 * Callback for stripping abbreviations.
 *
 * Adds the abbreviation to the alternation used for matching
 * abbreviations and stores its trimmed description.
 * @param array $matches
 * @return string
 */
protected function _stripAbbreviations_callback($matches) {
    $word = $matches[1];

    // Alternatives are separated by "|"; none is needed before the first.
    $separator = $this->abbr_word_re ? '|' : '';
    $this->abbr_word_re .= $separator . preg_quote($word);

    $this->abbr_desciptions[$word] = trim($matches[2]);

    // The whole definition is replaced by nothing.
    return '';
}
/**
 * Find defined abbreviations in text and wrap them in <abbr> elements.
 *
 * Does nothing when no abbreviation has been defined.
 * @param string $text
 * @return string
 */
protected function doAbbreviations($text) {
    if (!$this->abbr_word_re) {
        return $text;
    }

    // The x modifier cannot be used here because abbr_word_re may contain
    // significant spaces. The lookarounds reject matches adjacent to a
    // word character or to the \x1A byte.
    $abbr_re = '{(?<![\w\x1A])(?:' . $this->abbr_word_re . ')(?![\w\x1A])}';

    return preg_replace_callback(
        $abbr_re,
        array($this, '_doAbbreviations_callback'),
        $text
    );
}
1767 * Callback for processing abbreviations
1768 * @param array $matches
1769 * @return string
1771 protected function _doAbbreviations_callback($matches) {
1772 $abbr = $matches[0];
1773 if (isset($this->abbr_desciptions[$abbr])) {
1774 $desc = $this->abbr_desciptions[$abbr];
1775 if (empty($desc)) {
1776 return $this->hashPart("<abbr>$abbr</abbr>");
1777 } else {
1778 $desc = $this->encodeAttribute($desc);
1779 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
1781 } else {
1782 return $matches[0];