lib/markdown/MarkdownExtra.php

   1 <?php
   2 /**
   3  * Markdown Extra - A text-to-HTML conversion tool for web writers
   4  *
   5  * @package   php-markdown
   6  * @author    Michel Fortin <michel.fortin@michelf.com>
   7  * @copyright 2004-2018 Michel Fortin <https://michelf.com/projects/php-markdown/>
   8  * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
   9  */
  10
  11 namespace Michelf;
  12
  13 /**
  14  * Markdown Extra Parser Class
  15  */
  16 class MarkdownExtra extends \Michelf\Markdown {
  17         /**
  18          * Configuration variables
  19          */
  20
  21         /**
  22          * Prefix for footnote ids.
  23          * @var string
  24          */
  25         public $fn_id_prefix = "";
  26
  27         /**
  28          * Optional title attribute for footnote links.
  29          * @var string
  30          */
  31         public $fn_link_title     = "";
             /**
              * Optional title attribute for footnote backlinks.
              * @var string
              */
  32         public $fn_backlink_title = "";
  33
  34         /**
  35          * Optional class attribute for footnote links.
  36          * @var string
  37          */
  38         public $fn_link_class     = "footnote-ref";
             /**
              * Optional class attribute for footnote backlinks.
              * @var string
              */
  39         public $fn_backlink_class = "footnote-backref";
  40
  41         /**
  42          * Content to be displayed within footnote backlinks. The default is '↩';
  43          * the U+FE0E on the end is a Unicode variation selector used to prevent iOS
  44          * from displaying the arrow character as an emoji.
  45          * @var string
  46          */
  47         public $fn_backlink_html = '&#8617;&#xFE0E;';
  48
  49         /**
  50          * Class name for table cell alignment (%% replaced left/center/right)
  51          * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
  52          * If empty, the align attribute is used instead of a class name.
  53          * @var string
  54          */
  55         public $table_align_class_tmpl = '';
  56
  57         /**
  58          * Optional class prefix for fenced code blocks.
  59          * @var string
  60          */
  61         public $code_class_prefix = "";
  62
  63         /**
  64          * Class attribute for code blocks goes on the `code` tag;
  65          * setting this to true will put attributes on the `pre` tag instead.
  66          * @var boolean
  67          */
  68         public $code_attr_on_pre = false;
  69
  70         /**
  71          * Predefined abbreviations, as an array mapping each abbreviation word
  72          * to its description. Loaded into the parser state by setup().
  73          * @var array
  74          */
  75         public $predef_abbr = array();
  76
  77         /**
  78          * Only convert atx-style headers if there's a space between the header and #
  79          * @var boolean
  80          */
  81         public $hashtag_protection = false;
  81
  82         /**
  83          * Parser implementation
  84          */
  85
  86         /**
  87          * Constructor function. Initialize the parser object.
  88          * @return void
  89          */
  90         public function __construct() {
  91                 // Add extra escapable characters before parent constructor
  92                 // initialize the table.
  93                 $this->escape_chars .= ':|';
  94
  95                 // Insert extra document, block, and span transformations.
  96                 // Parent constructor will do the sorting.
  97                 $this->document_gamut += array(
  98                         "doFencedCodeBlocks" => 5,
  99                         "stripFootnotes"     => 15,
 100                         "stripAbbreviations" => 25,
 101                         "appendFootnotes"    => 50,
 102                 );
 103                 $this->block_gamut += array(
 104                         "doFencedCodeBlocks" => 5,
 105                         "doTables"           => 15,
 106                         "doDefLists"         => 45,
 107                 );
 108                 $this->span_gamut += array(
 109                         "doFootnotes"        => 5,
 110                         "doAbbreviations"    => 70,
 111                 );
 112
 113                 $this->enhanced_ordered_list = true;
 114                 parent::__construct();
 115         }
 116
 117
 118         /**
 119          * Extra variables used during extra transformations.
 120          * All of them are reset by setup() at the start of a run.
 121          * @var array
 122         */
 122         protected $footnotes = array();
 123         protected $footnotes_ordered = array();
 124         protected $footnotes_ref_count = array();
 125         protected $footnotes_numbers = array();
             // Abbreviation word => trimmed description (filled by setup() from
             // $predef_abbr). The misspelling is part of the property name.
 126         protected $abbr_desciptions = array();
 127         /** @var string Alternation ("a|b|c") of the preg_quote()d abbreviation words. */
 128         protected $abbr_word_re = '';
 129
 130         /**
 131          * Give the current footnote number. setup() resets it to 1.
 132          * @var integer
 133          */
 134         protected $footnote_counter = 1;
 135
 136         /**
 137          * Setting up Extra-specific variables.
 138          */
 139         protected function setup() {
 140                 parent::setup();
 141
 142                 $this->footnotes = array();
 143                 $this->footnotes_ordered = array();
 144                 $this->footnotes_ref_count = array();
 145                 $this->footnotes_numbers = array();
 146                 $this->abbr_desciptions = array();
 147                 $this->abbr_word_re = '';
 148                 $this->footnote_counter = 1;
 149
 150                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
 151                         if ($this->abbr_word_re)
 152                                 $this->abbr_word_re .= '|';
 153                         $this->abbr_word_re .= preg_quote($abbr_word);
 154                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
 155                 }
 156         }
 157
 158         /**
 159          * Clearing Extra-specific variables.
 160          */
 161         protected function teardown() {
 162                 $this->footnotes = array();
 163                 $this->footnotes_ordered = array();
 164                 $this->footnotes_ref_count = array();
 165                 $this->footnotes_numbers = array();
 166                 $this->abbr_desciptions = array();
 167                 $this->abbr_word_re = '';
 168
 169                 parent::teardown();
 170         }
 171
 172
 173         /**
 174          * Extra attribute parser
 175          */
 176
 177         /**
 178          * Expression to use to catch attributes (includes the braces).
 179          * The text between the braces is captured as a single group; each item
 180          * in it starts with `#`, `.` or a lowercase letter.
 181          * @var string
 182         */
 181         protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
 182
 183         /**
 184          * Expression to use when parsing in a context where no capture is desired.
 185          * Same as $id_class_attr_catch_re, without the capturing group.
 186          * @var string
 187         */
 187         protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
 188
 189         /**
 190          * Parse attributes caught by the $this->id_class_attr_catch_re expression
 191          * and return the HTML-formatted list of attributes.
 192          *
 193          * Currently supported attributes are .class and #id.
 194          *
 195          * In addition, this method also supports supplying a default Id value,
 196          * which will be used to populate the id attribute in case it was not
 197          * overridden.
 198          * @param  string $tag_name
 199          * @param  string $attr
 200          * @param  mixed  $defaultIdValue
 201          * @param  array  $classes
 202          * @return string
 203          */
 204         protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
 205                 if (empty($attr) && !$defaultIdValue && empty($classes)) return "";
 206
 207                 // Split on components
 208                 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
 209                 $elements = $matches[0];
 210
 211                 // Handle classes and IDs (only first ID taken into account)
 212                 $attributes = array();
 213                 $id = false;
 214                 foreach ($elements as $element) {
 215                         if ($element[0] == '.') {
 216                                 $classes[] = substr($element, 1);
 217                         } else if ($element[0] == '#') {
 218                                 if ($id === false) $id = substr($element, 1);
 219                         } else if (strpos($element, '=') > 0) {
 220                                 $parts = explode('=', $element, 2);
 221                                 $attributes[] = $parts[0] . '="' . $parts[1] . '"';
 222                         }
 223                 }
 224
 225                 if (!$id) $id = $defaultIdValue;
 226
 227                 // Compose attributes as string
 228                 $attr_str = "";
 229                 if (!empty($id)) {
 230                         $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
 231                 }
 232                 if (!empty($classes)) {
 233                         $attr_str .= ' class="'. implode(" ", $classes) . '"';
 234                 }
 235                 if (!$this->no_markup && !empty($attributes)) {
 236                         $attr_str .= ' '.implode(" ", $attributes);
 237                 }
 238                 return $attr_str;
 239         }
 240
 241         /**
 242          * Strips link definitions from text, stores the URLs and titles in
 243          * hash references.
 244          * @param  string $text
 245          * @return string
 246          */
 247         protected function stripLinkDefinitions($text) {
 248                 $less_than_tab = $this->tab_width - 1;
 249
 250                 // Link defs are in the form: ^[id]: url "optional title"
 251                 $text = preg_replace_callback('{
 252                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 253                                                           [ ]*
 254                                                           \n?                           # maybe *one* newline
 255                                                           [ ]*
 256                                                         (?:
 257                                                           <(.+?)>                       # url = $2
 258                                                         |
 259                                                           (\S+?)                        # url = $3
 260                                                         )
 261                                                           [ ]*
 262                                                           \n?                           # maybe one newline
 263                                                           [ ]*
 264                                                         (?:
 265                                                                 (?<=\s)                 # lookbehind for whitespace
 266                                                                 ["(]
 267                                                                 (.*?)                   # title = $4
 268                                                                 [")]
 269                                                                 [ ]*
 270                                                         )?      # title is optional
 271                                         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
 272                                                         (?:\n+|\Z)
 273                         }xm',
 274                         array($this, '_stripLinkDefinitions_callback'),
 275                         $text);
 276                 return $text;
 277         }
 278
 279         /**
 280          * Strip link definition callback
 281          * @param  array $matches
 282          * @return string
 283          */
 284         protected function _stripLinkDefinitions_callback($matches) {
 285                 $link_id = strtolower($matches[1]);
 286                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
 287                 $this->urls[$link_id] = $url;
 288                 $this->titles[$link_id] =& $matches[4];
 289                 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
 290                 return ''; // String that will replace the block
 291         }
 292
 293
 294         /**
 295          * HTML block parser
 296          */
 297
 298         /**
 299          * Tags that are always treated as block tags
 300          * (alternation inserted as-is into regular expressions).
 301          * @var string
 302         */
 302         protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
 303
 304         /**
 305          * Tags treated as block tags only if the opening tag is alone on its line.
 306          * Note: `iframe` is also listed in $block_tags_re.
 307          * @var string
 308         */
 308         protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
 309
 310         /**
 311          * Tags where markdown="1" default to span mode:
 312          * @var string
 313          */
 314         protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
 315
 316         /**
 317          * Tags which must not have their contents modified, no matter where
 318          * they appear
 319          * @var string
 320          */
 321         protected $clean_tags_re = 'script|style|math|svg';
 322
 323         /**
 324          * Tags that do not need to be closed.
 325          * @var string
 326          */
 327         protected $auto_close_tags_re = 'hr|img|param|source|track';
 328
 329         /**
 330          * Hashify HTML Blocks and "clean tags".
 331          *
 332          * We only want to do this for block-level HTML tags, such as headers,
 333          * lists, and tables. That's because we still want to wrap <p>s around
 334          * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 335          * phrase emphasis, and spans. The list of tags we're looking for is
 336          * hard-coded.
 337          *
 338          * This works by calling _HashHTMLBlocks_InMarkdown, which then calls
 339          * _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
 340          * attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
 341          *  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
 342          * These two functions are calling each other. It's recursive!
 343          * @param  string $text
 344          * @return string
 345          */
 346         protected function hashHTMLBlocks($text) {
 347                 if ($this->no_markup) {
 348                         return $text;
 349                 }
 350
 351                 // Call the HTML-in-Markdown hasher.
 352                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
 353
 354                 return $text;
 355         }
 356
 357         /**
 358          * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
 359          *
 360          * *   $indent is the number of spaces to be ignored when checking for code
 361          *     blocks. This is important because if we don't take the indent into
 362          *     account, something like this (which looks right) won't work as expected:
 363          *
 364          *     <div>
 365          *         <div markdown="1">
 366          *         Hello World.  <-- Is this a Markdown code block or text?
 367          *         </div>  <-- Is this a Markdown code block or a real tag?
 368          *     </div>
 369          *
 370          *     If you don't like this, just don't indent the tag on which
 371          *     you apply the markdown="1" attribute.
 372          *
 373          * *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
 374          *     tag with that name. Nested tags supported.
 375          *
 376          * *   If $span is true, text inside must be treated as span. So any double
 377          *     newline will be replaced by a single newline so that it does not create
 378          *     paragraphs.
 379          *
 380          * Returns an array of that form: ( processed text , remaining text )
 381          *
 382          * @param  string  $text
 383          * @param  integer $indent
 384          * @param  string  $enclosing_tag_re
 385          * @param  boolean $span
 386          * @return array
 387          */
 388         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
 389                                                                                 $enclosing_tag_re = '', $span = false)
 390         {
 391
 392                 if ($text === '') return array('', '');
 393
 394                 // Regex to check for the presence of newlines around a block tag.
 395                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
 396                 $newline_after_re =
 397                         '{
 398                                 ^                                               # Start of text following the tag.
 399                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
 400                                 [ ]*\n                                  # Must be followed by newline.
 401                         }xs';
 402
 403                 // Regex to match any tag.
                     // Besides tags it also matches comments, processing
                     // instructions, CDATA blocks, code span markers and, outside
                     // span mode, indented code blocks and fenced code block markers.
 404                 $block_tag_re =
 405                         '{
 406                                 (                                       # $2: Capture whole tag.
 407                                         </?                                     # Any opening or closing tag.
 408                                                 (?>                             # Tag name.
 409                                                         ' . $this->block_tags_re . '                    |
 410                                                         ' . $this->context_block_tags_re . '    |
 411                                                         ' . $this->clean_tags_re . '            |
 412                                                         (?!\s)'.$enclosing_tag_re . '
 413                                                 )
 414                                                 (?:
 415                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
 416                                                         (?>
 417                                                                 ".*?"           |       # Double quotes (can contain `>`)
 418                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
 419                                                                 .+?                             # Anything but quotes and `>`.
 420                                                         )*?
 421                                                 )?
 422                                         >                                       # End of tag.
 423                                 |
 424                                         <!--    .*?     -->     # HTML Comment
 425                                 |
 426                                         <\?.*?\?> | <%.*?%>     # Processing instruction
 427                                 |
 428                                         <!\[CDATA\[.*?\]\]>     # CData Block
 429                                 ' . ( !$span ? ' # If not in span.
 430                                 |
 431                                         # Indented code block
 432                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
 433                                         [ ]{' . ($indent + 4) . '}[^\n]* \n
 434                                         (?>
 435                                                 (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n
 436                                         )*
 437                                 |
 438                                         # Fenced code block marker
 439                                         (?<= ^ | \n )
 440                                         [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
 441                                         [ ]*
 442                                         (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
 443                                         [ ]*
 444                                         (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
 445                                         [ ]*
 446                                         (?= \n )
 447                                 ' : '' ) . ' # End (if not is span).
 448                                 |
 449                                         # Code span marker
 450                                         # Note, this regex needs to go after backtick fenced
 451                                         # code blocks but it should also be kept outside of the
 452                                         # "if not in span" condition adding backticks to the parser
 453                                         `+
 454                                 )
 455                         }xs';
 456
 457
 458                 $depth = 0;             // Current depth inside the tag tree.
 459                 $parsed = "";   // Parsed text that will be returned.
 460
 461                 // Loop through every tag until we find the closing tag of the parent
 462                 // or loop until reaching the end of text if no parent tag specified.
 463                 do {
 464                         // Split the text using the first $tag_match pattern found.
 465                         // Text before  pattern will be first in the array, text after
 466                         // pattern will be at the end, and between will be any catches made
 467                         // by the pattern.
 468                         $parts = preg_split($block_tag_re, $text, 2,
 469                                                                 PREG_SPLIT_DELIM_CAPTURE);
 470
 471                         // If in Markdown span mode, add an empty-string span-level hash
 472                         // after each newline to prevent triggering any block element.
 473                         if ($span) {
 474                                 $void = $this->hashPart("", ':');
 475                                 $newline = "\n$void";
 476                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
 477                         }
 478
 479                         $parsed .= $parts[0]; // Text before current tag.
 480
 481                         // If end of $text has been reached. Stop loop.
                             // (Fewer than three parts means the pattern did not match.)
 482                         if (count($parts) < 3) {
 483                                 $text = "";
 484                                 break;
 485                         }
 486
 487                         $tag  = $parts[1]; // Tag to handle.
 488                         $text = $parts[2]; // Remaining text after current tag.
 489                         $tag_re = preg_quote($tag); // For use in a regular expression. NOTE(review): only the code span branch below reads $tag_re, and it recomputes it first.
 490
 491                         // Check for: Fenced code block marker.
 492                         // Note: need to recheck the whole tag to disambiguate backtick
 493                         // fences from code spans
 494                         if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) {
 495                                 // Fenced code block marker: find matching end marker.
 496                                 $fence_indent = strlen($capture[1]); // use captured indent in re
 497                                 $fence_re = $capture[2]; // use captured fence in re
 498                                 if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
 499                                         $matches))
 500                                 {
 501                                         // End marker found: pass text unchanged until marker.
 502                                         $parsed .= $tag . $matches[0];
 503                                         $text = substr($text, strlen($matches[0]));
 504                                 }
 505                                 else {
 506                                         // No end marker: just skip it.
 507                                         $parsed .= $tag;
 508                                 }
 509                         }
 510                         // Check for: Indented code block.
 511                         else if ($tag[0] == "\n" || $tag[0] == " ") {
 512                                 // Indented code block: pass it unchanged, will be handled
 513                                 // later.
 514                                 $parsed .= $tag;
 515                         }
 516                         // Check for: Code span marker
 517                         // Note: need to check this after backtick fenced code blocks
 518                         else if ($tag[0] == "`") {
 519                                 // Find corresponding end marker.
 520                                 $tag_re = preg_quote($tag);
 521                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
 522                                         $text, $matches))
 523                                 {
 524                                         // End marker found: pass text unchanged until marker.
 525                                         $parsed .= $tag . $matches[0];
 526                                         $text = substr($text, strlen($matches[0]));
 527                                 }
 528                                 else {
 529                                         // Unmatched marker: just skip it.
 530                                         $parsed .= $tag;
 531                                 }
 532                         }
 533                         // Check for: Opening Block level tag or
 534                         //            Opening Context Block tag (like ins and del)
 535                         //               used as a block tag (tag is alone on its line).
 536                         else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) ||
 537                                 (       preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) &&
 538                                         preg_match($newline_before_re, $parsed) &&
 539                                         preg_match($newline_after_re, $text)    )
 540                                 )
 541                         {
 542                                 // Need to parse tag and following text using the HTML parser.
 543                                 list($block_text, $text) =
 544                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
 545
 546                                 // Make sure it stays outside of any paragraph by adding newlines.
 547                                 $parsed .= "\n\n$block_text\n\n";
 548                         }
 549                         // Check for: Clean tag (like script, math)
 550                         //            HTML Comments, processing instructions.
 551                         else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) ||
 552                                 $tag[1] == '!' || $tag[1] == '?')
 553                         {
 554                                 // Need to parse tag and following text using the HTML parser.
 555                                 // (don't check for markdown attribute)
 556                                 list($block_text, $text) =
 557                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
 558
 559                                 $parsed .= $block_text;
 560                         }
 561                         // Check for: Tag with same name as enclosing tag.
 562                         else if ($enclosing_tag_re !== '' &&
 563                                 // Same name as enclosing tag.
 564                                 preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag))
 565                         {
 566                                 // Increase/decrease nested tag count.
                                     // A self-closing tag (ending in "/>") leaves the depth unchanged.
 567                                 if ($tag[1] == '/')                                             $depth--;
 568                                 else if ($tag[strlen($tag)-2] != '/')   $depth++;
 569
 570                                 if ($depth < 0) {
 571                                         // Going out of parent element. Clean up and break so we
 572                                         // return to the calling function.
                                             // The closing tag is put back in front of the remaining text.
 573                                         $text = $tag . $text;
 574                                         break;
 575                                 }
 576
 577                                 $parsed .= $tag;
 578                         }
 579                         else {
                                     // Any other match: pass it through unchanged.
 580                                 $parsed .= $tag;
 581                         }
 582                 } while ($depth >= 0);
 583
 584                 return array($parsed, $text);
 585         }
 586
 587         /**
 588          * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
 589          *
 590          * *   Calls $hash_method to convert any blocks.
 591          * *   Stops when the first opening tag closes.
 592          * *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
 593          *     (it is not inside clean tags)
 594          *
 595          * Returns an array of that form: ( processed text , remaining text )
 596          * @param  string $text
 597          * @param  string $hash_method
 598          * @param  string $md_attr
 599          * @return array
 600          */
 601         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
 602                 if ($text === '') return array('', '');
 603
 604                 // Regex to match `markdown` attribute inside of a tag.
 605                 $markdown_attr_re = '
 606                         {
 607                                 \s*                     # Eat whitespace before the `markdown` attribute
 608                                 markdown
 609                                 \s*=\s*
 610                                 (?>
 611                                         (["\'])         # $1: quote delimiter
 612                                         (.*?)           # $2: attribute value
 613                                         \1                      # matching delimiter
 614                                 |
 615                                         ([^\s>]*)       # $3: unquoted attribute value
 616                                 )
 617                                 ()                              # $4: make $3 always defined (avoid warnings)
 618                         }xs';
 619
 620                 // Regex to match any tag.
 621                 $tag_re = '{
 622                                 (                                       # $2: Capture whole tag.
 623                                         </?                                     # Any opening or closing tag.
 624                                                 [\w:$]+                 # Tag name.
 625                                                 (?:
 626                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
 627                                                         (?>
 628                                                                 ".*?"           |       # Double quotes (can contain `>`)
 629                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
 630                                                                 .+?                             # Anything but quotes and `>`.
 631                                                         )*?
 632                                                 )?
 633                                         >                                       # End of tag.
 634                                 |
 635                                         <!--    .*?     -->     # HTML Comment
 636                                 |
 637                                         <\?.*?\?> | <%.*?%>     # Processing instruction
 638                                 |
 639                                         <!\[CDATA\[.*?\]\]>     # CData Block
 640                                 )
 641                         }xs';
 642
 643                 $original_text = $text;         // Save original text in case of faliure.
 644
 645                 $depth          = 0;    // Current depth inside the tag tree.
 646                 $block_text     = "";   // Temporary text holder for current text.
 647                 $parsed         = "";   // Parsed text that will be returned.
 648
 649                 // Get the name of the starting tag.
 650                 // (This pattern makes $base_tag_name_re safe without quoting.)
 651                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
 652                         $base_tag_name_re = $matches[1];
 653
 654                 // Loop through every tag until we find the corresponding closing tag.
 655                 do {
 656                         // Split the text using the first $tag_match pattern found.
 657                         // Text before  pattern will be first in the array, text after
 658                         // pattern will be at the end, and between will be any catches made
 659                         // by the pattern.
 660                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
 661
 662                         if (count($parts) < 3) {
 663                                 // End of $text reached with unbalenced tag(s).
 664                                 // In that case, we return original text unchanged and pass the
 665                                 // first character as filtered to prevent an infinite loop in the
 666                                 // parent function.
 667                                 return array($original_text[0], substr($original_text, 1));
 668                         }
 669
 670                         $block_text .= $parts[0]; // Text before current tag.
 671                         $tag         = $parts[1]; // Tag to handle.
 672                         $text        = $parts[2]; // Remaining text after current tag.
 673
 674                         // Check for: Auto-close tag (like <hr/>)
 675                         //                       Comments and Processing Instructions.
 676                         if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
 677                                 $tag[1] == '!' || $tag[1] == '?')
 678                         {
 679                                 // Just add the tag to the block as if it was text.
 680                                 $block_text .= $tag;
 681                         }
 682                         else {
 683                                 // Increase/decrease nested tag count. Only do so if
 684                                 // the tag's name match base tag's.
 685                                 if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
 686                                         if ($tag[1] == '/')                                             $depth--;
 687                                         else if ($tag[strlen($tag)-2] != '/')   $depth++;
 688                                 }
 689
 690                                 // Check for `markdown="1"` attribute and handle it.
 691                                 if ($md_attr &&
 692                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
 693                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
 694                                 {
 695                                         // Remove `markdown` attribute from opening tag.
 696                                         $tag = preg_replace($markdown_attr_re, '', $tag);
 697
 698                                         // Check if text inside this tag must be parsed in span mode.
 699                                         $this->mode = $attr_m[2] . $attr_m[3];
 700                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
 701                                                 preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag);
 702
 703                                         // Calculate indent before tag.
 704                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
 705                                                 $strlen = $this->utf8_strlen;
 706                                                 $indent = $strlen($matches[1], 'UTF-8');
 707                                         } else {
 708                                                 $indent = 0;
 709                                         }
 710
 711                                         // End preceding block with this tag.
 712                                         $block_text .= $tag;
 713                                         $parsed .= $this->$hash_method($block_text);
 714
 715                                         // Get enclosing tag name for the ParseMarkdown function.
 716                                         // (This pattern makes $tag_name_re safe without quoting.)
 717                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
 718                                         $tag_name_re = $matches[1];
 719
 720                                         // Parse the content using the HTML-in-Markdown parser.
 721                                         list ($block_text, $text)
 722                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
 723                                                         $tag_name_re, $span_mode);
 724
 725                                         // Outdent markdown text.
 726                                         if ($indent > 0) {
 727                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
 728                                                                                                         $block_text);
 729                                         }
 730
 731                                         // Append tag content to parsed text.
 732                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
 733                                         else                            $parsed .= "$block_text";
 734
 735                                         // Start over with a new block.
 736                                         $block_text = "";
 737                                 }
 738                                 else $block_text .= $tag;
 739                         }
 740
 741                 } while ($depth > 0);
 742
 743                 // Hash last block text that wasn't processed inside the loop.
 744                 $parsed .= $this->$hash_method($block_text);
 745
 746                 return array($parsed, $text);
 747         }
 748
 749         /**
         * Called whenever a tag must be hashed. When a function inserts a "clean"
         * tag in $text, the tag passes through this function and is automatically
         * escaped, blocking invalid nested overlap.
 753          * @param  string $text
 754          * @return string
 755          */
 756         protected function hashClean($text) {
 757                 return $this->hashPart($text, 'C');
 758         }
 759
 760         /**
 761          * Turn Markdown link shortcuts into XHTML <a> tags.
 762          * @param  string $text
 763          * @return string
 764          */
 765         protected function doAnchors($text) {
 766                 if ($this->in_anchor) {
 767                         return $text;
 768                 }
 769                 $this->in_anchor = true;
 770
 771                 // First, handle reference-style links: [link text] [id]
 772                 $text = preg_replace_callback('{
 773                         (                                       # wrap whole match in $1
 774                           \[
 775                                 (' . $this->nested_brackets_re . ')     # link text = $2
 776                           \]
 777
 778                           [ ]?                          # one optional space
 779                           (?:\n[ ]*)?           # one optional newline followed by spaces
 780
 781                           \[
 782                                 (.*?)           # id = $3
 783                           \]
 784                         )
 785                         }xs',
 786                         array($this, '_doAnchors_reference_callback'), $text);
 787
 788                 // Next, inline-style links: [link text](url "optional title")
 789                 $text = preg_replace_callback('{
 790                         (                               # wrap whole match in $1
 791                           \[
 792                                 (' . $this->nested_brackets_re . ')     # link text = $2
 793                           \]
 794                           \(                    # literal paren
 795                                 [ \n]*
 796                                 (?:
 797                                         <(.+?)> # href = $3
 798                                 |
 799                                         (' . $this->nested_url_parenthesis_re . ')      # href = $4
 800                                 )
 801                                 [ \n]*
 802                                 (                       # $5
 803                                   ([\'"])       # quote char = $6
 804                                   (.*?)         # Title = $7
 805                                   \6            # matching quote
 806                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 807                                 )?                      # title is optional
 808                           \)
 809                           (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?       # $8 = id/class attributes
 810                         )
 811                         }xs',
 812                         array($this, '_doAnchors_inline_callback'), $text);
 813
 814                 // Last, handle reference-style shortcuts: [link text]
 815                 // These must come last in case you've also got [link text][1]
 816                 // or [link text](/foo)
 817                 $text = preg_replace_callback('{
 818                         (                                       # wrap whole match in $1
 819                           \[
 820                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 821                           \]
 822                         )
 823                         }xs',
 824                         array($this, '_doAnchors_reference_callback'), $text);
 825
 826                 $this->in_anchor = false;
 827                 return $text;
 828         }
 829
 830         /**
 831          * Callback for reference anchors
 832          * @param  array $matches
 833          * @return string
 834          */
 835         protected function _doAnchors_reference_callback($matches) {
 836                 $whole_match =  $matches[1];
 837                 $link_text   =  $matches[2];
 838                 $link_id     =& $matches[3];
 839
 840                 if ($link_id == "") {
 841                         // for shortcut links like [this][] or [this].
 842                         $link_id = $link_text;
 843                 }
 844
 845                 // lower-case and turn embedded newlines into spaces
 846                 $link_id = strtolower($link_id);
 847                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 848
 849                 if (isset($this->urls[$link_id])) {
 850                         $url = $this->urls[$link_id];
 851                         $url = $this->encodeURLAttribute($url);
 852
 853                         $result = "<a href=\"$url\"";
 854                         if ( isset( $this->titles[$link_id] ) ) {
 855                                 $title = $this->titles[$link_id];
 856                                 $title = $this->encodeAttribute($title);
 857                                 $result .=  " title=\"$title\"";
 858                         }
 859                         if (isset($this->ref_attr[$link_id]))
 860                                 $result .= $this->ref_attr[$link_id];
 861
 862                         $link_text = $this->runSpanGamut($link_text);
 863                         $result .= ">$link_text</a>";
 864                         $result = $this->hashPart($result);
 865                 }
 866                 else {
 867                         $result = $whole_match;
 868                 }
 869                 return $result;
 870         }
 871
 872         /**
 873          * Callback for inline anchors
 874          * @param  array $matches
 875          * @return string
 876          */
 877         protected function _doAnchors_inline_callback($matches) {
 878                 $whole_match    =  $matches[1];
 879                 $link_text              =  $this->runSpanGamut($matches[2]);
 880                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 881                 $title                  =& $matches[7];
 882                 $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
 883
 884                 // if the URL was of the form <s p a c e s> it got caught by the HTML
 885                 // tag parser and hashed. Need to reverse the process before using the URL.
 886                 $unhashed = $this->unhash($url);
 887                 if ($unhashed != $url)
 888                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 889
 890                 $url = $this->encodeURLAttribute($url);
 891
 892                 $result = "<a href=\"$url\"";
 893                 if (isset($title)) {
 894                         $title = $this->encodeAttribute($title);
 895                         $result .=  " title=\"$title\"";
 896                 }
 897                 $result .= $attr;
 898
 899                 $link_text = $this->runSpanGamut($link_text);
 900                 $result .= ">$link_text</a>";
 901
 902                 return $this->hashPart($result);
 903         }
 904
 905         /**
 906          * Turn Markdown image shortcuts into <img> tags.
 907          * @param  string $text
 908          * @return string
 909          */
 910         protected function doImages($text) {
 911                 // First, handle reference-style labeled images: ![alt text][id]
 912                 $text = preg_replace_callback('{
 913                         (                               # wrap whole match in $1
 914                           !\[
 915                                 (' . $this->nested_brackets_re . ')             # alt text = $2
 916                           \]
 917
 918                           [ ]?                          # one optional space
 919                           (?:\n[ ]*)?           # one optional newline followed by spaces
 920
 921                           \[
 922                                 (.*?)           # id = $3
 923                           \]
 924
 925                         )
 926                         }xs',
 927                         array($this, '_doImages_reference_callback'), $text);
 928
 929                 // Next, handle inline images:  ![alt text](url "optional title")
 930                 // Don't forget: encode * and _
 931                 $text = preg_replace_callback('{
 932                         (                               # wrap whole match in $1
 933                           !\[
 934                                 (' . $this->nested_brackets_re . ')             # alt text = $2
 935                           \]
 936                           \s?                   # One optional whitespace character
 937                           \(                    # literal paren
 938                                 [ \n]*
 939                                 (?:
 940                                         <(\S*)> # src url = $3
 941                                 |
 942                                         (' . $this->nested_url_parenthesis_re . ')      # src url = $4
 943                                 )
 944                                 [ \n]*
 945                                 (                       # $5
 946                                   ([\'"])       # quote char = $6
 947                                   (.*?)         # title = $7
 948                                   \6            # matching quote
 949                                   [ \n]*
 950                                 )?                      # title is optional
 951                           \)
 952                           (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?       # $8 = id/class attributes
 953                         )
 954                         }xs',
 955                         array($this, '_doImages_inline_callback'), $text);
 956
 957                 return $text;
 958         }
 959
 960         /**
 961          * Callback for referenced images
 962          * @param  array $matches
 963          * @return string
 964          */
 965         protected function _doImages_reference_callback($matches) {
 966                 $whole_match = $matches[1];
 967                 $alt_text    = $matches[2];
 968                 $link_id     = strtolower($matches[3]);
 969
 970                 if ($link_id == "") {
 971                         $link_id = strtolower($alt_text); // for shortcut links like ![this][].
 972                 }
 973
 974                 $alt_text = $this->encodeAttribute($alt_text);
 975                 if (isset($this->urls[$link_id])) {
 976                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
 977                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 978                         if (isset($this->titles[$link_id])) {
 979                                 $title = $this->titles[$link_id];
 980                                 $title = $this->encodeAttribute($title);
 981                                 $result .=  " title=\"$title\"";
 982                         }
 983                         if (isset($this->ref_attr[$link_id]))
 984                                 $result .= $this->ref_attr[$link_id];
 985                         $result .= $this->empty_element_suffix;
 986                         $result = $this->hashPart($result);
 987                 }
 988                 else {
 989                         // If there's no such link ID, leave intact:
 990                         $result = $whole_match;
 991                 }
 992
 993                 return $result;
 994         }
 995
 996         /**
 997          * Callback for inline images
 998          * @param  array $matches
 999          * @return string
1000          */
1001         protected function _doImages_inline_callback($matches) {
1002                 $whole_match    = $matches[1];
1003                 $alt_text               = $matches[2];
1004                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
1005                 $title                  =& $matches[7];
1006                 $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
1007
1008                 $alt_text = $this->encodeAttribute($alt_text);
1009                 $url = $this->encodeURLAttribute($url);
1010                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
1011                 if (isset($title)) {
1012                         $title = $this->encodeAttribute($title);
1013                         $result .=  " title=\"$title\""; // $title already quoted
1014                 }
1015                 $result .= $attr;
1016                 $result .= $this->empty_element_suffix;
1017
1018                 return $this->hashPart($result);
1019         }
1020
1021         /**
1022          * Process markdown headers. Redefined to add ID and class attribute support.
1023          * @param  string $text
1024          * @return string
1025          */
1026         protected function doHeaders($text) {
1027                 // Setext-style headers:
1028                 //  Header 1  {#header1}
1029                 //        ========
1030                 //
1031                 //        Header 2  {#header2 .class1 .class2}
1032                 //        --------
1033                 //
1034                 $text = preg_replace_callback(
1035                         '{
1036                                 (^.+?)                                                          # $1: Header text
1037                                 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?         # $3 = id/class attributes
1038                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
1039                         }mx',
1040                         array($this, '_doHeaders_callback_setext'), $text);
1041
1042                 // atx-style headers:
1043                 //      # Header 1        {#header1}
1044                 //      ## Header 2       {#header2}
1045                 //      ## Header 2 with closing hashes ##  {#header3.class1.class2}
1046                 //      ...
1047                 //      ###### Header 6   {.class2}
1048                 //
1049                 $text = preg_replace_callback('{
1050                                 ^(\#{1,6})      # $1 = string of #\'s
1051                                 [ ]'.($this->hashtag_protection ? '+' : '*').'
1052                                 (.+?)           # $2 = Header text
1053                                 [ ]*
1054                                 \#*                     # optional closing #\'s (not counted)
1055                                 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?         # $3 = id/class attributes
1056                                 [ ]*
1057                                 \n+
1058                         }xm',
1059                         array($this, '_doHeaders_callback_atx'), $text);
1060
1061                 return $text;
1062         }
1063
1064         /**
1065          * Callback for setext headers
1066          * @param  array $matches
1067          * @return string
1068          */
1069         protected function _doHeaders_callback_setext($matches) {
1070                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) {
1071                         return $matches[0];
1072                 }
1073
1074                 $level = $matches[3][0] == '=' ? 1 : 2;
1075
1076                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;
1077
1078                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
1079                 $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>";
1080                 return "\n" . $this->hashBlock($block) . "\n\n";
1081         }
1082
1083         /**
1084          * Callback for atx headers
1085          * @param  array $matches
1086          * @return string
1087          */
1088         protected function _doHeaders_callback_atx($matches) {
1089                 $level = strlen($matches[1]);
1090
1091                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
1092                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
1093                 $block = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>";
1094                 return "\n" . $this->hashBlock($block) . "\n\n";
1095         }
1096
1097         /**
1098          * Form HTML tables.
1099          * @param  string $text
1100          * @return string
1101          */
1102         protected function doTables($text) {
1103                 $less_than_tab = $this->tab_width - 1;
1104                 // Find tables with leading pipe.
1105                 //
1106                 //      | Header 1 | Header 2
1107                 //      | -------- | --------
1108                 //      | Cell 1   | Cell 2
1109                 //      | Cell 3   | Cell 4
1110                 $text = preg_replace_callback('
1111                         {
1112                                 ^                                                       # Start of a line
1113                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1114                                 [|]                                                     # Optional leading pipe (present)
1115                                 (.+) \n                                         # $1: Header row (at least one pipe)
1116
1117                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1118                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
1119
1120                                 (                                                       # $3: Cells
1121                                         (?>
1122                                                 [ ]*                            # Allowed whitespace.
1123                                                 [|] .* \n                       # Row content.
1124                                         )*
1125                                 )
1126                                 (?=\n|\Z)                                       # Stop at final double newline.
1127                         }xm',
1128                         array($this, '_doTable_leadingPipe_callback'), $text);
1129
1130                 // Find tables without leading pipe.
1131                 //
1132                 //      Header 1 | Header 2
1133                 //      -------- | --------
1134                 //      Cell 1   | Cell 2
1135                 //      Cell 3   | Cell 4
1136                 $text = preg_replace_callback('
1137                         {
1138                                 ^                                                       # Start of a line
1139                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1140                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
1141
1142                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1143                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
1144
1145                                 (                                                       # $3: Cells
1146                                         (?>
1147                                                 .* [|] .* \n            # Row content
1148                                         )*
1149                                 )
1150                                 (?=\n|\Z)                                       # Stop at final double newline.
1151                         }xm',
1152                         array($this, '_DoTable_callback'), $text);
1153
1154                 return $text;
1155         }
1156
1157         /**
1158          * Callback for removing the leading pipe for each row
1159          * @param  array $matches
1160          * @return string
1161          */
1162         protected function _doTable_leadingPipe_callback($matches) {
1163                 $head           = $matches[1];
1164                 $underline      = $matches[2];
1165                 $content        = $matches[3];
1166
1167                 $content        = preg_replace('/^ *[|]/m', '', $content);
1168
1169                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
1170         }
1171
1172         /**
1173          * Make the align attribute in a table
1174          * @param  string $alignname
1175          * @return string
1176          */
1177         protected function _doTable_makeAlignAttr($alignname)
1178         {
1179                 if (empty($this->table_align_class_tmpl)) {
1180                         return " align=\"$alignname\"";
1181                 }
1182
1183                 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
1184                 return " class=\"$classname\"";
1185         }
1186
1187         /**
         * Callback for processing tables
1189          * @param  array $matches
1190          * @return string
1191          */
1192         protected function _doTable_callback($matches) {
1193                 $head           = $matches[1];
1194                 $underline      = $matches[2];
1195                 $content        = $matches[3];
1196
1197                 // Remove any tailing pipes for each line.
1198                 $head           = preg_replace('/[|] *$/m', '', $head);
1199                 $underline      = preg_replace('/[|] *$/m', '', $underline);
1200                 $content        = preg_replace('/[|] *$/m', '', $content);
1201
1202                 // Reading alignement from header underline.
1203                 $separators     = preg_split('/ *[|] */', $underline);
1204                 foreach ($separators as $n => $s) {
1205                         if (preg_match('/^ *-+: *$/', $s))
1206                                 $attr[$n] = $this->_doTable_makeAlignAttr('right');
1207                         else if (preg_match('/^ *:-+: *$/', $s))
1208                                 $attr[$n] = $this->_doTable_makeAlignAttr('center');
1209                         else if (preg_match('/^ *:-+ *$/', $s))
1210                                 $attr[$n] = $this->_doTable_makeAlignAttr('left');
1211                         else
1212                                 $attr[$n] = '';
1213                 }
1214
1215                 // Parsing span elements, including code spans, character escapes,
1216                 // and inline HTML tags, so that pipes inside those gets ignored.
1217                 $head           = $this->parseSpan($head);
1218                 $headers        = preg_split('/ *[|] */', $head);
1219                 $col_count      = count($headers);
1220                 $attr       = array_pad($attr, $col_count, '');
1221
1222                 // Write column headers.
1223                 $text = "<table>\n";
1224                 $text .= "<thead>\n";
1225                 $text .= "<tr>\n";
1226                 foreach ($headers as $n => $header)
1227                         $text .= "  <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n";
1228                 $text .= "</tr>\n";
1229                 $text .= "</thead>\n";
1230
1231                 // Split content by row.
1232                 $rows = explode("\n", trim($content, "\n"));
1233
1234                 $text .= "<tbody>\n";
1235                 foreach ($rows as $row) {
1236                         // Parsing span elements, including code spans, character escapes,
1237                         // and inline HTML tags, so that pipes inside those gets ignored.
1238                         $row = $this->parseSpan($row);
1239
1240                         // Split row by cell.
1241                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
1242                         $row_cells = array_pad($row_cells, $col_count, '');
1243
1244                         $text .= "<tr>\n";
1245                         foreach ($row_cells as $n => $cell)
1246                                 $text .= "  <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
1247                         $text .= "</tr>\n";
1248                 }
1249                 $text .= "</tbody>\n";
1250                 $text .= "</table>";
1251
1252                 return $this->hashBlock($text) . "\n";
1253         }
1254
        /**
         * Form HTML definition lists.
         *
         * Scans $text for complete definition lists (one or more term lines
         * followed by a line that starts with a colon marker) and replaces
         * each match with the result of _doDefLists_callback().
         *
         * @param  string $text Text to scan for definition lists
         * @return string Text with every matched list replaced by the callback result
         */
        protected function doDefLists($text) {
                // Maximum number of leading spaces accepted before a term or a
                // colon marker: one less than the configured tab width.
                $less_than_tab = $this->tab_width - 1;

                // Re-usable pattern to match any entire dl list:
                $whole_list_re = '(?>
                        (                                                               # $1 = whole list
                          (                                                             # $2
                                [ ]{0,' . $less_than_tab . '}
                                ((?>.*\S.*\n)+)                         # $3 = defined term
                                \n?
                                [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
                          )
                          (?s:.+?)
                          (                                                             # $4
                                  \z
                                |
                                  \n{2,}
                                  (?=\S)
                                  (?!                                           # Negative lookahead for another term
                                        [ ]{0,' . $less_than_tab . '}
                                        (?: \S.*\n )+?                  # defined term
                                        \n?
                                        [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
                                  )
                                  (?!                                           # Negative lookahead for another definition
                                        [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
                                  )
                          )
                        )
                )'; // mx: written for use with the "m" and "x" modifiers applied below

                // A list may only begin at the very start of the text (optionally
                // after a single newline) or immediately after a blank line.
                $text = preg_replace_callback('{
                                (?>\A\n?|(?<=\n\n))
                                ' . $whole_list_re . '
                        }mx',
                        array($this, '_doDefLists_callback'), $text);

                return $text;
        }
1299
1300         /**
1301          * Callback for processing definition lists
1302          * @param  array $matches
1303          * @return string
1304          */
1305         protected function _doDefLists_callback($matches) {
1306                 // Re-usable patterns to match list item bullets and number markers:
1307                 $list = $matches[1];
1308
1309                 // Turn double returns into triple returns, so that we can make a
1310                 // paragraph for the last item in a list, if necessary:
1311                 $result = trim($this->processDefListItems($list));
1312                 $result = "<dl>\n" . $result . "\n</dl>";
1313                 return $this->hashBlock($result) . "\n\n";
1314         }
1315
1316         /**
1317          * Process the contents of a single definition list, splitting it
1318          * into individual term and definition list items.
1319          * @param  string $list_str
1320          * @return string
1321          */
1322         protected function processDefListItems($list_str) {
1323
1324                 $less_than_tab = $this->tab_width - 1;
1325
1326                 // Trim trailing blank lines:
1327                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1328
1329                 // Process definition terms.
1330                 $list_str = preg_replace_callback('{
1331                         (?>\A\n?|\n\n+)                                         # leading line
1332                         (                                                                       # definition terms = $1
1333                                 [ ]{0,' . $less_than_tab . '}   # leading whitespace
1334                                 (?!\:[ ]|[ ])                                   # negative lookahead for a definition
1335                                                                                                 #   mark (colon) or more whitespace.
1336                                 (?> \S.* \n)+?                                  # actual term (not whitespace).
1337                         )
1338                         (?=\n?[ ]{0,3}:[ ])                                     # lookahead for following line feed
1339                                                                                                 #   with a definition mark.
1340                         }xm',
1341                         array($this, '_processDefListItems_callback_dt'), $list_str);
1342
1343                 // Process actual definitions.
1344                 $list_str = preg_replace_callback('{
1345                         \n(\n+)?                                                        # leading line = $1
1346                         (                                                                       # marker space = $2
1347                                 [ ]{0,' . $less_than_tab . '}   # whitespace before colon
1348                                 \:[ ]+                                                  # definition mark (colon)
1349                         )
1350                         ((?s:.+?))                                                      # definition text = $3
1351                         (?= \n+                                                         # stop at next definition mark,
1352                                 (?:                                                             # next term or end of text
1353                                         [ ]{0,' . $less_than_tab . '} \:[ ]     |
1354                                         <dt> | \z
1355                                 )
1356                         )
1357                         }xm',
1358                         array($this, '_processDefListItems_callback_dd'), $list_str);
1359
1360                 return $list_str;
1361         }
1362
1363         /**
1364          * Callback for <dt> elements in definition lists
1365          * @param  array $matches
1366          * @return string
1367          */
1368         protected function _processDefListItems_callback_dt($matches) {
1369                 $terms = explode("\n", trim($matches[1]));
1370                 $text = '';
1371                 foreach ($terms as $term) {
1372                         $term = $this->runSpanGamut(trim($term));
1373                         $text .= "\n<dt>" . $term . "</dt>";
1374                 }
1375                 return $text . "\n";
1376         }
1377
1378         /**
1379          * Callback for <dd> elements in definition lists
1380          * @param  array $matches
1381          * @return string
1382          */
1383         protected function _processDefListItems_callback_dd($matches) {
1384                 $leading_line   = $matches[1];
1385                 $marker_space   = $matches[2];
1386                 $def                    = $matches[3];
1387
1388                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
1389                         // Replace marker with the appropriate whitespace indentation
1390                         $def = str_repeat(' ', strlen($marker_space)) . $def;
1391                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
1392                         $def = "\n". $def ."\n";
1393                 }
1394                 else {
1395                         $def = rtrim($def);
1396                         $def = $this->runSpanGamut($this->outdent($def));
1397                 }
1398
1399                 return "\n<dd>" . $def . "</dd>\n";
1400         }
1401
1402         /**
1403          * Adding the fenced code block syntax to regular Markdown:
1404          *
1405          * ~~~
1406          * Code block
1407          * ~~~
1408          *
1409          * @param  string $text
1410          * @return string
1411          */
1412         protected function doFencedCodeBlocks($text) {
1413
1414                 $less_than_tab = $this->tab_width;
1415
1416                 $text = preg_replace_callback('{
1417                                 (?:\n|\A)
1418                                 # 1: Opening marker
1419                                 (
1420                                         (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
1421                                 )
1422                                 [ ]*
1423                                 (?:
1424                                         \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
1425                                 )?
1426                                 [ ]*
1427                                 (?:
1428                                         ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
1429                                 )?
1430                                 [ ]* \n # Whitespace and newline following marker.
1431
1432                                 # 4: Content
1433                                 (
1434                                         (?>
1435                                                 (?!\1 [ ]* \n)  # Not a closing marker.
1436                                                 .*\n+
1437                                         )+
1438                                 )
1439
1440                                 # Closing marker.
1441                                 \1 [ ]* (?= \n )
1442                         }xm',
1443                         array($this, '_doFencedCodeBlocks_callback'), $text);
1444
1445                 return $text;
1446         }
1447
1448         /**
1449          * Callback to process fenced code blocks
1450          * @param  array $matches
1451          * @return string
1452          */
1453         protected function _doFencedCodeBlocks_callback($matches) {
1454                 $classname =& $matches[2];
1455                 $attrs     =& $matches[3];
1456                 $codeblock = $matches[4];
1457
1458                 if ($this->code_block_content_func) {
1459                         $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
1460                 } else {
1461                         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1462                 }
1463
1464                 $codeblock = preg_replace_callback('/^\n+/',
1465                         array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
1466
1467                 $classes = array();
1468                 if ($classname != "") {
1469                         if ($classname[0] == '.')
1470                                 $classname = substr($classname, 1);
1471                         $classes[] = $this->code_class_prefix . $classname;
1472                 }
1473                 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
1474                 $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
1475                 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
1476                 $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
1477
1478                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1479         }
1480
1481         /**
1482          * Replace new lines in fenced code blocks
1483          * @param  array $matches
1484          * @return string
1485          */
1486         protected function _doFencedCodeBlocks_newlines($matches) {
1487                 return str_repeat("<br$this->empty_element_suffix",
1488                         strlen($matches[0]));
1489         }
1490
        /**
         * Redefining emphasis markers so that emphasis by underscore does not
         * work in the middle of a word.
         *
         * In the '' entry an underscore is rejected when preceded by a letter,
         * digit or underscore; in the '_' entry it is rejected when followed
         * by one. The asterisk alternatives carry no such word restriction.
         * NOTE(review): the '' entry is presumably the opening pattern and the
         * other keys the closing pattern for that marker; confirm against the
         * parent class, which consumes these lists.
         * @var array
         */
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
        );

        /**
         * Same as $em_relist, for the double markers (** and __).
         * @var array
         */
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
        );

        /**
         * Same as $em_relist, for the triple markers (*** and ___).
         * @var array
         */
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
        );
1511
1512         /**
1513          * Parse text into paragraphs
1514          * @param  string $text String to process in paragraphs
1515          * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1516          * @return string       HTML output
1517          */
1518         protected function formParagraphs($text, $wrap_in_p = true) {
1519                 // Strip leading and trailing lines:
1520                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1521
1522                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1523
1524                 // Wrap <p> tags and unhashify HTML blocks
1525                 foreach ($grafs as $key => $value) {
1526                         $value = trim($this->runSpanGamut($value));
1527
1528                         // Check if this should be enclosed in a paragraph.
1529                         // Clean tag hashes & block tag hashes are left alone.
1530                         $is_p = $wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
1531
1532                         if ($is_p) {
1533                                 $value = "<p>$value</p>";
1534                         }
1535                         $grafs[$key] = $value;
1536                 }
1537
1538                 // Join grafs in one text, then unhash HTML tags.
1539                 $text = implode("\n\n", $grafs);
1540
1541                 // Finish by removing any tag hashes still present in $text.
1542                 $text = $this->unhash($text);
1543
1544                 return $text;
1545         }
1546
1547
1548         /**
1549          * Footnotes - Strips link definitions from text, stores the URLs and
1550          * titles in hash references.
1551          * @param  string $text
1552          * @return string
1553          */
1554         protected function stripFootnotes($text) {
1555                 $less_than_tab = $this->tab_width - 1;
1556
1557                 // Link defs are in the form: [^id]: url "optional title"
1558                 $text = preg_replace_callback('{
1559                         ^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?:  # note_id = $1
1560                           [ ]*
1561                           \n?                                   # maybe *one* newline
1562                         (                                               # text = $2 (no blank lines allowed)
1563                                 (?:
1564                                         .+                              # actual text
1565                                 |
1566                                         \n                              # newlines but
1567                                         (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
1568                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
1569                                                                         # by non-indented content
1570                                 )*
1571                         )
1572                         }xm',
1573                         array($this, '_stripFootnotes_callback'),
1574                         $text);
1575                 return $text;
1576         }
1577
1578         /**
1579          * Callback for stripping footnotes
1580          * @param  array $matches
1581          * @return string
1582          */
1583         protected function _stripFootnotes_callback($matches) {
1584                 $note_id = $this->fn_id_prefix . $matches[1];
1585                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
1586                 return ''; // String that will replace the block
1587         }
1588
1589         /**
1590          * Replace footnote references in $text [^id] with a special text-token
1591          * which will be replaced by the actual footnote marker in appendFootnotes.
1592          * @param  string $text
1593          * @return string
1594          */
1595         protected function doFootnotes($text) {
1596                 if (!$this->in_anchor) {
1597                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
1598                 }
1599                 return $text;
1600         }
1601
1602         /**
1603          * Append footnote list to text
1604          * @param  string $text
1605          * @return string
1606          */
1607         protected function appendFootnotes($text) {
1608                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1609                         array($this, '_appendFootnotes_callback'), $text);
1610
1611                 if (!empty($this->footnotes_ordered)) {
1612                         $text .= "\n\n";
1613                         $text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n";
1614                         $text .= "<hr" . $this->empty_element_suffix . "\n";
1615                         $text .= "<ol>\n\n";
1616
1617                         $attr = "";
1618                         if ($this->fn_backlink_class != "") {
1619                                 $class = $this->fn_backlink_class;
1620                                 $class = $this->encodeAttribute($class);
1621                                 $attr .= " class=\"$class\"";
1622                         }
1623                         if ($this->fn_backlink_title != "") {
1624                                 $title = $this->fn_backlink_title;
1625                                 $title = $this->encodeAttribute($title);
1626                                 $attr .= " title=\"$title\"";
1627                                 $attr .= " aria-label=\"$title\"";
1628                         }
1629                         $attr .= " role=\"doc-backlink\"";
1630                         $backlink_text = $this->fn_backlink_html;
1631                         $num = 0;
1632
1633                         while (!empty($this->footnotes_ordered)) {
1634                                 $footnote = reset($this->footnotes_ordered);
1635                                 $note_id = key($this->footnotes_ordered);
1636                                 unset($this->footnotes_ordered[$note_id]);
1637                                 $ref_count = $this->footnotes_ref_count[$note_id];
1638                                 unset($this->footnotes_ref_count[$note_id]);
1639                                 unset($this->footnotes[$note_id]);
1640
1641                                 $footnote .= "\n"; // Need to append newline before parsing.
1642                                 $footnote = $this->runBlockGamut("$footnote\n");
1643                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1644                                         array($this, '_appendFootnotes_callback'), $footnote);
1645
1646                                 $attr = str_replace("%%", ++$num, $attr);
1647                                 $note_id = $this->encodeAttribute($note_id);
1648
1649                                 // Prepare backlink, multiple backlinks if multiple references
1650                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>$backlink_text</a>";
1651                                 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
1652                                         $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>$backlink_text</a>";
1653                                 }
1654                                 // Add backlink to last paragraph; create new paragraph if needed.
1655                                 if (preg_match('{</p>$}', $footnote)) {
1656                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
1657                                 } else {
1658                                         $footnote .= "\n\n<p>$backlink</p>";
1659                                 }
1660
1661                                 $text .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n";
1662                                 $text .= $footnote . "\n";
1663                                 $text .= "</li>\n\n";
1664                         }
1665
1666                         $text .= "</ol>\n";
1667                         $text .= "</div>";
1668                 }
1669                 return $text;
1670         }
1671
1672         /**
1673          * Callback for appending footnotes
1674          * @param  array $matches
1675          * @return string
1676          */
1677         protected function _appendFootnotes_callback($matches) {
1678                 $node_id = $this->fn_id_prefix . $matches[1];
1679
1680                 // Create footnote marker only if it has a corresponding footnote *and*
1681                 // the footnote hasn't been used by another marker.
1682                 if (isset($this->footnotes[$node_id])) {
1683                         $num =& $this->footnotes_numbers[$node_id];
1684                         if (!isset($num)) {
1685                                 // Transfer footnote content to the ordered list and give it its
1686                                 // number
1687                                 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1688                                 $this->footnotes_ref_count[$node_id] = 1;
1689                                 $num = $this->footnote_counter++;
1690                                 $ref_count_mark = '';
1691                         } else {
1692                                 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
1693                         }
1694
1695                         $attr = "";
1696                         if ($this->fn_link_class != "") {
1697                                 $class = $this->fn_link_class;
1698                                 $class = $this->encodeAttribute($class);
1699                                 $attr .= " class=\"$class\"";
1700                         }
1701                         if ($this->fn_link_title != "") {
1702                                 $title = $this->fn_link_title;
1703                                 $title = $this->encodeAttribute($title);
1704                                 $attr .= " title=\"$title\"";
1705                         }
1706                         $attr .= " role=\"doc-noteref\"";
1707
1708                         $attr = str_replace("%%", $num, $attr);
1709                         $node_id = $this->encodeAttribute($node_id);
1710
1711                         return
1712                                 "<sup id=\"fnref$ref_count_mark:$node_id\">".
1713                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
1714                                 "</sup>";
1715                 }
1716
1717                 return "[^" . $matches[1] . "]";
1718         }
1719
1720
1721         /**
1722          * Abbreviations - strips abbreviations from text, stores titles in hash
1723          * references.
1724          * @param  string $text
1725          * @return string
1726          */
1727         protected function stripAbbreviations($text) {
1728                 $less_than_tab = $this->tab_width - 1;
1729
1730                 // Link defs are in the form: [id]*: url "optional title"
1731                 $text = preg_replace_callback('{
1732                         ^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?:  # abbr_id = $1
1733                         (.*)                                    # text = $2 (no blank lines allowed)
1734                         }xm',
1735                         array($this, '_stripAbbreviations_callback'),
1736                         $text);
1737                 return $text;
1738         }
1739
1740         /**
1741          * Callback for stripping abbreviations
1742          * @param  array $matches
1743          * @return string
1744          */
1745         protected function _stripAbbreviations_callback($matches) {
1746                 $abbr_word = $matches[1];
1747                 $abbr_desc = $matches[2];
1748                 if ($this->abbr_word_re) {
1749                         $this->abbr_word_re .= '|';
1750                 }
1751                 $this->abbr_word_re .= preg_quote($abbr_word);
1752                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1753                 return ''; // String that will replace the block
1754         }
1755
1756         /**
1757          * Find defined abbreviations in text and wrap them in <abbr> elements.
1758          * @param  string $text
1759          * @return string
1760          */
1761         protected function doAbbreviations($text) {
1762                 if ($this->abbr_word_re) {
1763                         // cannot use the /x modifier because abbr_word_re may
1764                         // contain significant spaces:
1765                         $text = preg_replace_callback('{' .
1766                                 '(?<![\w\x1A])' .
1767                                 '(?:' . $this->abbr_word_re . ')' .
1768                                 '(?![\w\x1A])' .
1769                                 '}',
1770                                 array($this, '_doAbbreviations_callback'), $text);
1771                 }
1772                 return $text;
1773         }
1774
1775         /**
1776          * Callback for processing abbreviations
1777          * @param  array $matches
1778          * @return string
1779          */
1780         protected function _doAbbreviations_callback($matches) {
1781                 $abbr = $matches[0];
1782                 if (isset($this->abbr_desciptions[$abbr])) {
1783                         $desc = $this->abbr_desciptions[$abbr];
1784                         if (empty($desc)) {
1785                                 return $this->hashPart("<abbr>$abbr</abbr>");
1786                         } else {
1787                                 $desc = $this->encodeAttribute($desc);
1788                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
1789                         }
1790                 } else {
1791                         return $matches[0];
1792                 }
1793         }
1794 }