lib/markdown/MarkdownExtra.php

   1 <?php
   2 #
   3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown Extra
   6 # Copyright (c) 2004-2015 Michel Fortin
   7 # <https://michelf.ca/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13 namespace Michelf;
  14
  15
  16 #
  17 # Markdown Extra Parser Class
  18 #
  19
  20 class MarkdownExtra extends \Michelf\Markdown {
  21
        ### Configuration Variables ###

        # Prefix for footnote ids.
        public $fn_id_prefix = "";

        # Optional title attribute for footnote links and backlinks.
        public $fn_link_title = "";
        public $fn_backlink_title = "";

        # Optional class attribute for footnote links and backlinks.
        public $fn_link_class = "footnote-ref";
        public $fn_backlink_class = "footnote-backref";

        # Class name for table cell alignment (%% replaced left/center/right)
        # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
        # If empty, the align attribute is used instead of a class name.
        public $table_align_class_tmpl = '';

        # Optional class prefix for fenced code block.
        public $code_class_prefix = "";
        # Class attribute for code blocks goes on the `code` tag;
        # setting this to true will put attributes on the `pre` tag instead.
        public $code_attr_on_pre = false;

        # Predefined abbreviations, as an array mapping each abbreviation word
        # (key) to its description (value). setup() reads this array to seed
        # the abbreviation pattern and the description table; descriptions
        # are trimmed when copied.
        public $predef_abbr = array();
  48
  49         ### Parser Implementation ###
  50
  51         public function __construct() {
  52         #
  53         # Constructor function. Initialize the parser object.
  54         #
  55                 # Add extra escapable characters before parent constructor
  56                 # initialize the table.
  57                 $this->escape_chars .= ':|';
  58
  59                 # Insert extra document, block, and span transformations.
  60                 # Parent constructor will do the sorting.
  61                 $this->document_gamut += array(
  62                         "doFencedCodeBlocks" => 5,
  63                         "stripFootnotes"     => 15,
  64                         "stripAbbreviations" => 25,
  65                         "appendFootnotes"    => 50,
  66                         );
  67                 $this->block_gamut += array(
  68                         "doFencedCodeBlocks" => 5,
  69                         "doTables"           => 15,
  70                         "doDefLists"         => 45,
  71                         );
  72                 $this->span_gamut += array(
  73                         "doFootnotes"        => 5,
  74                         "doAbbreviations"    => 70,
  75                         );
  76
  77                 $this->enhanced_ordered_list = true;
  78                 parent::__construct();
  79         }
  80
  81
        # Extra variables used during extra transformations.
        # All of them are reset by setup(); teardown() empties them again,
        # except $footnote_counter which is only reset in setup().
        protected $footnotes = array();
        protected $footnotes_ordered = array();
        protected $footnotes_ref_count = array();
        protected $footnotes_numbers = array();
        # Abbreviation word => trimmed description, filled by setup() from
        # $predef_abbr. NOTE: the property name is misspelled ("desciptions");
        # it is referenced under this spelling by setup() and teardown(), so
        # it must not be renamed in isolation.
        protected $abbr_desciptions = array();
        # Regex alternation of the preg_quote()d abbreviation words, joined
        # with '|' by setup(); empty string when no abbreviation is known.
        protected $abbr_word_re = '';

        # Give the current footnote number.
        protected $footnote_counter = 1;
  92
  93
  94         protected function setup() {
  95         #
  96         # Setting up Extra-specific variables.
  97         #
  98                 parent::setup();
  99
 100                 $this->footnotes = array();
 101                 $this->footnotes_ordered = array();
 102                 $this->footnotes_ref_count = array();
 103                 $this->footnotes_numbers = array();
 104                 $this->abbr_desciptions = array();
 105                 $this->abbr_word_re = '';
 106                 $this->footnote_counter = 1;
 107
 108                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
 109                         if ($this->abbr_word_re)
 110                                 $this->abbr_word_re .= '|';
 111                         $this->abbr_word_re .= preg_quote($abbr_word);
 112                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
 113                 }
 114         }
 115
 116         protected function teardown() {
 117         #
 118         # Clearing Extra-specific variables.
 119         #
 120                 $this->footnotes = array();
 121                 $this->footnotes_ordered = array();
 122                 $this->footnotes_ref_count = array();
 123                 $this->footnotes_numbers = array();
 124                 $this->abbr_desciptions = array();
 125                 $this->abbr_word_re = '';
 126
 127                 parent::teardown();
 128         }
 129
 130
 131         ### Extra Attribute Parser ###
 132
        # Expression to use to catch attributes (includes the braces).
        # Matches `{...}` holding one or more tokens, each optionally preceded
        # by spaces. A token starts with `#`, `.` or a lowercase letter and
        # continues with one or more characters among `-_:a-zA-Z0-9=`.
        # The single capture group holds the tokens (without the braces and
        # without the spaces before the closing brace).
        protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
        # Expression to use when parsing in a context when no capture is desired
        # (same pattern as above, minus the capture group).
        protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
 137
 138         protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null) {
 139         #
 140         # Parse attributes caught by the $this->id_class_attr_catch_re expression
 141         # and return the HTML-formatted list of attributes.
 142         #
 143         # Currently supported attributes are .class and #id.
 144         #
 145         # In addition, this method also supports supplying a default Id value,
 146         # which will be used to populate the id attribute in case it was not
 147         # overridden.
 148                 if (empty($attr) && !$defaultIdValue) return "";
 149
 150                 # Split on components
 151                 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
 152                 $elements = $matches[0];
 153
 154                 # handle classes and ids (only first id taken into account)
 155                 $classes = array();
 156                 $attributes = array();
 157                 $id = false;
 158                 foreach ($elements as $element) {
 159                         if ($element{0} == '.') {
 160                                 $classes[] = substr($element, 1);
 161                         } else if ($element{0} == '#') {
 162                                 if ($id === false) $id = substr($element, 1);
 163                         } else if (strpos($element, '=') > 0) {
 164                                 $parts = explode('=', $element, 2);
 165                                 $attributes[] = $parts[0] . '="' . $parts[1] . '"';
 166                         }
 167                 }
 168
 169                 if (!$id) $id = $defaultIdValue;
 170
 171                 # compose attributes as string
 172                 $attr_str = "";
 173                 if (!empty($id)) {
 174                         $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
 175                 }
 176                 if (!empty($classes)) {
 177                         $attr_str .= ' class="'. implode(" ", $classes) . '"';
 178                 }
 179                 if (!$this->no_markup && !empty($attributes)) {
 180                         $attr_str .= ' '.implode(" ", $attributes);
 181                 }
 182                 return $attr_str;
 183         }
 184
 185
 186         protected function stripLinkDefinitions($text) {
 187         #
 188         # Strips link definitions from text, stores the URLs and titles in
 189         # hash references.
 190         #
 191                 $less_than_tab = $this->tab_width - 1;
 192
 193                 # Link defs are in the form: ^[id]: url "optional title"
 194                 $text = preg_replace_callback('{
 195                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 196                                                           [ ]*
 197                                                           \n?                           # maybe *one* newline
 198                                                           [ ]*
 199                                                         (?:
 200                                                           <(.+?)>                       # url = $2
 201                                                         |
 202                                                           (\S+?)                        # url = $3
 203                                                         )
 204                                                           [ ]*
 205                                                           \n?                           # maybe one newline
 206                                                           [ ]*
 207                                                         (?:
 208                                                                 (?<=\s)                 # lookbehind for whitespace
 209                                                                 ["(]
 210                                                                 (.*?)                   # title = $4
 211                                                                 [")]
 212                                                                 [ ]*
 213                                                         )?      # title is optional
 214                                         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
 215                                                         (?:\n+|\Z)
 216                         }xm',
 217                         array($this, '_stripLinkDefinitions_callback'),
 218                         $text);
 219                 return $text;
 220         }
 221         protected function _stripLinkDefinitions_callback($matches) {
 222                 $link_id = strtolower($matches[1]);
 223                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
 224                 $this->urls[$link_id] = $url;
 225                 $this->titles[$link_id] =& $matches[4];
 226                 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
 227                 return ''; # String that will replace the block
 228         }
 229
 230
 231         ### HTML Block Parser ###
 232
        # Tags that are always treated as block tags:
        # (_hashHTMLBlocks_inMarkdown tests this list before
        # $context_block_tags_re, so a name present in both lists -- `iframe`
        # currently is -- is handled as an unconditional block tag there.)
        protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';

        # Tags treated as block tags only if the opening tag is alone on its line:
        protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';

        # Tags where markdown="1" default to span mode:
        protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

        # Tags which must not have their contents modified, no matter where
        # they appear:
        protected $clean_tags_re = 'script|style|math|svg';

        # Tags that do not need to be closed.
        # (_hashHTMLBlocks_inHTML copies these tags into the current block as
        # if they were text.)
        protected $auto_close_tags_re = 'hr|img|param|source|track';
 248
 249
 250         protected function hashHTMLBlocks($text) {
 251         #
 252         # Hashify HTML Blocks and "clean tags".
 253         #
 254         # We only want to do this for block-level HTML tags, such as headers,
 255         # lists, and tables. That's because we still want to wrap <p>s around
 256         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 257         # phrase emphasis, and spans. The list of tags we're looking for is
 258         # hard-coded.
 259         #
 260         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
 261         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
 262         # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
 263         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
 264         # These two functions are calling each other. It's recursive!
 265         #
 266                 if ($this->no_markup)  return $text;
 267
 268                 #
 269                 # Call the HTML-in-Markdown hasher.
 270                 #
 271                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
 272
 273                 return $text;
 274         }
 275         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
 276                                                                                 $enclosing_tag_re = '', $span = false)
 277         {
 278         #
 279         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
 280         #
 281         # *   $indent is the number of space to be ignored when checking for code
 282         #     blocks. This is important because if we don't take the indent into
 283         #     account, something like this (which looks right) won't work as expected:
 284         #
 285         #     <div>
 286         #         <div markdown="1">
 287         #         Hello World.  <-- Is this a Markdown code block or text?
 288         #         </div>  <-- Is this a Markdown code block or a real tag?
 289         #     <div>
 290         #
 291         #     If you don't like this, just don't indent the tag on which
 292         #     you apply the markdown="1" attribute.
 293         #
 294         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
 295         #     tag with that name. Nested tags supported.
 296         #
 297         # *   If $span is true, text inside must treated as span. So any double
 298         #     newline will be replaced by a single newline so that it does not create
 299         #     paragraphs.
 300         #
 301         # Returns an array of that form: ( processed text , remaining text )
 302         #
 303                 if ($text === '') return array('', '');
 304
 305                 # Regex to check for the presense of newlines around a block tag.
 306                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
 307                 $newline_after_re =
 308                         '{
 309                                 ^                                               # Start of text following the tag.
 310                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
 311                                 [ ]*\n                                  # Must be followed by newline.
 312                         }xs';
 313
 314                 # Regex to match any tag.
 315                 $block_tag_re =
 316                         '{
 317                                 (                                       # $2: Capture whole tag.
 318                                         </?                                     # Any opening or closing tag.
 319                                                 (?>                             # Tag name.
 320                                                         '.$this->block_tags_re.'                        |
 321                                                         '.$this->context_block_tags_re.'        |
 322                                                         '.$this->clean_tags_re.'                |
 323                                                         (?!\s)'.$enclosing_tag_re.'
 324                                                 )
 325                                                 (?:
 326                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
 327                                                         (?>
 328                                                                 ".*?"           |       # Double quotes (can contain `>`)
 329                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
 330                                                                 .+?                             # Anything but quotes and `>`.
 331                                                         )*?
 332                                                 )?
 333                                         >                                       # End of tag.
 334                                 |
 335                                         <!--    .*?     -->     # HTML Comment
 336                                 |
 337                                         <\?.*?\?> | <%.*?%>     # Processing instruction
 338                                 |
 339                                         <!\[CDATA\[.*?\]\]>     # CData Block
 340                                 '. ( !$span ? ' # If not in span.
 341                                 |
 342                                         # Indented code block
 343                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
 344                                         [ ]{'.($indent+4).'}[^\n]* \n
 345                                         (?>
 346                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
 347                                         )*
 348                                 |
 349                                         # Fenced code block marker
 350                                         (?<= ^ | \n )
 351                                         [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
 352                                                                         [ ]*
 353                                         (?:
 354                                         \.?[-_:a-zA-Z0-9]+ # standalone class name
 355                                         |
 356                                                 '.$this->id_class_attr_nocatch_re.' # extra attributes
 357                                         )?
 358                                         [ ]*
 359                                         (?= \n )
 360                                 ' : '' ). ' # End (if not is span).
 361                                 |
 362                                         # Code span marker
 363                                         # Note, this regex needs to go after backtick fenced
 364                                         # code blocks but it should also be kept outside of the
 365                                         # "if not in span" condition adding backticks to the parser
 366                                         `+
 367                                 )
 368                         }xs';
 369
 370
 371                 $depth = 0;             # Current depth inside the tag tree.
 372                 $parsed = "";   # Parsed text that will be returned.
 373
 374                 #
 375                 # Loop through every tag until we find the closing tag of the parent
 376                 # or loop until reaching the end of text if no parent tag specified.
 377                 #
 378                 do {
 379                         #
 380                         # Split the text using the first $tag_match pattern found.
 381                         # Text before  pattern will be first in the array, text after
 382                         # pattern will be at the end, and between will be any catches made
 383                         # by the pattern.
 384                         #
 385                         $parts = preg_split($block_tag_re, $text, 2,
 386                                                                 PREG_SPLIT_DELIM_CAPTURE);
 387
 388                         # If in Markdown span mode, add a empty-string span-level hash
 389                         # after each newline to prevent triggering any block element.
 390                         if ($span) {
 391                                 $void = $this->hashPart("", ':');
 392                                 $newline = "$void\n";
 393                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
 394                         }
 395
 396                         $parsed .= $parts[0]; # Text before current tag.
 397
 398                         # If end of $text has been reached. Stop loop.
 399                         if (count($parts) < 3) {
 400                                 $text = "";
 401                                 break;
 402                         }
 403
 404                         $tag  = $parts[1]; # Tag to handle.
 405                         $text = $parts[2]; # Remaining text after current tag.
 406                         $tag_re = preg_quote($tag); # For use in a regular expression.
 407
 408                         #
 409                         # Check for: Fenced code block marker.
 410                         # Note: need to recheck the whole tag to disambiguate backtick
 411                         # fences from code spans
 412                         #
 413                         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
 414                                 # Fenced code block marker: find matching end marker.
 415                                 $fence_indent = strlen($capture[1]); # use captured indent in re
 416                                 $fence_re = $capture[2]; # use captured fence in re
 417                                 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
 418                                         $matches))
 419                                 {
 420                                         # End marker found: pass text unchanged until marker.
 421                                         $parsed .= $tag . $matches[0];
 422                                         $text = substr($text, strlen($matches[0]));
 423                                 }
 424                                 else {
 425                                         # No end marker: just skip it.
 426                                         $parsed .= $tag;
 427                                 }
 428                         }
 429                         #
 430                         # Check for: Indented code block.
 431                         #
 432                         else if ($tag{0} == "\n" || $tag{0} == " ") {
 433                                 # Indented code block: pass it unchanged, will be handled
 434                                 # later.
 435                                 $parsed .= $tag;
 436                         }
 437                         #
 438                         # Check for: Code span marker
 439                         # Note: need to check this after backtick fenced code blocks
 440                         #
 441                         else if ($tag{0} == "`") {
 442                                 # Find corresponding end marker.
 443                                 $tag_re = preg_quote($tag);
 444                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
 445                                         $text, $matches))
 446                                 {
 447                                         # End marker found: pass text unchanged until marker.
 448                                         $parsed .= $tag . $matches[0];
 449                                         $text = substr($text, strlen($matches[0]));
 450                                 }
 451                                 else {
 452                                         # Unmatched marker: just skip it.
 453                                         $parsed .= $tag;
 454                                 }
 455                         }
 456                         #
 457                         # Check for: Opening Block level tag or
 458                         #            Opening Context Block tag (like ins and del)
 459                         #               used as a block tag (tag is alone on it's line).
 460                         #
 461                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
 462                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
 463                                         preg_match($newline_before_re, $parsed) &&
 464                                         preg_match($newline_after_re, $text)    )
 465                                 )
 466                         {
 467                                 # Need to parse tag and following text using the HTML parser.
 468                                 list($block_text, $text) =
 469                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
 470
 471                                 # Make sure it stays outside of any paragraph by adding newlines.
 472                                 $parsed .= "\n\n$block_text\n\n";
 473                         }
 474                         #
 475                         # Check for: Clean tag (like script, math)
 476                         #            HTML Comments, processing instructions.
 477                         #
 478                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
 479                                 $tag{1} == '!' || $tag{1} == '?')
 480                         {
 481                                 # Need to parse tag and following text using the HTML parser.
 482                                 # (don't check for markdown attribute)
 483                                 list($block_text, $text) =
 484                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
 485
 486                                 $parsed .= $block_text;
 487                         }
 488                         #
 489                         # Check for: Tag with same name as enclosing tag.
 490                         #
 491                         else if ($enclosing_tag_re !== '' &&
 492                                 # Same name as enclosing tag.
 493                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
 494                         {
 495                                 #
 496                                 # Increase/decrease nested tag count.
 497                                 #
 498                                 if ($tag{1} == '/')                                             $depth--;
 499                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
 500
 501                                 if ($depth < 0) {
 502                                         #
 503                                         # Going out of parent element. Clean up and break so we
 504                                         # return to the calling function.
 505                                         #
 506                                         $text = $tag . $text;
 507                                         break;
 508                                 }
 509
 510                                 $parsed .= $tag;
 511                         }
 512                         else {
 513                                 $parsed .= $tag;
 514                         }
 515                 } while ($depth >= 0);
 516
 517                 return array($parsed, $text);
 518         }
 519         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
 520         #
 521         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
 522         #
 523         # *   Calls $hash_method to convert any blocks.
 524         # *   Stops when the first opening tag closes.
 525         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
 526         #     (it is not inside clean tags)
 527         #
 528         # Returns an array of that form: ( processed text , remaining text )
 529         #
 530                 if ($text === '') return array('', '');
 531
 532                 # Regex to match `markdown` attribute inside of a tag.
 533                 $markdown_attr_re = '
 534                         {
 535                                 \s*                     # Eat whitespace before the `markdown` attribute
 536                                 markdown
 537                                 \s*=\s*
 538                                 (?>
 539                                         (["\'])         # $1: quote delimiter
 540                                         (.*?)           # $2: attribute value
 541                                         \1                      # matching delimiter
 542                                 |
 543                                         ([^\s>]*)       # $3: unquoted attribute value
 544                                 )
 545                                 ()                              # $4: make $3 always defined (avoid warnings)
 546                         }xs';
 547
 548                 # Regex to match any tag.
 549                 $tag_re = '{
 550                                 (                                       # $2: Capture whole tag.
 551                                         </?                                     # Any opening or closing tag.
 552                                                 [\w:$]+                 # Tag name.
 553                                                 (?:
 554                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
 555                                                         (?>
 556                                                                 ".*?"           |       # Double quotes (can contain `>`)
 557                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
 558                                                                 .+?                             # Anything but quotes and `>`.
 559                                                         )*?
 560                                                 )?
 561                                         >                                       # End of tag.
 562                                 |
 563                                         <!--    .*?     -->     # HTML Comment
 564                                 |
 565                                         <\?.*?\?> | <%.*?%>     # Processing instruction
 566                                 |
 567                                         <!\[CDATA\[.*?\]\]>     # CData Block
 568                                 )
 569                         }xs';
 570
 571                 $original_text = $text;         # Save original text in case of faliure.
 572
 573                 $depth          = 0;    # Current depth inside the tag tree.
 574                 $block_text     = "";   # Temporary text holder for current text.
 575                 $parsed         = "";   # Parsed text that will be returned.
 576
 577                 #
 578                 # Get the name of the starting tag.
 579                 # (This pattern makes $base_tag_name_re safe without quoting.)
 580                 #
 581                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
 582                         $base_tag_name_re = $matches[1];
 583
 584                 #
 585                 # Loop through every tag until we find the corresponding closing tag.
 586                 #
 587                 do {
 588                         #
 589                         # Split the text using the first $tag_match pattern found.
 590                         # Text before  pattern will be first in the array, text after
 591                         # pattern will be at the end, and between will be any catches made
 592                         # by the pattern.
 593                         #
 594                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
 595
 596                         if (count($parts) < 3) {
 597                                 #
 598                                 # End of $text reached with unbalenced tag(s).
 599                                 # In that case, we return original text unchanged and pass the
 600                                 # first character as filtered to prevent an infinite loop in the
 601                                 # parent function.
 602                                 #
 603                                 return array($original_text{0}, substr($original_text, 1));
 604                         }
 605
 606                         $block_text .= $parts[0]; # Text before current tag.
 607                         $tag         = $parts[1]; # Tag to handle.
 608                         $text        = $parts[2]; # Remaining text after current tag.
 609
 610                         #
 611                         # Check for: Auto-close tag (like <hr/>)
 612                         #                        Comments and Processing Instructions.
 613                         #
 614                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
 615                                 $tag{1} == '!' || $tag{1} == '?')
 616                         {
 617                                 # Just add the tag to the block as if it was text.
 618                                 $block_text .= $tag;
 619                         }
 620                         else {
 621                                 #
 622                                 # Increase/decrease nested tag count. Only do so if
 623                                 # the tag's name match base tag's.
 624                                 #
 625                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
 626                                         if ($tag{1} == '/')                                             $depth--;
 627                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
 628                                 }
 629
 630                                 #
 631                                 # Check for `markdown="1"` attribute and handle it.
 632                                 #
 633                                 if ($md_attr &&
 634                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
 635                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
 636                                 {
 637                                         # Remove `markdown` attribute from opening tag.
 638                                         $tag = preg_replace($markdown_attr_re, '', $tag);
 639
 640                                         # Check if text inside this tag must be parsed in span mode.
 641                                         $this->mode = $attr_m[2] . $attr_m[3];
 642                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
 643                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
 644
 645                                         # Calculate indent before tag.
 646                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
 647                                                 $strlen = $this->utf8_strlen;
 648                                                 $indent = $strlen($matches[1], 'UTF-8');
 649                                         } else {
 650                                                 $indent = 0;
 651                                         }
 652
 653                                         # End preceding block with this tag.
 654                                         $block_text .= $tag;
 655                                         $parsed .= $this->$hash_method($block_text);
 656
 657                                         # Get enclosing tag name for the ParseMarkdown function.
 658                                         # (This pattern makes $tag_name_re safe without quoting.)
 659                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
 660                                         $tag_name_re = $matches[1];
 661
 662                                         # Parse the content using the HTML-in-Markdown parser.
 663                                         list ($block_text, $text)
 664                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
 665                                                         $tag_name_re, $span_mode);
 666
 667                                         # Outdent markdown text.
 668                                         if ($indent > 0) {
 669                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
 670                                                                                                         $block_text);
 671                                         }
 672
 673                                         # Append tag content to parsed text.
 674                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
 675                                         else                            $parsed .= "$block_text";
 676
 677                                         # Start over with a new block.
 678                                         $block_text = "";
 679                                 }
 680                                 else $block_text .= $tag;
 681                         }
 682
 683                 } while ($depth > 0);
 684
 685                 #
 686                 # Hash last block text that wasn't processed inside the loop.
 687                 #
 688                 $parsed .= $this->$hash_method($block_text);
 689
 690                 return array($parsed, $text);
 691         }
 692
 693
 694         protected function hashClean($text) {
 695         #
 696         # Called whenever a tag must be hashed when a function inserts a "clean" tag
 697         # in $text, it passes through this function and is automaticaly escaped,
 698         # blocking invalid nested overlap.
 699         #
 700                 return $this->hashPart($text, 'C');
 701         }
 702
 703
 704         protected function doAnchors($text) {
 705         #
 706         # Turn Markdown link shortcuts into XHTML <a> tags.
 707         #
 708                 if ($this->in_anchor) return $text;
 709                 $this->in_anchor = true;
 710
 711                 #
 712                 # First, handle reference-style links: [link text] [id]
 713                 #
 714                 $text = preg_replace_callback('{
 715                         (                                       # wrap whole match in $1
 716                           \[
 717                                 ('.$this->nested_brackets_re.') # link text = $2
 718                           \]
 719
 720                           [ ]?                          # one optional space
 721                           (?:\n[ ]*)?           # one optional newline followed by spaces
 722
 723                           \[
 724                                 (.*?)           # id = $3
 725                           \]
 726                         )
 727                         }xs',
 728                         array($this, '_doAnchors_reference_callback'), $text);
 729
 730                 #
 731                 # Next, inline-style links: [link text](url "optional title")
 732                 #
 733                 $text = preg_replace_callback('{
 734                         (                               # wrap whole match in $1
 735                           \[
 736                                 ('.$this->nested_brackets_re.') # link text = $2
 737                           \]
 738                           \(                    # literal paren
 739                                 [ \n]*
 740                                 (?:
 741                                         <(.+?)> # href = $3
 742                                 |
 743                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 744                                 )
 745                                 [ \n]*
 746                                 (                       # $5
 747                                   ([\'"])       # quote char = $6
 748                                   (.*?)         # Title = $7
 749                                   \6            # matching quote
 750                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 751                                 )?                      # title is optional
 752                           \)
 753                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
 754                         )
 755                         }xs',
 756                         array($this, '_doAnchors_inline_callback'), $text);
 757
 758                 #
 759                 # Last, handle reference-style shortcuts: [link text]
 760                 # These must come last in case you've also got [link text][1]
 761                 # or [link text](/foo)
 762                 #
 763                 $text = preg_replace_callback('{
 764                         (                                       # wrap whole match in $1
 765                           \[
 766                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 767                           \]
 768                         )
 769                         }xs',
 770                         array($this, '_doAnchors_reference_callback'), $text);
 771
 772                 $this->in_anchor = false;
 773                 return $text;
 774         }
 775         protected function _doAnchors_reference_callback($matches) {
 776                 $whole_match =  $matches[1];
 777                 $link_text   =  $matches[2];
 778                 $link_id     =& $matches[3];
 779
 780                 if ($link_id == "") {
 781                         # for shortcut links like [this][] or [this].
 782                         $link_id = $link_text;
 783                 }
 784
 785                 # lower-case and turn embedded newlines into spaces
 786                 $link_id = strtolower($link_id);
 787                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 788
 789                 if (isset($this->urls[$link_id])) {
 790                         $url = $this->urls[$link_id];
 791                         $url = $this->encodeURLAttribute($url);
 792
 793                         $result = "<a href=\"$url\"";
 794                         if ( isset( $this->titles[$link_id] ) ) {
 795                                 $title = $this->titles[$link_id];
 796                                 $title = $this->encodeAttribute($title);
 797                                 $result .=  " title=\"$title\"";
 798                         }
 799                         if (isset($this->ref_attr[$link_id]))
 800                                 $result .= $this->ref_attr[$link_id];
 801
 802                         $link_text = $this->runSpanGamut($link_text);
 803                         $result .= ">$link_text</a>";
 804                         $result = $this->hashPart($result);
 805                 }
 806                 else {
 807                         $result = $whole_match;
 808                 }
 809                 return $result;
 810         }
 811         protected function _doAnchors_inline_callback($matches) {
 812                 $whole_match    =  $matches[1];
 813                 $link_text              =  $this->runSpanGamut($matches[2]);
 814                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 815                 $title                  =& $matches[7];
 816                 $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
 817
 818                 // if the URL was of the form <s p a c e s> it got caught by the HTML
 819                 // tag parser and hashed. Need to reverse the process before using the URL.
 820                 $unhashed = $this->unhash($url);
 821                 if ($unhashed != $url)
 822                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 823
 824                 $url = $this->encodeURLAttribute($url);
 825
 826                 $result = "<a href=\"$url\"";
 827                 if (isset($title)) {
 828                         $title = $this->encodeAttribute($title);
 829                         $result .=  " title=\"$title\"";
 830                 }
 831                 $result .= $attr;
 832
 833                 $link_text = $this->runSpanGamut($link_text);
 834                 $result .= ">$link_text</a>";
 835
 836                 return $this->hashPart($result);
 837         }
 838
 839
 840         protected function doImages($text) {
 841         #
 842         # Turn Markdown image shortcuts into <img> tags.
 843         #
 844                 #
 845                 # First, handle reference-style labeled images: ![alt text][id]
 846                 #
 847                 $text = preg_replace_callback('{
 848                         (                               # wrap whole match in $1
 849                           !\[
 850                                 ('.$this->nested_brackets_re.')         # alt text = $2
 851                           \]
 852
 853                           [ ]?                          # one optional space
 854                           (?:\n[ ]*)?           # one optional newline followed by spaces
 855
 856                           \[
 857                                 (.*?)           # id = $3
 858                           \]
 859
 860                         )
 861                         }xs',
 862                         array($this, '_doImages_reference_callback'), $text);
 863
 864                 #
 865                 # Next, handle inline images:  ![alt text](url "optional title")
 866                 # Don't forget: encode * and _
 867                 #
 868                 $text = preg_replace_callback('{
 869                         (                               # wrap whole match in $1
 870                           !\[
 871                                 ('.$this->nested_brackets_re.')         # alt text = $2
 872                           \]
 873                           \s?                   # One optional whitespace character
 874                           \(                    # literal paren
 875                                 [ \n]*
 876                                 (?:
 877                                         <(\S*)> # src url = $3
 878                                 |
 879                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 880                                 )
 881                                 [ \n]*
 882                                 (                       # $5
 883                                   ([\'"])       # quote char = $6
 884                                   (.*?)         # title = $7
 885                                   \6            # matching quote
 886                                   [ \n]*
 887                                 )?                      # title is optional
 888                           \)
 889                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
 890                         )
 891                         }xs',
 892                         array($this, '_doImages_inline_callback'), $text);
 893
 894                 return $text;
 895         }
 896         protected function _doImages_reference_callback($matches) {
 897                 $whole_match = $matches[1];
 898                 $alt_text    = $matches[2];
 899                 $link_id     = strtolower($matches[3]);
 900
 901                 if ($link_id == "") {
 902                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 903                 }
 904
 905                 $alt_text = $this->encodeAttribute($alt_text);
 906                 if (isset($this->urls[$link_id])) {
 907                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
 908                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 909                         if (isset($this->titles[$link_id])) {
 910                                 $title = $this->titles[$link_id];
 911                                 $title = $this->encodeAttribute($title);
 912                                 $result .=  " title=\"$title\"";
 913                         }
 914                         if (isset($this->ref_attr[$link_id]))
 915                                 $result .= $this->ref_attr[$link_id];
 916                         $result .= $this->empty_element_suffix;
 917                         $result = $this->hashPart($result);
 918                 }
 919                 else {
 920                         # If there's no such link ID, leave intact:
 921                         $result = $whole_match;
 922                 }
 923
 924                 return $result;
 925         }
 926         protected function _doImages_inline_callback($matches) {
 927                 $whole_match    = $matches[1];
 928                 $alt_text               = $matches[2];
 929                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 930                 $title                  =& $matches[7];
 931                 $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
 932
 933                 $alt_text = $this->encodeAttribute($alt_text);
 934                 $url = $this->encodeURLAttribute($url);
 935                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 936                 if (isset($title)) {
 937                         $title = $this->encodeAttribute($title);
 938                         $result .=  " title=\"$title\""; # $title already quoted
 939                 }
 940                 $result .= $attr;
 941                 $result .= $this->empty_element_suffix;
 942
 943                 return $this->hashPart($result);
 944         }
 945
 946
 947         protected function doHeaders($text) {
 948         #
 949         # Redefined to add id and class attribute support.
 950         #
 951                 # Setext-style headers:
 952                 #         Header 1  {#header1}
 953                 #         ========
 954                 #
 955                 #         Header 2  {#header2 .class1 .class2}
 956                 #         --------
 957                 #
 958                 $text = preg_replace_callback(
 959                         '{
 960                                 (^.+?)                                                          # $1: Header text
 961                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
 962                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
 963                         }mx',
 964                         array($this, '_doHeaders_callback_setext'), $text);
 965
 966                 # atx-style headers:
 967                 #       # Header 1        {#header1}
 968                 #       ## Header 2       {#header2}
 969                 #       ## Header 2 with closing hashes ##  {#header3.class1.class2}
 970                 #       ...
 971                 #       ###### Header 6   {.class2}
 972                 #
 973                 $text = preg_replace_callback('{
 974                                 ^(\#{1,6})      # $1 = string of #\'s
 975                                 [ ]*
 976                                 (.+?)           # $2 = Header text
 977                                 [ ]*
 978                                 \#*                     # optional closing #\'s (not counted)
 979                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
 980                                 [ ]*
 981                                 \n+
 982                         }xm',
 983                         array($this, '_doHeaders_callback_atx'), $text);
 984
 985                 return $text;
 986         }
 987         protected function _doHeaders_callback_setext($matches) {
 988                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
 989                         return $matches[0];
 990
 991                 $level = $matches[3]{0} == '=' ? 1 : 2;
 992
 993                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;
 994
 995                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
 996                 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
 997                 return "\n" . $this->hashBlock($block) . "\n\n";
 998         }
 999         protected function _doHeaders_callback_atx($matches) {
1000                 $level = strlen($matches[1]);
1001
1002                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
1003                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
1004                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
1005                 return "\n" . $this->hashBlock($block) . "\n\n";
1006         }
1007
1008
1009         protected function doTables($text) {
1010         #
1011         # Form HTML tables.
1012         #
1013                 $less_than_tab = $this->tab_width - 1;
1014                 #
1015                 # Find tables with leading pipe.
1016                 #
1017                 #       | Header 1 | Header 2
1018                 #       | -------- | --------
1019                 #       | Cell 1   | Cell 2
1020                 #       | Cell 3   | Cell 4
1021                 #
1022                 $text = preg_replace_callback('
1023                         {
1024                                 ^                                                       # Start of a line
1025                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1026                                 [|]                                                     # Optional leading pipe (present)
1027                                 (.+) \n                                         # $1: Header row (at least one pipe)
1028
1029                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1030                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
1031
1032                                 (                                                       # $3: Cells
1033                                         (?>
1034                                                 [ ]*                            # Allowed whitespace.
1035                                                 [|] .* \n                       # Row content.
1036                                         )*
1037                                 )
1038                                 (?=\n|\Z)                                       # Stop at final double newline.
1039                         }xm',
1040                         array($this, '_doTable_leadingPipe_callback'), $text);
1041
1042                 #
1043                 # Find tables without leading pipe.
1044                 #
1045                 #       Header 1 | Header 2
1046                 #       -------- | --------
1047                 #       Cell 1   | Cell 2
1048                 #       Cell 3   | Cell 4
1049                 #
1050                 $text = preg_replace_callback('
1051                         {
1052                                 ^                                                       # Start of a line
1053                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1054                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
1055
1056                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1057                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
1058
1059                                 (                                                       # $3: Cells
1060                                         (?>
1061                                                 .* [|] .* \n            # Row content
1062                                         )*
1063                                 )
1064                                 (?=\n|\Z)                                       # Stop at final double newline.
1065                         }xm',
1066                         array($this, '_DoTable_callback'), $text);
1067
1068                 return $text;
1069         }
1070         protected function _doTable_leadingPipe_callback($matches) {
1071                 $head           = $matches[1];
1072                 $underline      = $matches[2];
1073                 $content        = $matches[3];
1074
1075                 # Remove leading pipe for each row.
1076                 $content        = preg_replace('/^ *[|]/m', '', $content);
1077
1078                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
1079         }
1080         protected function _doTable_makeAlignAttr($alignname)
1081         {
1082                 if (empty($this->table_align_class_tmpl))
1083                         return " align=\"$alignname\"";
1084
1085                 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
1086                 return " class=\"$classname\"";
1087         }
1088         protected function _doTable_callback($matches) {
1089                 $head           = $matches[1];
1090                 $underline      = $matches[2];
1091                 $content        = $matches[3];
1092
1093                 # Remove any tailing pipes for each line.
1094                 $head           = preg_replace('/[|] *$/m', '', $head);
1095                 $underline      = preg_replace('/[|] *$/m', '', $underline);
1096                 $content        = preg_replace('/[|] *$/m', '', $content);
1097
1098                 # Reading alignement from header underline.
1099                 $separators     = preg_split('/ *[|] */', $underline);
1100                 foreach ($separators as $n => $s) {
1101                         if (preg_match('/^ *-+: *$/', $s))
1102                                 $attr[$n] = $this->_doTable_makeAlignAttr('right');
1103                         else if (preg_match('/^ *:-+: *$/', $s))
1104                                 $attr[$n] = $this->_doTable_makeAlignAttr('center');
1105                         else if (preg_match('/^ *:-+ *$/', $s))
1106                                 $attr[$n] = $this->_doTable_makeAlignAttr('left');
1107                         else
1108                                 $attr[$n] = '';
1109                 }
1110
1111                 # Parsing span elements, including code spans, character escapes,
1112                 # and inline HTML tags, so that pipes inside those gets ignored.
1113                 $head           = $this->parseSpan($head);
1114                 $headers        = preg_split('/ *[|] */', $head);
1115                 $col_count      = count($headers);
1116                 $attr       = array_pad($attr, $col_count, '');
1117
1118                 # Write column headers.
1119                 $text = "<table>\n";
1120                 $text .= "<thead>\n";
1121                 $text .= "<tr>\n";
1122                 foreach ($headers as $n => $header)
1123                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
1124                 $text .= "</tr>\n";
1125                 $text .= "</thead>\n";
1126
1127                 # Split content by row.
1128                 $rows = explode("\n", trim($content, "\n"));
1129
1130                 $text .= "<tbody>\n";
1131                 foreach ($rows as $row) {
1132                         # Parsing span elements, including code spans, character escapes,
1133                         # and inline HTML tags, so that pipes inside those gets ignored.
1134                         $row = $this->parseSpan($row);
1135
1136                         # Split row by cell.
1137                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
1138                         $row_cells = array_pad($row_cells, $col_count, '');
1139
1140                         $text .= "<tr>\n";
1141                         foreach ($row_cells as $n => $cell)
1142                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
1143                         $text .= "</tr>\n";
1144                 }
1145                 $text .= "</tbody>\n";
1146                 $text .= "</table>";
1147
1148                 return $this->hashBlock($text) . "\n";
1149         }
1150
1151
1152         protected function doDefLists($text) {
1153         #
1154         # Form HTML definition lists.
1155         #
1156                 $less_than_tab = $this->tab_width - 1;
1157
1158                 # Re-usable pattern to match any entire dl list:
1159                 $whole_list_re = '(?>
1160                         (                                                               # $1 = whole list
1161                           (                                                             # $2
1162                                 [ ]{0,'.$less_than_tab.'}
1163                                 ((?>.*\S.*\n)+)                         # $3 = defined term
1164                                 \n?
1165                                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1166                           )
1167                           (?s:.+?)
1168                           (                                                             # $4
1169                                   \z
1170                                 |
1171                                   \n{2,}
1172                                   (?=\S)
1173                                   (?!                                           # Negative lookahead for another term
1174                                         [ ]{0,'.$less_than_tab.'}
1175                                         (?: \S.*\n )+?                  # defined term
1176                                         \n?
1177                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1178                                   )
1179                                   (?!                                           # Negative lookahead for another definition
1180                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1181                                   )
1182                           )
1183                         )
1184                 )'; // mx
1185
1186                 $text = preg_replace_callback('{
1187                                 (?>\A\n?|(?<=\n\n))
1188                                 '.$whole_list_re.'
1189                         }mx',
1190                         array($this, '_doDefLists_callback'), $text);
1191
1192                 return $text;
1193         }
1194         protected function _doDefLists_callback($matches) {
1195                 # Re-usable patterns to match list item bullets and number markers:
1196                 $list = $matches[1];
1197
1198                 # Turn double returns into triple returns, so that we can make a
1199                 # paragraph for the last item in a list, if necessary:
1200                 $result = trim($this->processDefListItems($list));
1201                 $result = "<dl>\n" . $result . "\n</dl>";
1202                 return $this->hashBlock($result) . "\n\n";
1203         }
1204
1205
1206         protected function processDefListItems($list_str) {
1207         #
1208         #       Process the contents of a single definition list, splitting it
1209         #       into individual term and definition list items.
1210         #
1211                 $less_than_tab = $this->tab_width - 1;
1212
1213                 # trim trailing blank lines:
1214                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1215
1216                 # Process definition terms.
1217                 $list_str = preg_replace_callback('{
1218                         (?>\A\n?|\n\n+)                                 # leading line
1219                         (                                                               # definition terms = $1
1220                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
1221                                 (?!\:[ ]|[ ])                           # negative lookahead for a definition
1222                                                                                         #   mark (colon) or more whitespace.
1223                                 (?> \S.* \n)+?                          # actual term (not whitespace).
1224                         )
1225                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed
1226                                                                                         #   with a definition mark.
1227                         }xm',
1228                         array($this, '_processDefListItems_callback_dt'), $list_str);
1229
1230                 # Process actual definitions.
1231                 $list_str = preg_replace_callback('{
1232                         \n(\n+)?                                                # leading line = $1
1233                         (                                                               # marker space = $2
1234                                 [ ]{0,'.$less_than_tab.'}       # whitespace before colon
1235                                 \:[ ]+                                          # definition mark (colon)
1236                         )
1237                         ((?s:.+?))                                              # definition text = $3
1238                         (?= \n+                                                 # stop at next definition mark,
1239                                 (?:                                                     # next term or end of text
1240                                         [ ]{0,'.$less_than_tab.'} \:[ ] |
1241                                         <dt> | \z
1242                                 )
1243                         )
1244                         }xm',
1245                         array($this, '_processDefListItems_callback_dd'), $list_str);
1246
1247                 return $list_str;
1248         }
1249         protected function _processDefListItems_callback_dt($matches) {
1250                 $terms = explode("\n", trim($matches[1]));
1251                 $text = '';
1252                 foreach ($terms as $term) {
1253                         $term = $this->runSpanGamut(trim($term));
1254                         $text .= "\n<dt>" . $term . "</dt>";
1255                 }
1256                 return $text . "\n";
1257         }
1258         protected function _processDefListItems_callback_dd($matches) {
1259                 $leading_line   = $matches[1];
1260                 $marker_space   = $matches[2];
1261                 $def                    = $matches[3];
1262
1263                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
1264                         # Replace marker with the appropriate whitespace indentation
1265                         $def = str_repeat(' ', strlen($marker_space)) . $def;
1266                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
1267                         $def = "\n". $def ."\n";
1268                 }
1269                 else {
1270                         $def = rtrim($def);
1271                         $def = $this->runSpanGamut($this->outdent($def));
1272                 }
1273
1274                 return "\n<dd>" . $def . "</dd>\n";
1275         }
1276
1277
1278         protected function doFencedCodeBlocks($text) {
1279         #
1280         # Adding the fenced code block syntax to regular Markdown:
1281         #
1282         # ~~~
1283         # Code block
1284         # ~~~
1285         #
1286                 $less_than_tab = $this->tab_width;
1287
1288                 $text = preg_replace_callback('{
1289                                 (?:\n|\A)
1290                                 # 1: Opening marker
1291                                 (
1292                                         (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
1293                                 )
1294                                 [ ]*
1295                                 (?:
1296                                         \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
1297                                 |
1298                                         '.$this->id_class_attr_catch_re.' # 3: Extra attributes
1299                                 )?
1300                                 [ ]* \n # Whitespace and newline following marker.
1301
1302                                 # 4: Content
1303                                 (
1304                                         (?>
1305                                                 (?!\1 [ ]* \n)  # Not a closing marker.
1306                                                 .*\n+
1307                                         )+
1308                                 )
1309
1310                                 # Closing marker.
1311                                 \1 [ ]* (?= \n )
1312                         }xm',
1313                         array($this, '_doFencedCodeBlocks_callback'), $text);
1314
1315                 return $text;
1316         }
1317         protected function _doFencedCodeBlocks_callback($matches) {
1318                 $classname =& $matches[2];
1319                 $attrs     =& $matches[3];
1320                 $codeblock = $matches[4];
1321                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1322                 $codeblock = preg_replace_callback('/^\n+/',
1323                         array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
1324
1325                 if ($classname != "") {
1326                         if ($classname{0} == '.')
1327                                 $classname = substr($classname, 1);
1328                         $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
1329                 } else {
1330                         $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
1331                 }
1332                 $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
1333                 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
1334                 $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
1335
1336                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1337         }
1338         protected function _doFencedCodeBlocks_newlines($matches) {
1339                 return str_repeat("<br$this->empty_element_suffix",
1340                         strlen($matches[0]));
1341         }
1342
1343
1344         #
1345         # Redefining emphasis markers so that emphasis by underscore does not
1346         # work in the middle of a word.
1347         #
1348         protected $em_relist = array(
1349                 ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
1350                 '*' => '(?<![\s*])\*(?!\*)',
1351                 '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
1352                 );
1353         protected $strong_relist = array(
1354                 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
1355                 '**' => '(?<![\s*])\*\*(?!\*)',
1356                 '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
1357                 );
1358         protected $em_strong_relist = array(
1359                 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
1360                 '***' => '(?<![\s*])\*\*\*(?!\*)',
1361                 '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
1362                 );
1363
1364
1365         protected function formParagraphs($text) {
1366         #
1367         #       Params:
1368         #               $text - string to process with html <p> tags
1369         #
1370                 # Strip leading and trailing lines:
1371                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1372
1373                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1374
1375                 #
1376                 # Wrap <p> tags and unhashify HTML blocks
1377                 #
1378                 foreach ($grafs as $key => $value) {
1379                         $value = trim($this->runSpanGamut($value));
1380
1381                         # Check if this should be enclosed in a paragraph.
1382                         # Clean tag hashes & block tag hashes are left alone.
1383                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
1384
1385                         if ($is_p) {
1386                                 $value = "<p>$value</p>";
1387                         }
1388                         $grafs[$key] = $value;
1389                 }
1390
1391                 # Join grafs in one text, then unhash HTML tags.
1392                 $text = implode("\n\n", $grafs);
1393
1394                 # Finish by removing any tag hashes still present in $text.
1395                 $text = $this->unhash($text);
1396
1397                 return $text;
1398         }
1399
1400
1401         ### Footnotes
1402
1403         protected function stripFootnotes($text) {
1404         #
1405         # Strips link definitions from text, stores the URLs and titles in
1406         # hash references.
1407         #
1408                 $less_than_tab = $this->tab_width - 1;
1409
1410                 # Link defs are in the form: [^id]: url "optional title"
1411                 $text = preg_replace_callback('{
1412                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
1413                           [ ]*
1414                           \n?                                   # maybe *one* newline
1415                         (                                               # text = $2 (no blank lines allowed)
1416                                 (?:
1417                                         .+                              # actual text
1418                                 |
1419                                         \n                              # newlines but
1420                                         (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
1421                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
1422                                                                         # by non-indented content
1423                                 )*
1424                         )
1425                         }xm',
1426                         array($this, '_stripFootnotes_callback'),
1427                         $text);
1428                 return $text;
1429         }
1430         protected function _stripFootnotes_callback($matches) {
1431                 $note_id = $this->fn_id_prefix . $matches[1];
1432                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
1433                 return ''; # String that will replace the block
1434         }
1435
1436
1437         protected function doFootnotes($text) {
1438         #
1439         # Replace footnote references in $text [^id] with a special text-token
1440         # which will be replaced by the actual footnote marker in appendFootnotes.
1441         #
1442                 if (!$this->in_anchor) {
1443                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
1444                 }
1445                 return $text;
1446         }
1447
1448
1449         protected function appendFootnotes($text) {
1450         #
1451         # Append footnote list to text.
1452         #
1453                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1454                         array($this, '_appendFootnotes_callback'), $text);
1455
1456                 if (!empty($this->footnotes_ordered)) {
1457                         $text .= "\n\n";
1458                         $text .= "<div class=\"footnotes\">\n";
1459                         $text .= "<hr". $this->empty_element_suffix ."\n";
1460                         $text .= "<ol>\n\n";
1461
1462                         $attr = "";
1463                         if ($this->fn_backlink_class != "") {
1464                                 $class = $this->fn_backlink_class;
1465                                 $class = $this->encodeAttribute($class);
1466                                 $attr .= " class=\"$class\"";
1467                         }
1468                         if ($this->fn_backlink_title != "") {
1469                                 $title = $this->fn_backlink_title;
1470                                 $title = $this->encodeAttribute($title);
1471                                 $attr .= " title=\"$title\"";
1472                         }
1473                         $num = 0;
1474
1475                         while (!empty($this->footnotes_ordered)) {
1476                                 $footnote = reset($this->footnotes_ordered);
1477                                 $note_id = key($this->footnotes_ordered);
1478                                 unset($this->footnotes_ordered[$note_id]);
1479                                 $ref_count = $this->footnotes_ref_count[$note_id];
1480                                 unset($this->footnotes_ref_count[$note_id]);
1481                                 unset($this->footnotes[$note_id]);
1482
1483                                 $footnote .= "\n"; # Need to append newline before parsing.
1484                                 $footnote = $this->runBlockGamut("$footnote\n");
1485                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1486                                         array($this, '_appendFootnotes_callback'), $footnote);
1487
1488                                 $attr = str_replace("%%", ++$num, $attr);
1489                                 $note_id = $this->encodeAttribute($note_id);
1490
1491                                 # Prepare backlink, multiple backlinks if multiple references
1492                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
1493                                 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
1494                                         $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
1495                                 }
1496                                 # Add backlink to last paragraph; create new paragraph if needed.
1497                                 if (preg_match('{</p>$}', $footnote)) {
1498                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
1499                                 } else {
1500                                         $footnote .= "\n\n<p>$backlink</p>";
1501                                 }
1502
1503                                 $text .= "<li id=\"fn:$note_id\">\n";
1504                                 $text .= $footnote . "\n";
1505                                 $text .= "</li>\n\n";
1506                         }
1507
1508                         $text .= "</ol>\n";
1509                         $text .= "</div>";
1510                 }
1511                 return $text;
1512         }
1513         protected function _appendFootnotes_callback($matches) {
1514                 $node_id = $this->fn_id_prefix . $matches[1];
1515
1516                 # Create footnote marker only if it has a corresponding footnote *and*
1517                 # the footnote hasn't been used by another marker.
1518                 if (isset($this->footnotes[$node_id])) {
1519                         $num =& $this->footnotes_numbers[$node_id];
1520                         if (!isset($num)) {
1521                                 # Transfer footnote content to the ordered list and give it its
1522                                 # number
1523                                 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1524                                 $this->footnotes_ref_count[$node_id] = 1;
1525                                 $num = $this->footnote_counter++;
1526                                 $ref_count_mark = '';
1527                         } else {
1528                                 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
1529                         }
1530
1531                         $attr = "";
1532                         if ($this->fn_link_class != "") {
1533                                 $class = $this->fn_link_class;
1534                                 $class = $this->encodeAttribute($class);
1535                                 $attr .= " class=\"$class\"";
1536                         }
1537                         if ($this->fn_link_title != "") {
1538                                 $title = $this->fn_link_title;
1539                                 $title = $this->encodeAttribute($title);
1540                                 $attr .= " title=\"$title\"";
1541                         }
1542
1543                         $attr = str_replace("%%", $num, $attr);
1544                         $node_id = $this->encodeAttribute($node_id);
1545
1546                         return
1547                                 "<sup id=\"fnref$ref_count_mark:$node_id\">".
1548                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
1549                                 "</sup>";
1550                 }
1551
1552                 return "[^".$matches[1]."]";
1553         }
1554
1555
1556         ### Abbreviations ###
1557
1558         protected function stripAbbreviations($text) {
1559         #
1560         # Strips abbreviations from text, stores titles in hash references.
1561         #
1562                 $less_than_tab = $this->tab_width - 1;
1563
1564                 # Link defs are in the form: [id]*: url "optional title"
1565                 $text = preg_replace_callback('{
1566                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
1567                         (.*)                                    # text = $2 (no blank lines allowed)
1568                         }xm',
1569                         array($this, '_stripAbbreviations_callback'),
1570                         $text);
1571                 return $text;
1572         }
1573         protected function _stripAbbreviations_callback($matches) {
1574                 $abbr_word = $matches[1];
1575                 $abbr_desc = $matches[2];
1576                 if ($this->abbr_word_re)
1577                         $this->abbr_word_re .= '|';
1578                 $this->abbr_word_re .= preg_quote($abbr_word);
1579                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1580                 return ''; # String that will replace the block
1581         }
1582
1583
1584         protected function doAbbreviations($text) {
1585         #
1586         # Find defined abbreviations in text and wrap them in <abbr> elements.
1587         #
1588                 if ($this->abbr_word_re) {
1589                         // cannot use the /x modifier because abbr_word_re may
1590                         // contain significant spaces:
1591                         $text = preg_replace_callback('{'.
1592                                 '(?<![\w\x1A])'.
1593                                 '(?:'.$this->abbr_word_re.')'.
1594                                 '(?![\w\x1A])'.
1595                                 '}',
1596                                 array($this, '_doAbbreviations_callback'), $text);
1597                 }
1598                 return $text;
1599         }
1600         protected function _doAbbreviations_callback($matches) {
1601                 $abbr = $matches[0];
1602                 if (isset($this->abbr_desciptions[$abbr])) {
1603                         $desc = $this->abbr_desciptions[$abbr];
1604                         if (empty($desc)) {
1605                                 return $this->hashPart("<abbr>$abbr</abbr>");
1606                         } else {
1607                                 $desc = $this->encodeAttribute($desc);
1608                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
1609                         }
1610                 } else {
1611                         return $matches[0];
1612                 }
1613         }
1614 }