lib/markdown/Markdown.php

   1 <?php
   2 /**
   3  * Markdown  -  A text-to-HTML conversion tool for web writers
   4  *
   5  * @package   php-markdown
   6  * @author    Michel Fortin <michel.fortin@michelf.com>
   7  * @copyright 2004-2018 Michel Fortin <https://michelf.com/projects/php-markdown/>
   8  * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
   9  */
  10
  11 namespace Michelf;
  12
  13 /**
  14  * Markdown Parser Class
  15  */
  16 class Markdown implements MarkdownInterface {
  17         /**
  18          * Define the package version
  19          * @var string
  20          */
  21         const MARKDOWNLIB_VERSION = "1.8.0";
  22
  23         /**
  24          * Simple function interface - Initialize the parser and return the result
  25          * of its transform method. This will work fine for derived classes too.
  26          *
  27          * @api
  28          *
  29          * @param  string $text
  30          * @return string
  31          */
  32         public static function defaultTransform($text) {
  33                 // Take parser class on which this function was called.
  34                 $parser_class = \get_called_class();
  35
  36                 // Try to take parser from the static parser list
  37                 static $parser_list;
  38                 $parser =& $parser_list[$parser_class];
  39
  40                 // Create the parser it not already set
  41                 if (!$parser) {
  42                         $parser = new $parser_class;
  43                 }
  44
  45                 // Transform text using parser.
  46                 return $parser->transform($text);
  47         }
  48
  49         /**
  50          * Configuration variables
  51          */
  52
  53         /**
  54          * Suffix closing empty elements such as <hr> and <br>; use ">" for HTML.
  55          * @var string
  56          */
  57         public $empty_element_suffix = " />";
  58
  59         /**
  60          * Tab width in spaces; several block patterns allow tab_width - 1 leading spaces.
  61          * @var int
  62          */
  63         public $tab_width = 4;
  64
  65         /**
  66          * Change to `true` to disallow markup or entities.
  67          * @var boolean
  68          */
  69         public $no_markup   = false;
  70         public $no_entities = false;
  71
  72
  73         /**
  74          * Change to `true` to enable line breaks on \n without two trailing spaces
  75          * @var boolean
  76          */
  77         public $hard_wrap = false;
  78
  79         /**
  80          * Predefined URLs and titles for reference links and images (loaded by setup()).
  81          * @var array
  82          */
  83         public $predef_urls   = array();
  84         public $predef_titles = array();
  85
  86         /**
  87          * Optional filter function for URLs
  88          * @var callable|null
  89          */
  90         public $url_filter_func = null;
  91
  92         /**
  93          * Optional header id="" generation callback function.
  94          * @var callable|null
  95          */
  96         public $header_id_func = null;
  97
  98         /**
  99          * Optional function for converting code block content to HTML
 100          * @var callable|null
 101          */
 102         public $code_block_content_func = null;
 103
 104         /**
 105          * Optional function for converting code span content to HTML.
 106          * @var callable|null
 107          */
 108         public $code_span_content_func = null;
 109
 110         /**
 111          * Class attribute to toggle "enhanced ordered list" behaviour
 112          * setting this to true will allow ordered lists to start from the index
 113          * number that is defined first.
 114          *
 115          * For example:
 116          * 2. List item two
 117          * 3. List item three
 118          *
 119          * Becomes:
 120          * <ol start="2">
 121          * <li>List item two</li>
 122          * <li>List item three</li>
 123          * </ol>
 124          *
 125          * @var bool
 126          */
 127         public $enhanced_ordered_list = false;
 128
 129         /**
 130          * Parser implementation
 131          */
 132
 133         /**
 134          * Maximum nesting depths for balanced [brackets] and for (parentheses) in
 135          * URLs, with the regex strings (*_re) the constructor builds from them.
 136          * @var int|string
 137          */
 138         protected $nested_brackets_depth = 6;
 139         protected $nested_brackets_re;
 140
 141         protected $nested_url_parenthesis_depth = 4;
 142         protected $nested_url_parenthesis_re;
 143
 144         /**
 145          * Escapable characters; the constructor quotes them into $escape_chars_re.
 146          * @var string
 147          */
 148         protected $escape_chars = '\`*_{}[]()>#+-.!';
 149         protected $escape_chars_re;
 150
 151         /**
 152          * Constructor function. Initialize appropriate member variables.
 153          * @return void
 154          */
 155         public function __construct() {
 156                 $this->_initDetab();
 157                 $this->prepareItalicsAndBold();
 158
 159                 $this->nested_brackets_re =
 160                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 161                         str_repeat('\])*', $this->nested_brackets_depth);
 162
 163                 $this->nested_url_parenthesis_re =
 164                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
 165                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
 166
 167                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
 168
 169                 // Sort document, block, and span gamut in ascendent priority order.
 170                 asort($this->document_gamut);
 171                 asort($this->block_gamut);
 172                 asort($this->span_gamut);
 173         }
 174
 175
 176         /**
 177          * Per-transform tables: link URLs/titles by lower-cased id, hashed HTML by token.
 178          * @var array
 179          */
 180         protected $urls        = array();
 181         protected $titles      = array();
 182         protected $html_hashes = array();
 183
 184         /**
 185          * True while doAnchors() runs, so a nested call returns its text untouched.
 186          * @var boolean
 187          */
 188         protected $in_anchor = false;
 189
 190         /**
 191          * Status flag to avoid invalid nesting; reset to false by setup().
 192          * @var boolean
 193          */
 194         protected $in_emphasis_processing = false;
 195
 196         /**
 197          * Called before the transformation process starts to setup parser states.
 198          * @return void
 199          */
 200         protected function setup() {
 201                 // Clear global hashes.
 202                 $this->urls        = $this->predef_urls;
 203                 $this->titles      = $this->predef_titles;
 204                 $this->html_hashes = array();
 205                 $this->in_anchor   = false;
 206                 $this->in_emphasis_processing = false;
 207         }
 208
 209         /**
 210          * Called after the transformation process to clear any variable which may
 211          * be taking up memory unnecessarly.
 212          * @return void
 213          */
 214         protected function teardown() {
 215                 $this->urls        = array();
 216                 $this->titles      = array();
 217                 $this->html_hashes = array();
 218         }
 219
 220         /**
 221          * Main function. Performs some preprocessing on the input text and pass
 222          * it through the document gamut.
 223          *
 224          * @api
 225          *
 226          * @param  string $text
 227          * @return string
 228          */
 229         public function transform($text) {
 230                 $this->setup();
 231
 232                 # Remove UTF-8 BOM and marker character in input, if present.
 233                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 234
 235                 # Standardize line endings:
 236                 #   DOS to Unix and Mac to Unix
 237                 $text = preg_replace('{\r\n?}', "\n", $text);
 238
 239                 # Make sure $text ends with a couple of newlines:
 240                 $text .= "\n\n";
 241
 242                 # Convert all tabs to spaces.
 243                 $text = $this->detab($text);
 244
 245                 # Turn block-level HTML blocks into hash entries
 246                 $text = $this->hashHTMLBlocks($text);
 247
 248                 # Strip any lines consisting only of spaces and tabs.
 249                 # This makes subsequent regexen easier to write, because we can
 250                 # match consecutive blank lines with /\n+/ instead of something
 251                 # contorted like /[ ]*\n+/ .
 252                 $text = preg_replace('/^[ ]+$/m', '', $text);
 253
 254                 # Run document gamut methods.
 255                 foreach ($this->document_gamut as $method => $priority) {
 256                         $text = $this->$method($text);
 257                 }
 258
 259                 $this->teardown();
 260
 261                 return $text . "\n";
 262         }
 263
 264         /**
 265          * Document gamut run by transform(): method name => priority (asort()ed in the constructor).
 266          * @var array
 267          */
 268         protected $document_gamut = array(
 269                 // Strip link definitions, store in hashes.
 270                 "stripLinkDefinitions" => 20,
 271                 "runBasicBlockGamut"   => 30,
 272         );
 273
 274         /**
 275          * Strips link definitions from text, stores the URLs and titles in
 276          * hash references
 277          * @param  string $text
 278          * @return string
 279          */
 280         protected function stripLinkDefinitions($text) {
 281
 282                 $less_than_tab = $this->tab_width - 1;
 283
 284                 // Link defs are in the form: ^[id]: url "optional title"
 285                 $text = preg_replace_callback('{
 286                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 287                                                           [ ]*
 288                                                           \n?                           # maybe *one* newline
 289                                                           [ ]*
 290                                                         (?:
 291                                                           <(.+?)>                       # url = $2
 292                                                         |
 293                                                           (\S+?)                        # url = $3
 294                                                         )
 295                                                           [ ]*
 296                                                           \n?                           # maybe one newline
 297                                                           [ ]*
 298                                                         (?:
 299                                                                 (?<=\s)                 # lookbehind for whitespace
 300                                                                 ["(]
 301                                                                 (.*?)                   # title = $4
 302                                                                 [")]
 303                                                                 [ ]*
 304                                                         )?      # title is optional
 305                                                         (?:\n+|\Z)
 306                         }xm',
 307                         array($this, '_stripLinkDefinitions_callback'),
 308                         $text
 309                 );
 310                 return $text;
 311         }
 312
 313         /**
 314          * The callback to strip link definitions
 315          * @param  array $matches
 316          * @return string
 317          */
 318         protected function _stripLinkDefinitions_callback($matches) {
 319                 $link_id = strtolower($matches[1]);
 320                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
 321                 $this->urls[$link_id] = $url;
 322                 $this->titles[$link_id] =& $matches[4];
 323                 return ''; // String that will replace the block
 324         }
 325
 326         /**
 327          * Hashify HTML blocks
 328          * @param  string $text
 329          * @return string
 330          */
 331         protected function hashHTMLBlocks($text) {
 332                 if ($this->no_markup) {
 333                         return $text;
 334                 }
 335
 336                 $less_than_tab = $this->tab_width - 1;
 337
 338                 /**
 339                  * Hashify HTML blocks:
 340                  *
 341                  * We only want to do this for block-level HTML tags, such as headers,
 342                  * lists, and tables. That's because we still want to wrap <p>s around
 343                  * "paragraphs" that are wrapped in non-block-level tags, such as
 344                  * anchors, phrase emphasis, and spans. The list of tags we're looking
 345                  * for is hard-coded:
 346                  *
 347                  * *  List "a" is made of tags which can be both inline or block-level.
 348                  *    These will be treated block-level when the start tag is alone on
 349                  *    its line, otherwise they're not matched here and will be taken as
 350                  *    inline later.
 351                  * *  List "b" is made of tags which are always block-level;
 352                  */
 353                 $block_tags_a_re = 'ins|del';
 354                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 355                                                    'script|noscript|style|form|fieldset|iframe|math|svg|'.
 356                                                    'article|section|nav|aside|hgroup|header|footer|'.
 357                                                    'figure';
 358
 359                 // Regular expression for the content of a block tag.
 360                 $nested_tags_level = 4;
 361                 $attr = '
 362                         (?>                             # optional tag attributes
 363                           \s                    # starts with whitespace
 364                           (?>
 365                                 [^>"/]+         # text outside quotes
 366                           |
 367                                 /+(?!>)         # slash not followed by ">"
 368                           |
 369                                 "[^"]*"         # text inside double quotes (tolerate ">")
 370                           |
 371                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
 372                           )*
 373                         )?
 374                         ';
 375                 $content =
 376                         str_repeat('
 377                                 (?>
 378                                   [^<]+                 # content without tag
 379                                 |
 380                                   <\2                   # nested opening tag
 381                                         '.$attr.'       # attributes
 382                                         (?>
 383                                           />
 384                                         |
 385                                           >', $nested_tags_level).      // end of opening tag
 386                                           '.*?'.                                        // last level nested tag content
 387                         str_repeat('
 388                                           </\2\s*>      # closing nested tag
 389                                         )
 390                                   |
 391                                         <(?!/\2\s*>     # other tags with a different name
 392                                   )
 393                                 )*',
 394                                 $nested_tags_level);
 395                 $content2 = str_replace('\2', '\3', $content);
 396
 397                 /**
 398                  * First, look for nested blocks, e.g.:
 399                  *      <div>
 400                  *              <div>
 401                  *              tags for inner block must be indented.
 402                  *              </div>
 403                  *      </div>
 404                  *
 405                  * The outermost tags must start at the left margin for this to match,
 406                  * and the inner nested divs must be indented.
 407                  * We need to do this before the next, more liberal match, because the
 408                  * next match will start at the first `<div>` and stop at the
 409                  * first `</div>`.
 410                  */
 411                 $text = preg_replace_callback('{(?>
 412                         (?>
 413                                 (?<=\n)                 # Starting on its own line
 414                                 |                               # or
 415                                 \A\n?                   # the at beginning of the doc
 416                         )
 417                         (                                               # save in $1
 418
 419                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 420                           # in between.
 421
 422                                                 [ ]{0,'.$less_than_tab.'}
 423                                                 <('.$block_tags_b_re.')# start tag = $2
 424                                                 '.$attr.'>                      # attributes followed by > and \n
 425                                                 '.$content.'            # content, support nesting
 426                                                 </\2>                           # the matching end tag
 427                                                 [ ]*                            # trailing spaces/tabs
 428                                                 (?=\n+|\Z)      # followed by a newline or end of document
 429
 430                         | # Special version for tags of group a.
 431
 432                                                 [ ]{0,'.$less_than_tab.'}
 433                                                 <('.$block_tags_a_re.')# start tag = $3
 434                                                 '.$attr.'>[ ]*\n        # attributes followed by >
 435                                                 '.$content2.'           # content, support nesting
 436                                                 </\3>                           # the matching end tag
 437                                                 [ ]*                            # trailing spaces/tabs
 438                                                 (?=\n+|\Z)      # followed by a newline or end of document
 439
 440                         | # Special case just for <hr />. It was easier to make a special
 441                           # case than to make the other regex more complicated.
 442
 443                                                 [ ]{0,'.$less_than_tab.'}
 444                                                 <(hr)                           # start tag = $2
 445                                                 '.$attr.'                       # attributes
 446                                                 /?>                                     # the matching end tag
 447                                                 [ ]*
 448                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
 449
 450                         | # Special case for standalone HTML comments:
 451
 452                                         [ ]{0,'.$less_than_tab.'}
 453                                         (?s:
 454                                                 <!-- .*? -->
 455                                         )
 456                                         [ ]*
 457                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 458
 459                         | # PHP and ASP-style processor instructions (<? and <%)
 460
 461                                         [ ]{0,'.$less_than_tab.'}
 462                                         (?s:
 463                                                 <([?%])                 # $2
 464                                                 .*?
 465                                                 \2>
 466                                         )
 467                                         [ ]*
 468                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 469
 470                         )
 471                         )}Sxmi',
 472                         array($this, '_hashHTMLBlocks_callback'),
 473                         $text
 474                 );
 475
 476                 return $text;
 477         }
 478
 479         /**
 480          * The callback for hashing HTML blocks
 481          * @param  string $matches
 482          * @return string
 483          */
 484         protected function _hashHTMLBlocks_callback($matches) {
 485                 $text = $matches[1];
 486                 $key  = $this->hashBlock($text);
 487                 return "\n\n$key\n\n";
 488         }
 489
 490         /**
 491          * Called whenever a tag must be hashed when a function insert an atomic
 492          * element in the text stream. Passing $text to through this function gives
 493          * a unique text-token which will be reverted back when calling unhash.
 494          *
 495          * The $boundary argument specify what character should be used to surround
 496          * the token. By convension, "B" is used for block elements that needs not
 497          * to be wrapped into paragraph tags at the end, ":" is used for elements
 498          * that are word separators and "X" is used in the general case.
 499          *
 500          * @param  string $text
 501          * @param  string $boundary
 502          * @return string
 503          */
 504         protected function hashPart($text, $boundary = 'X') {
 505                 // Swap back any tag hash found in $text so we do not have to `unhash`
 506                 // multiple times at the end.
 507                 $text = $this->unhash($text);
 508
 509                 // Then hash the block.
 510                 static $i = 0;
 511                 $key = "$boundary\x1A" . ++$i . $boundary;
 512                 $this->html_hashes[$key] = $text;
 513                 return $key; // String that will replace the tag.
 514         }
 515
 516         /**
 517          * Shortcut function for hashPart with block-level boundaries.
 518          * @param  string $text
 519          * @return string
 520          */
 521         protected function hashBlock($text) {
 522                 return $this->hashPart($text, 'B');
 523         }
 524
 525         /**
 526          * Block gamut (method name => priority): the transformations that form
 527          * block-level tags like paragraphs, headers, and list items.
 528          * @var array
 529          */
 530         protected $block_gamut = array(
 531                 "doHeaders"         => 10,
 532                 "doHorizontalRules" => 20,
 533                 "doLists"           => 40,
 534                 "doCodeBlocks"      => 50,
 535                 "doBlockQuotes"     => 60,
 536         );
 537
 538         /**
 539          * Run block gamut tranformations.
 540          *
 541          * We need to escape raw HTML in Markdown source before doing anything
 542          * else. This need to be done for each block, and not only at the
 543          * begining in the Markdown function since hashed blocks can be part of
 544          * list items and could have been indented. Indented blocks would have
 545          * been seen as a code block in a previous pass of hashHTMLBlocks.
 546          *
 547          * @param  string $text
 548          * @return string
 549          */
 550         protected function runBlockGamut($text) {
 551                 $text = $this->hashHTMLBlocks($text);
 552                 return $this->runBasicBlockGamut($text);
 553         }
 554
 555         /**
 556          * Run block gamut tranformations, without hashing HTML blocks. This is
 557          * useful when HTML blocks are known to be already hashed, like in the first
 558          * whole-document pass.
 559          *
 560          * @param  string $text
 561          * @return string
 562          */
 563         protected function runBasicBlockGamut($text) {
 564
 565                 foreach ($this->block_gamut as $method => $priority) {
 566                         $text = $this->$method($text);
 567                 }
 568
 569                 // Finally form paragraph and restore hashed blocks.
 570                 $text = $this->formParagraphs($text);
 571
 572                 return $text;
 573         }
 574
 575         /**
 576          * Convert horizontal rules
 577          * @param  string $text
 578          * @return string
 579          */
 580         protected function doHorizontalRules($text) {
 581                 return preg_replace(
 582                         '{
 583                                 ^[ ]{0,3}       # Leading space
 584                                 ([-*_])         # $1: First marker
 585                                 (?>                     # Repeated marker group
 586                                         [ ]{0,2}        # Zero, one, or two spaces.
 587                                         \1                      # Marker character
 588                                 ){2,}           # Group repeated at least twice
 589                                 [ ]*            # Tailing spaces
 590                                 $                       # End of line.
 591                         }mx',
 592                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 593                         $text
 594                 );
 595         }
 596
 597         /**
 598          * Span gamut (method name => priority): the transformations that occur
 599          * *within* block-level tags like paragraphs, headers, and list items.
 600          * @var array
 601          */
 602         protected $span_gamut = array(
 603                 // Process character escapes, code spans, and inline HTML
 604                 // in one shot.
 605                 "parseSpan"           => -30,
 606                 // Process anchor and image tags. Images must come first,
 607                 // because ![foo][f] looks like an anchor.
 608                 "doImages"            =>  10,
 609                 "doAnchors"           =>  20,
 610                 // Make links out of things like `<https://example.com/>`
 611                 // Must come after doAnchors, because you can use < and >
 612                 // delimiters in inline links like [this](<url>).
 613                 "doAutoLinks"         =>  30,
 614                 "encodeAmpsAndAngles" =>  40,
 615                 "doItalicsAndBold"    =>  50,
 616                 "doHardBreaks"        =>  60,
 617         );
 618
 619         /**
 620          * Run span gamut transformations
 621          * @param  string $text
 622          * @return string
 623          */
 624         protected function runSpanGamut($text) {
 625                 foreach ($this->span_gamut as $method => $priority) {
 626                         $text = $this->$method($text);
 627                 }
 628
 629                 return $text;
 630         }
 631
 632         /**
 633          * Do hard breaks
 634          * @param  string $text
 635          * @return string
 636          */
 637         protected function doHardBreaks($text) {
 638                 if ($this->hard_wrap) {
 639                         return preg_replace_callback('/ *\n/',
 640                                 array($this, '_doHardBreaks_callback'), $text);
 641                 } else {
 642                         return preg_replace_callback('/ {2,}\n/',
 643                                 array($this, '_doHardBreaks_callback'), $text);
 644                 }
 645         }
 646
 647         /**
 648          * Trigger part hashing for the hard break (callback method)
 649          * @param  array $matches
 650          * @return string
 651          */
 652         protected function _doHardBreaks_callback($matches) {
 653                 return $this->hashPart("<br$this->empty_element_suffix\n");
 654         }
 655
 656         /**
 657          * Turn Markdown link shortcuts into XHTML <a> tags.
 658          * @param  string $text
 659          * @return string
 660          */
 661         protected function doAnchors($text) {
 662                 if ($this->in_anchor) {
 663                         return $text;
 664                 }
 665                 $this->in_anchor = true;
 666
 667                 // First, handle reference-style links: [link text] [id]
 668                 $text = preg_replace_callback('{
 669                         (                                       # wrap whole match in $1
 670                           \[
 671                                 ('.$this->nested_brackets_re.') # link text = $2
 672                           \]
 673
 674                           [ ]?                          # one optional space
 675                           (?:\n[ ]*)?           # one optional newline followed by spaces
 676
 677                           \[
 678                                 (.*?)           # id = $3
 679                           \]
 680                         )
 681                         }xs',
 682                         array($this, '_doAnchors_reference_callback'), $text);
 683
 684                 // Next, inline-style links: [link text](url "optional title")
 685                 $text = preg_replace_callback('{
 686                         (                               # wrap whole match in $1
 687                           \[
 688                                 ('.$this->nested_brackets_re.') # link text = $2
 689                           \]
 690                           \(                    # literal paren
 691                                 [ \n]*
 692                                 (?:
 693                                         <(.+?)> # href = $3
 694                                 |
 695                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 696                                 )
 697                                 [ \n]*
 698                                 (                       # $5
 699                                   ([\'"])       # quote char = $6
 700                                   (.*?)         # Title = $7
 701                                   \6            # matching quote
 702                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 703                                 )?                      # title is optional
 704                           \)
 705                         )
 706                         }xs',
 707                         array($this, '_doAnchors_inline_callback'), $text);
 708
 709                 // Last, handle reference-style shortcuts: [link text]
 710                 // These must come last in case you've also got [link text][1]
 711                 // or [link text](/foo)
 712                 $text = preg_replace_callback('{
 713                         (                                       # wrap whole match in $1
 714                           \[
 715                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 716                           \]
 717                         )
 718                         }xs',
 719                         array($this, '_doAnchors_reference_callback'), $text);
 720
 721                 $this->in_anchor = false;
 722                 return $text;
 723         }
 724
 725         /**
 726          * Callback method to parse referenced anchors
 727          * @param  string $matches
 728          * @return string
 729          */
 730         protected function _doAnchors_reference_callback($matches) {
 731                 $whole_match =  $matches[1];
 732                 $link_text   =  $matches[2];
 733                 $link_id     =& $matches[3];
 734
 735                 if ($link_id == "") {
 736                         // for shortcut links like [this][] or [this].
 737                         $link_id = $link_text;
 738                 }
 739
 740                 // lower-case and turn embedded newlines into spaces
 741                 $link_id = strtolower($link_id);
 742                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 743
 744                 if (isset($this->urls[$link_id])) {
 745                         $url = $this->urls[$link_id];
 746                         $url = $this->encodeURLAttribute($url);
 747
 748                         $result = "<a href=\"$url\"";
 749                         if ( isset( $this->titles[$link_id] ) ) {
 750                                 $title = $this->titles[$link_id];
 751                                 $title = $this->encodeAttribute($title);
 752                                 $result .=  " title=\"$title\"";
 753                         }
 754
 755                         $link_text = $this->runSpanGamut($link_text);
 756                         $result .= ">$link_text</a>";
 757                         $result = $this->hashPart($result);
 758                 } else {
 759                         $result = $whole_match;
 760                 }
 761                 return $result;
 762         }
 763
 764         /**
 765          * Callback method to parse inline anchors
 766          * @param  string $matches
 767          * @return string
 768          */
 769         protected function _doAnchors_inline_callback($matches) {
 770                 $whole_match    =  $matches[1];
 771                 $link_text              =  $this->runSpanGamut($matches[2]);
 772                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 773                 $title                  =& $matches[7];
 774
 775                 // If the URL was of the form <s p a c e s> it got caught by the HTML
 776                 // tag parser and hashed. Need to reverse the process before using
 777                 // the URL.
 778                 $unhashed = $this->unhash($url);
 779                 if ($unhashed != $url)
 780                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 781
 782                 $url = $this->encodeURLAttribute($url);
 783
 784                 $result = "<a href=\"$url\"";
 785                 if (isset($title)) {
 786                         $title = $this->encodeAttribute($title);
 787                         $result .=  " title=\"$title\"";
 788                 }
 789
 790                 $link_text = $this->runSpanGamut($link_text);
 791                 $result .= ">$link_text</a>";
 792
 793                 return $this->hashPart($result);
 794         }
 795
 796         /**
 797          * Turn Markdown image shortcuts into <img> tags.
 798          * @param  string $text
 799          * @return string
 800          */
 801         protected function doImages($text) {
 802                 // First, handle reference-style labeled images: ![alt text][id]
 803                 $text = preg_replace_callback('{
 804                         (                               # wrap whole match in $1
 805                           !\[
 806                                 ('.$this->nested_brackets_re.')         # alt text = $2
 807                           \]
 808
 809                           [ ]?                          # one optional space
 810                           (?:\n[ ]*)?           # one optional newline followed by spaces
 811
 812                           \[
 813                                 (.*?)           # id = $3
 814                           \]
 815
 816                         )
 817                         }xs',
 818                         array($this, '_doImages_reference_callback'), $text);
 819
 820                 // Next, handle inline images:  ![alt text](url "optional title")
 821                 // Don't forget: encode * and _
 822                 $text = preg_replace_callback('{
 823                         (                               # wrap whole match in $1
 824                           !\[
 825                                 ('.$this->nested_brackets_re.')         # alt text = $2
 826                           \]
 827                           \s?                   # One optional whitespace character
 828                           \(                    # literal paren
 829                                 [ \n]*
 830                                 (?:
 831                                         <(\S*)> # src url = $3
 832                                 |
 833                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 834                                 )
 835                                 [ \n]*
 836                                 (                       # $5
 837                                   ([\'"])       # quote char = $6
 838                                   (.*?)         # title = $7
 839                                   \6            # matching quote
 840                                   [ \n]*
 841                                 )?                      # title is optional
 842                           \)
 843                         )
 844                         }xs',
 845                         array($this, '_doImages_inline_callback'), $text);
 846
 847                 return $text;
 848         }
 849
 850         /**
 851          * Callback to parse references image tags
 852          * @param  array $matches
 853          * @return string
 854          */
 855         protected function _doImages_reference_callback($matches) {
 856                 $whole_match = $matches[1];
 857                 $alt_text    = $matches[2];
 858                 $link_id     = strtolower($matches[3]);
 859
 860                 if ($link_id == "") {
 861                         $link_id = strtolower($alt_text); // for shortcut links like ![this][].
 862                 }
 863
 864                 $alt_text = $this->encodeAttribute($alt_text);
 865                 if (isset($this->urls[$link_id])) {
 866                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
 867                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 868                         if (isset($this->titles[$link_id])) {
 869                                 $title = $this->titles[$link_id];
 870                                 $title = $this->encodeAttribute($title);
 871                                 $result .=  " title=\"$title\"";
 872                         }
 873                         $result .= $this->empty_element_suffix;
 874                         $result = $this->hashPart($result);
 875                 } else {
 876                         // If there's no such link ID, leave intact:
 877                         $result = $whole_match;
 878                 }
 879
 880                 return $result;
 881         }
 882
 883         /**
 884          * Callback to parse inline image tags
 885          * @param  array $matches
 886          * @return string
 887          */
 888         protected function _doImages_inline_callback($matches) {
 889                 $whole_match    = $matches[1];
 890                 $alt_text               = $matches[2];
 891                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 892                 $title                  =& $matches[7];
 893
 894                 $alt_text = $this->encodeAttribute($alt_text);
 895                 $url = $this->encodeURLAttribute($url);
 896                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 897                 if (isset($title)) {
 898                         $title = $this->encodeAttribute($title);
 899                         $result .=  " title=\"$title\""; // $title already quoted
 900                 }
 901                 $result .= $this->empty_element_suffix;
 902
 903                 return $this->hashPart($result);
 904         }
 905
 906         /**
 907          * Parse Markdown heading elements to HTML
 908          * @param  string $text
 909          * @return string
 910          */
 911         protected function doHeaders($text) {
 912                 /**
 913                  * Setext-style headers:
 914                  *        Header 1
 915                  *        ========
 916                  *
 917                  *        Header 2
 918                  *        --------
 919                  */
 920                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 921                         array($this, '_doHeaders_callback_setext'), $text);
 922
 923                 /**
 924                  * atx-style headers:
 925                  *   # Header 1
 926                  *   ## Header 2
 927                  *   ## Header 2 with closing hashes ##
 928                  *   ...
 929                  *   ###### Header 6
 930                  */
 931                 $text = preg_replace_callback('{
 932                                 ^(\#{1,6})      # $1 = string of #\'s
 933                                 [ ]*
 934                                 (.+?)           # $2 = Header text
 935                                 [ ]*
 936                                 \#*                     # optional closing #\'s (not counted)
 937                                 \n+
 938                         }xm',
 939                         array($this, '_doHeaders_callback_atx'), $text);
 940
 941                 return $text;
 942         }
 943
 944         /**
 945          * Setext header parsing callback
 946          * @param  array $matches
 947          * @return string
 948          */
 949         protected function _doHeaders_callback_setext($matches) {
 950                 // Terrible hack to check we haven't found an empty list item.
 951                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
 952                         return $matches[0];
 953                 }
 954
 955                 $level = $matches[2][0] == '=' ? 1 : 2;
 956
 957                 // ID attribute generation
 958                 $idAtt = $this->_generateIdFromHeaderValue($matches[1]);
 959
 960                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
 961                 return "\n" . $this->hashBlock($block) . "\n\n";
 962         }
 963
 964         /**
 965          * ATX header parsing callback
 966          * @param  array $matches
 967          * @return string
 968          */
 969         protected function _doHeaders_callback_atx($matches) {
 970                 // ID attribute generation
 971                 $idAtt = $this->_generateIdFromHeaderValue($matches[2]);
 972
 973                 $level = strlen($matches[1]);
 974                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>";
 975                 return "\n" . $this->hashBlock($block) . "\n\n";
 976         }
 977
 978         /**
 979          * If a header_id_func property is set, we can use it to automatically
 980          * generate an id attribute.
 981          *
 982          * This method returns a string in the form id="foo", or an empty string
 983          * otherwise.
 984          * @param  string $headerValue
 985          * @return string
 986          */
 987         protected function _generateIdFromHeaderValue($headerValue) {
 988                 if (!is_callable($this->header_id_func)) {
 989                         return "";
 990                 }
 991
 992                 $idValue = call_user_func($this->header_id_func, $headerValue);
 993                 if (!$idValue) {
 994                         return "";
 995                 }
 996
 997                 return ' id="' . $this->encodeAttribute($idValue) . '"';
 998         }
 999
1000         /**
1001          * Form HTML ordered (numbered) and unordered (bulleted) lists.
1002          * @param  string $text
1003          * @return string
1004          */
1005         protected function doLists($text) {
1006                 $less_than_tab = $this->tab_width - 1;
1007
1008                 // Re-usable patterns to match list item bullets and number markers:
1009                 $marker_ul_re  = '[*+-]';
1010                 $marker_ol_re  = '\d+[\.]';
1011
1012                 $markers_relist = array(
1013                         $marker_ul_re => $marker_ol_re,
1014                         $marker_ol_re => $marker_ul_re,
1015                         );
1016
1017                 foreach ($markers_relist as $marker_re => $other_marker_re) {
1018                         // Re-usable pattern to match any entirel ul or ol list:
1019                         $whole_list_re = '
1020                                 (                                                               # $1 = whole list
1021                                   (                                                             # $2
1022                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
1023                                         ('.$marker_re.')                        # $4 = first list item marker
1024                                         [ ]+
1025                                   )
1026                                   (?s:.+?)
1027                                   (                                                             # $5
1028                                           \z
1029                                         |
1030                                           \n{2,}
1031                                           (?=\S)
1032                                           (?!                                           # Negative lookahead for another list item marker
1033                                                 [ ]*
1034                                                 '.$marker_re.'[ ]+
1035                                           )
1036                                         |
1037                                           (?=                                           # Lookahead for another kind of list
1038                                             \n
1039                                                 \3                                              # Must have the same indentation
1040                                                 '.$other_marker_re.'[ ]+
1041                                           )
1042                                   )
1043                                 )
1044                         '; // mx
1045
1046                         // We use a different prefix before nested lists than top-level lists.
1047                         //See extended comment in _ProcessListItems().
1048
1049                         if ($this->list_level) {
1050                                 $text = preg_replace_callback('{
1051                                                 ^
1052                                                 '.$whole_list_re.'
1053                                         }mx',
1054                                         array($this, '_doLists_callback'), $text);
1055                         } else {
1056                                 $text = preg_replace_callback('{
1057                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
1058                                                 '.$whole_list_re.'
1059                                         }mx',
1060                                         array($this, '_doLists_callback'), $text);
1061                         }
1062                 }
1063
1064                 return $text;
1065         }
1066
1067         /**
1068          * List parsing callback
1069          * @param  array $matches
1070          * @return string
1071          */
1072         protected function _doLists_callback($matches) {
1073                 // Re-usable patterns to match list item bullets and number markers:
1074                 $marker_ul_re  = '[*+-]';
1075                 $marker_ol_re  = '\d+[\.]';
1076                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1077                 $marker_ol_start_re = '[0-9]+';
1078
1079                 $list = $matches[1];
1080                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
1081
1082                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1083
1084                 $list .= "\n";
1085                 $result = $this->processListItems($list, $marker_any_re);
1086
1087                 $ol_start = 1;
1088                 if ($this->enhanced_ordered_list) {
1089                         // Get the start number for ordered list.
1090                         if ($list_type == 'ol') {
1091                                 $ol_start_array = array();
1092                                 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
1093                                 if ($ol_start_check){
1094                                         $ol_start = $ol_start_array[0];
1095                                 }
1096                         }
1097                 }
1098
1099                 if ($ol_start > 1 && $list_type == 'ol'){
1100                         $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
1101                 } else {
1102                         $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1103                 }
1104                 return "\n". $result ."\n\n";
1105         }
1106
        /**
         * Nesting tracker for list levels.
         *
         * Incremented by processListItems() before it splits a list into items
         * and decremented once it is done. doLists() uses a different
         * list-matching prefix whenever the value is non-zero.
         * @var integer
         */
        protected $list_level = 0;
1112
1113         /**
1114          * Process the contents of a single ordered or unordered list, splitting it
1115          * into individual list items.
1116          * @param  string $list_str
1117          * @param  string $marker_any_re
1118          * @return string
1119          */
1120         protected function processListItems($list_str, $marker_any_re) {
1121                 /**
1122                  * The $this->list_level global keeps track of when we're inside a list.
1123                  * Each time we enter a list, we increment it; when we leave a list,
1124                  * we decrement. If it's zero, we're not in a list anymore.
1125                  *
1126                  * We do this because when we're not inside a list, we want to treat
1127                  * something like this:
1128                  *
1129                  *              I recommend upgrading to version
1130                  *              8. Oops, now this line is treated
1131                  *              as a sub-list.
1132                  *
1133                  * As a single paragraph, despite the fact that the second line starts
1134                  * with a digit-period-space sequence.
1135                  *
1136                  * Whereas when we're inside a list (or sub-list), that line will be
1137                  * treated as the start of a sub-list. What a kludge, huh? This is
1138                  * an aspect of Markdown's syntax that's hard to parse perfectly
1139                  * without resorting to mind-reading. Perhaps the solution is to
1140                  * change the syntax rules such that sub-lists must start with a
1141                  * starting cardinal number; e.g. "1." or "a.".
1142                  */
1143                 $this->list_level++;
1144
1145                 // Trim trailing blank lines:
1146                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1147
1148                 $list_str = preg_replace_callback('{
1149                         (\n)?                                                   # leading line = $1
1150                         (^[ ]*)                                                 # leading whitespace = $2
1151                         ('.$marker_any_re.'                             # list marker and space = $3
1152                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
1153                         )
1154                         ((?s:.*?))                                              # list item text   = $4
1155                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
1156                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1157                         }xm',
1158                         array($this, '_processListItems_callback'), $list_str);
1159
1160                 $this->list_level--;
1161                 return $list_str;
1162         }
1163
1164         /**
1165          * List item parsing callback
1166          * @param  array $matches
1167          * @return string
1168          */
1169         protected function _processListItems_callback($matches) {
1170                 $item = $matches[4];
1171                 $leading_line =& $matches[1];
1172                 $leading_space =& $matches[2];
1173                 $marker_space = $matches[3];
1174                 $tailing_blank_line =& $matches[5];
1175
1176                 if ($leading_line || $tailing_blank_line ||
1177                         preg_match('/\n{2,}/', $item))
1178                 {
1179                         // Replace marker with the appropriate whitespace indentation
1180                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1181                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1182                 } else {
1183                         // Recursion for sub-lists:
1184                         $item = $this->doLists($this->outdent($item));
1185                         $item = $this->formParagraphs($item, false);
1186                 }
1187
1188                 return "<li>" . $item . "</li>\n";
1189         }
1190
1191         /**
1192          * Process Markdown `<pre><code>` blocks.
1193          * @param  string $text
1194          * @return string
1195          */
1196         protected function doCodeBlocks($text) {
1197                 $text = preg_replace_callback('{
1198                                 (?:\n\n|\A\n?)
1199                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1200                                   (?>
1201                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1202                                         .*\n+
1203                                   )+
1204                                 )
1205                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1206                         }xm',
1207                         array($this, '_doCodeBlocks_callback'), $text);
1208
1209                 return $text;
1210         }
1211
1212         /**
1213          * Code block parsing callback
1214          * @param  array $matches
1215          * @return string
1216          */
1217         protected function _doCodeBlocks_callback($matches) {
1218                 $codeblock = $matches[1];
1219
1220                 $codeblock = $this->outdent($codeblock);
1221                 if ($this->code_block_content_func) {
1222                         $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
1223                 } else {
1224                         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1225                 }
1226
1227                 # trim leading newlines and trailing newlines
1228                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1229
1230                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1231                 return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
1232         }
1233
1234         /**
1235          * Create a code span markup for $code. Called from handleSpanToken.
1236          * @param  string $code
1237          * @return string
1238          */
1239         protected function makeCodeSpan($code) {
1240                 if ($this->code_span_content_func) {
1241                         $code = call_user_func($this->code_span_content_func, $code);
1242                 } else {
1243                         $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1244                 }
1245                 return $this->hashPart("<code>$code</code>");
1246         }
1247
        /**
         * Define the emphasis operators with their regex matches.
         *
         * Keys are combined with the keys of $strong_relist in
         * prepareItalicsAndBold(). The '' entry matches a lone `*` or `_` that is
         * not followed by whitespace (optionally after one of `.,:;`); the '*'
         * and '_' entries match that same character when it is not preceded by
         * whitespace.
         * @var array
         */
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?!_)',
        );
1257
        /**
         * Define the strong operators with their regex matches.
         *
         * Keys are combined with the keys of $em_relist in
         * prepareItalicsAndBold(). The '' entry matches a `**` or `__` pair that
         * is not followed by whitespace (optionally after one of `.,:;`); the
         * '**' and '__' entries match that same pair when it is not preceded by
         * whitespace.
         * @var array
         */
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?!_)',
        );
1267
        /**
         * Define the emphasis + strong operators with their regex matches.
         *
         * prepareItalicsAndBold() looks entries up by the concatenation of an
         * $em_relist key and a $strong_relist key, so only the combinations '',
         * '***' and '___' have a three-character pattern.
         * @var array
         */
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?!_)',
        );
1277
        /**
         * Container for prepared regular expressions.
         *
         * Filled by prepareItalicsAndBold(): one complete, delimited pattern per
         * concatenated $em_relist/$strong_relist key. doItalicsAndBold() reads
         * the entry for its current "$em$strong" state.
         * @var array
         */
        protected $em_strong_prepared_relist;
1283
1284         /**
1285          * Prepare regular expressions for searching emphasis tokens in any
1286          * context.
1287          * @return void
1288          */
1289         protected function prepareItalicsAndBold() {
1290                 foreach ($this->em_relist as $em => $em_re) {
1291                         foreach ($this->strong_relist as $strong => $strong_re) {
1292                                 // Construct list of allowed token expressions.
1293                                 $token_relist = array();
1294                                 if (isset($this->em_strong_relist["$em$strong"])) {
1295                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1296                                 }
1297                                 $token_relist[] = $em_re;
1298                                 $token_relist[] = $strong_re;
1299
1300                                 // Construct master expression from list.
1301                                 $token_re = '{(' . implode('|', $token_relist) . ')}';
1302                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1303                         }
1304                 }
1305         }
1306
1307         /**
1308          * Convert Markdown italics (emphasis) and bold (strong) to HTML
1309          * @param  string $text
1310          * @return string
1311          */
1312         protected function doItalicsAndBold($text) {
1313                 if ($this->in_emphasis_processing) {
1314                         return $text; // avoid reentrency
1315                 }
1316                 $this->in_emphasis_processing = true;
1317
1318                 $token_stack = array('');
1319                 $text_stack = array('');
1320                 $em = '';
1321                 $strong = '';
1322                 $tree_char_em = false;
1323
1324                 while (1) {
1325                         // Get prepared regular expression for seraching emphasis tokens
1326                         // in current context.
1327                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1328
1329                         // Each loop iteration search for the next emphasis token.
1330                         // Each token is then passed to handleSpanToken.
1331                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1332                         $text_stack[0] .= $parts[0];
1333                         $token =& $parts[1];
1334                         $text =& $parts[2];
1335
1336                         if (empty($token)) {
1337                                 // Reached end of text span: empty stack without emitting.
1338                                 // any more emphasis.
1339                                 while ($token_stack[0]) {
1340                                         $text_stack[1] .= array_shift($token_stack);
1341                                         $text_stack[0] .= array_shift($text_stack);
1342                                 }
1343                                 break;
1344                         }
1345
1346                         $token_len = strlen($token);
1347                         if ($tree_char_em) {
1348                                 // Reached closing marker while inside a three-char emphasis.
1349                                 if ($token_len == 3) {
1350                                         // Three-char closing marker, close em and strong.
1351                                         array_shift($token_stack);
1352                                         $span = array_shift($text_stack);
1353                                         $span = $this->runSpanGamut($span);
1354                                         $span = "<strong><em>$span</em></strong>";
1355                                         $text_stack[0] .= $this->hashPart($span);
1356                                         $em = '';
1357                                         $strong = '';
1358                                 } else {
1359                                         // Other closing marker: close one em or strong and
1360                                         // change current token state to match the other
1361                                         $token_stack[0] = str_repeat($token[0], 3-$token_len);
1362                                         $tag = $token_len == 2 ? "strong" : "em";
1363                                         $span = $text_stack[0];
1364                                         $span = $this->runSpanGamut($span);
1365                                         $span = "<$tag>$span</$tag>";
1366                                         $text_stack[0] = $this->hashPart($span);
1367                                         $$tag = ''; // $$tag stands for $em or $strong
1368                                 }
1369                                 $tree_char_em = false;
1370                         } else if ($token_len == 3) {
1371                                 if ($em) {
1372                                         // Reached closing marker for both em and strong.
1373                                         // Closing strong marker:
1374                                         for ($i = 0; $i < 2; ++$i) {
1375                                                 $shifted_token = array_shift($token_stack);
1376                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1377                                                 $span = array_shift($text_stack);
1378                                                 $span = $this->runSpanGamut($span);
1379                                                 $span = "<$tag>$span</$tag>";
1380                                                 $text_stack[0] .= $this->hashPart($span);
1381                                                 $$tag = ''; // $$tag stands for $em or $strong
1382                                         }
1383                                 } else {
1384                                         // Reached opening three-char emphasis marker. Push on token
1385                                         // stack; will be handled by the special condition above.
1386                                         $em = $token[0];
1387                                         $strong = "$em$em";
1388                                         array_unshift($token_stack, $token);
1389                                         array_unshift($text_stack, '');
1390                                         $tree_char_em = true;
1391                                 }
1392                         } else if ($token_len == 2) {
1393                                 if ($strong) {
1394                                         // Unwind any dangling emphasis marker:
1395                                         if (strlen($token_stack[0]) == 1) {
1396                                                 $text_stack[1] .= array_shift($token_stack);
1397                                                 $text_stack[0] .= array_shift($text_stack);
1398                                                 $em = '';
1399                                         }
1400                                         // Closing strong marker:
1401                                         array_shift($token_stack);
1402                                         $span = array_shift($text_stack);
1403                                         $span = $this->runSpanGamut($span);
1404                                         $span = "<strong>$span</strong>";
1405                                         $text_stack[0] .= $this->hashPart($span);
1406                                         $strong = '';
1407                                 } else {
1408                                         array_unshift($token_stack, $token);
1409                                         array_unshift($text_stack, '');
1410                                         $strong = $token;
1411                                 }
1412                         } else {
1413                                 // Here $token_len == 1
1414                                 if ($em) {
1415                                         if (strlen($token_stack[0]) == 1) {
1416                                                 // Closing emphasis marker:
1417                                                 array_shift($token_stack);
1418                                                 $span = array_shift($text_stack);
1419                                                 $span = $this->runSpanGamut($span);
1420                                                 $span = "<em>$span</em>";
1421                                                 $text_stack[0] .= $this->hashPart($span);
1422                                                 $em = '';
1423                                         } else {
1424                                                 $text_stack[0] .= $token;
1425                                         }
1426                                 } else {
1427                                         array_unshift($token_stack, $token);
1428                                         array_unshift($text_stack, '');
1429                                         $em = $token;
1430                                 }
1431                         }
1432                 }
1433                 $this->in_emphasis_processing = false;
1434                 return $text_stack[0];
1435         }
1436
1437         /**
1438          * Parse Markdown blockquotes to HTML
1439          * @param  string $text
1440          * @return string
1441          */
1442         protected function doBlockQuotes($text) {
1443                 $text = preg_replace_callback('/
1444                           (                                                             # Wrap whole match in $1
1445                                 (?>
1446                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1447                                         .+\n                                    # rest of the first line
1448                                   (.+\n)*                                       # subsequent consecutive lines
1449                                   \n*                                           # blanks
1450                                 )+
1451                           )
1452                         /xm',
1453                         array($this, '_doBlockQuotes_callback'), $text);
1454
1455                 return $text;
1456         }
1457
1458         /**
1459          * Blockquote parsing callback
1460          * @param  array $matches
1461          * @return string
1462          */
1463         protected function _doBlockQuotes_callback($matches) {
1464                 $bq = $matches[1];
1465                 // trim one level of quoting - trim whitespace-only lines
1466                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1467                 $bq = $this->runBlockGamut($bq); // recurse
1468
1469                 $bq = preg_replace('/^/m', "  ", $bq);
1470                 // These leading spaces cause problem with <pre> content,
1471                 // so we need to fix that:
1472                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1473                         array($this, '_doBlockQuotes_callback2'), $bq);
1474
1475                 return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
1476         }
1477
1478         /**
         * Blockquote callback removing the two-space indent added inside <pre> blocks
1480          * @param  array $matches
1481          * @return string
1482          */
1483         protected function _doBlockQuotes_callback2($matches) {
1484                 $pre = $matches[1];
1485                 $pre = preg_replace('/^  /m', '', $pre);
1486                 return $pre;
1487         }
1488
1489         /**
1490          * Parse paragraphs
1491          *
1492          * @param  string $text String to process in paragraphs
1493          * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1494          * @return string
1495          */
1496         protected function formParagraphs($text, $wrap_in_p = true) {
1497                 // Strip leading and trailing lines:
1498                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1499
1500                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1501
1502                 // Wrap <p> tags and unhashify HTML blocks
1503                 foreach ($grafs as $key => $value) {
1504                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1505                                 // Is a paragraph.
1506                                 $value = $this->runSpanGamut($value);
1507                                 if ($wrap_in_p) {
1508                                         $value = preg_replace('/^([ ]*)/', "<p>", $value);
1509                                         $value .= "</p>";
1510                                 }
1511                                 $grafs[$key] = $this->unhash($value);
1512                         } else {
1513                                 // Is a block.
1514                                 // Modify elements of @grafs in-place...
1515                                 $graf = $value;
1516                                 $block = $this->html_hashes[$graf];
1517                                 $graf = $block;
1518 //                              if (preg_match('{
1519 //                                      \A
1520 //                                      (                                                       # $1 = <div> tag
1521 //                                        <div  \s+
1522 //                                        [^>]*
1523 //                                        \b
1524 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1525 //                                        1
1526 //                                        \2
1527 //                                        [^>]*
1528 //                                        >
1529 //                                      )
1530 //                                      (                                                       # $3 = contents
1531 //                                      .*
1532 //                                      )
1533 //                                      (</div>)                                        # $4 = closing tag
1534 //                                      \z
1535 //                                      }xs', $block, $matches))
1536 //                              {
1537 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1538 //
1539 //                                      // We can't call Markdown(), because that resets the hash;
1540 //                                      // that initialization code should be pulled into its own sub, though.
1541 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1542 //
1543 //                                      // Run document gamut methods on the content.
1544 //                                      foreach ($this->document_gamut as $method => $priority) {
1545 //                                              $div_content = $this->$method($div_content);
1546 //                                      }
1547 //
1548 //                                      $div_open = preg_replace(
1549 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1550 //
1551 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1552 //                              }
1553                                 $grafs[$key] = $graf;
1554                         }
1555                 }
1556
1557                 return implode("\n\n", $grafs);
1558         }
1559
1560         /**
1561          * Encode text for a double-quoted HTML attribute. This function
1562          * is *not* suitable for attributes enclosed in single quotes.
1563          * @param  string $text
1564          * @return string
1565          */
1566         protected function encodeAttribute($text) {
1567                 $text = $this->encodeAmpsAndAngles($text);
1568                 $text = str_replace('"', '&quot;', $text);
1569                 return $text;
1570         }
1571
1572         /**
1573          * Encode text for a double-quoted HTML attribute containing a URL,
1574          * applying the URL filter if set. Also generates the textual
1575          * representation for the URL (removing mailto: or tel:) storing it in $text.
1576          * This function is *not* suitable for attributes enclosed in single quotes.
1577          *
1578          * @param  string $url
1579          * @param  string &$text Passed by reference
1580          * @return string        URL
1581          */
1582         protected function encodeURLAttribute($url, &$text = null) {
1583                 if ($this->url_filter_func) {
1584                         $url = call_user_func($this->url_filter_func, $url);
1585                 }
1586
1587                 if (preg_match('{^mailto:}i', $url)) {
1588                         $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1589                 } else if (preg_match('{^tel:}i', $url)) {
1590                         $url = $this->encodeAttribute($url);
1591                         $text = substr($url, 4);
1592                 } else {
1593                         $url = $this->encodeAttribute($url);
1594                         $text = $url;
1595                 }
1596
1597                 return $url;
1598         }
1599
1600         /**
1601          * Smart processing for ampersands and angle brackets that need to
1602          * be encoded. Valid character entities are left alone unless the
1603          * no-entities mode is set.
1604          * @param  string $text
1605          * @return string
1606          */
1607         protected function encodeAmpsAndAngles($text) {
1608                 if ($this->no_entities) {
1609                         $text = str_replace('&', '&amp;', $text);
1610                 } else {
1611                         // Ampersand-encoding based entirely on Nat Irons's Amputator
1612                         // MT plugin: <http://bumppo.net/projects/amputator/>
1613                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1614                                                                 '&amp;', $text);
1615                 }
1616                 // Encode remaining <'s
1617                 $text = str_replace('<', '&lt;', $text);
1618
1619                 return $text;
1620         }
1621
1622         /**
1623          * Parse Markdown automatic links to anchor HTML tags
1624          * @param  string $text
1625          * @return string
1626          */
1627         protected function doAutoLinks($text) {
1628                 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1629                         array($this, '_doAutoLinks_url_callback'), $text);
1630
1631                 // Email addresses: <address@domain.foo>
1632                 $text = preg_replace_callback('{
1633                         <
1634                         (?:mailto:)?
1635                         (
1636                                 (?:
1637                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1638                                 |
1639                                         ".*?"
1640                                 )
1641                                 \@
1642                                 (?:
1643                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1644                                 |
1645                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1646                                 )
1647                         )
1648                         >
1649                         }xi',
1650                         array($this, '_doAutoLinks_email_callback'), $text);
1651
1652                 return $text;
1653         }
1654
1655         /**
1656          * Parse URL callback
1657          * @param  array $matches
1658          * @return string
1659          */
1660         protected function _doAutoLinks_url_callback($matches) {
1661                 $url = $this->encodeURLAttribute($matches[1], $text);
1662                 $link = "<a href=\"$url\">$text</a>";
1663                 return $this->hashPart($link);
1664         }
1665
1666         /**
1667          * Parse email address callback
1668          * @param  array $matches
1669          * @return string
1670          */
1671         protected function _doAutoLinks_email_callback($matches) {
1672                 $addr = $matches[1];
1673                 $url = $this->encodeURLAttribute("mailto:$addr", $text);
1674                 $link = "<a href=\"$url\">$text</a>";
1675                 return $this->hashPart($link);
1676         }
1677
1678         /**
1679          * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1680          *
1681          * Output: the same text but with most characters encoded as either a
1682          *         decimal or hex entity, in the hopes of foiling most address
1683          *         harvesting spam bots. E.g.:
1684          *
1685          *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1686          *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1687          *        &#x6d;
1688          *
         * Note: the additional output $tail is assigned the same value as the
         * output, minus the number of characters specified by $head_length.
1691          *
         * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
         * With some optimizations by Milian Wolff. Forced encoding of HTML
         * attribute special characters by Allan Odgaard.
1695          *
1696          * @param  string  $text
1697          * @param  string  &$tail
1698          * @param  integer $head_length
1699          * @return string
1700          */
1701         protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1702                 if ($text == "") {
1703                         return $tail = "";
1704                 }
1705
1706                 $chars = preg_split('/(?<!^)(?!$)/', $text);
1707                 $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.
1708
1709                 foreach ($chars as $key => $char) {
1710                         $ord = ord($char);
1711                         // Ignore non-ascii chars.
1712                         if ($ord < 128) {
1713                                 $r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
1714                                 // roughly 10% raw, 45% hex, 45% dec
1715                                 // '@' *must* be encoded. I insist.
1716                                 // '"' and '>' have to be encoded inside the attribute
1717                                 if ($r > 90 && strpos('@"&>', $char) === false) {
1718                                         /* do nothing */
1719                                 } else if ($r < 45) {
1720                                         $chars[$key] = '&#x'.dechex($ord).';';
1721                                 } else {
1722                                         $chars[$key] = '&#'.$ord.';';
1723                                 }
1724                         }
1725                 }
1726
1727                 $text = implode('', $chars);
1728                 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1729
1730                 return $text;
1731         }
1732
1733         /**
         * Take the string $str and parse it into tokens, hashing embedded HTML,
         * escaped characters and handling code spans.
1736          * @param  string $str
1737          * @return string
1738          */
1739         protected function parseSpan($str) {
1740                 $output = '';
1741
1742                 $span_re = '{
1743                                 (
1744                                         \\\\'.$this->escape_chars_re.'
1745                                 |
1746                                         (?<![`\\\\])
1747                                         `+                                              # code span marker
1748                         '.( $this->no_markup ? '' : '
1749                                 |
1750                                         <!--    .*?     -->             # comment
1751                                 |
1752                                         <\?.*?\?> | <%.*?%>             # processing instruction
1753                                 |
1754                                         <[!$]?[-a-zA-Z0-9:_]+   # regular tags
1755                                         (?>
1756                                                 \s
1757                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1758                                         )?
1759                                         >
1760                                 |
1761                                         <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1762                                 |
1763                                         </[-a-zA-Z0-9:_]+\s*> # closing tag
1764                         ').'
1765                                 )
1766                                 }xs';
1767
1768                 while (1) {
1769                         // Each loop iteration seach for either the next tag, the next
1770                         // openning code span marker, or the next escaped character.
1771                         // Each token is then passed to handleSpanToken.
1772                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1773
1774                         // Create token from text preceding tag.
1775                         if ($parts[0] != "") {
1776                                 $output .= $parts[0];
1777                         }
1778
1779                         // Check if we reach the end.
1780                         if (isset($parts[1])) {
1781                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1782                                 $str = $parts[2];
1783                         } else {
1784                                 break;
1785                         }
1786                 }
1787
1788                 return $output;
1789         }
1790
1791         /**
1792          * Handle $token provided by parseSpan by determining its nature and
1793          * returning the corresponding value that should replace it.
1794          * @param  string $token
1795          * @param  string &$str
1796          * @return string
1797          */
1798         protected function handleSpanToken($token, &$str) {
1799                 switch ($token[0]) {
1800                         case "\\":
1801                                 return $this->hashPart("&#". ord($token[1]). ";");
1802                         case "`":
1803                                 // Search for end marker in remaining text.
1804                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1805                                         $str, $matches))
1806                                 {
1807                                         $str = $matches[2];
1808                                         $codespan = $this->makeCodeSpan($matches[1]);
1809                                         return $this->hashPart($codespan);
1810                                 }
1811                                 return $token; // Return as text since no ending marker found.
1812                         default:
1813                                 return $this->hashPart($token);
1814                 }
1815         }
1816
1817         /**
1818          * Remove one level of line-leading tabs or spaces
1819          * @param  string $text
1820          * @return string
1821          */
1822         protected function outdent($text) {
1823                 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
1824         }
1825
1826
1827         /**
1828          * String length function for detab. `_initDetab` will create a function to
1829          * handle UTF-8 if the default function does not exist.
         * @var string|callable
1831          */
1832         protected $utf8_strlen = 'mb_strlen';
1833
1834         /**
1835          * Replace tabs with the appropriate amount of spaces.
1836          *
         * For each line we separate the line in blocks delimited by tab characters.
         * Then we reconstruct every line by adding the appropriate number of spaces
         * between each block.
1840          *
1841          * @param  string $text
1842          * @return string
1843          */
1844         protected function detab($text) {
1845                 $text = preg_replace_callback('/^.*\t.*$/m',
1846                         array($this, '_detab_callback'), $text);
1847
1848                 return $text;
1849         }
1850
1851         /**
1852          * Replace tabs callback
         * @param  array $matches
1854          * @return string
1855          */
1856         protected function _detab_callback($matches) {
1857                 $line = $matches[0];
1858                 $strlen = $this->utf8_strlen; // strlen function for UTF-8.
1859
1860                 // Split in blocks.
1861                 $blocks = explode("\t", $line);
1862                 // Add each blocks to the line.
1863                 $line = $blocks[0];
1864                 unset($blocks[0]); // Do not add first block twice.
1865                 foreach ($blocks as $block) {
1866                         // Calculate amount of space, insert spaces, insert block.
1867                         $amount = $this->tab_width -
1868                                 $strlen($line, 'UTF-8') % $this->tab_width;
1869                         $line .= str_repeat(" ", $amount) . $block;
1870                 }
1871                 return $line;
1872         }
1873
1874         /**
1875          * Check for the availability of the function in the `utf8_strlen` property
1876          * (initially `mb_strlen`). If the function is not available, create a
1877          * function that will loosely count the number of UTF-8 characters with a
1878          * regular expression.
1879          * @return void
1880          */
1881         protected function _initDetab() {
1882
1883                 if (function_exists($this->utf8_strlen)) {
1884                         return;
1885                 }
1886
1887                 $this->utf8_strlen = function($text) {
1888                         return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
1889                 };
1890         }
1891
1892         /**
1893          * Swap back in all the tags hashed by _HashHTMLBlocks.
1894          * @param  string $text
1895          * @return string
1896          */
1897         protected function unhash($text) {
1898                 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1899                         array($this, '_unhash_callback'), $text);
1900         }
1901
1902         /**
1903          * Unhashing callback
1904          * @param  array $matches
1905          * @return string
1906          */
1907         protected function _unhash_callback($matches) {
1908                 return $this->html_hashes[$matches[0]];
1909         }
1910 }