lib/markdown/Markdown.php

   1 <?php
   2 /**
   3  * Markdown  -  A text-to-HTML conversion tool for web writers
   4  *
   5  * @package   php-markdown
   6  * @author    Michel Fortin <michel.fortin@michelf.com>
   7  * @copyright 2004-2016 Michel Fortin <https://michelf.com/projects/php-markdown/>
   8  * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
   9  */
  10
  11 namespace Michelf;
  12
  13 /**
  14  * Markdown Parser Class
  15  */
  16 class Markdown implements MarkdownInterface {
        /**
         * Version string of the package.
         * @var string
         */
        const MARKDOWNLIB_VERSION = "1.7.0";
  22
  23         /**
  24          * Simple function interface - Initialize the parser and return the result
  25          * of its transform method. This will work fine for derived classes too.
  26          *
  27          * @api
  28          *
  29          * @param  string $text
  30          * @return string
  31          */
  32         public static function defaultTransform($text) {
  33                 // Take parser class on which this function was called.
  34                 $parser_class = \get_called_class();
  35
  36                 // Try to take parser from the static parser list
  37                 static $parser_list;
  38                 $parser =& $parser_list[$parser_class];
  39
  40                 // Create the parser it not already set
  41                 if (!$parser) {
  42                         $parser = new $parser_class;
  43                 }
  44
  45                 // Transform text using parser.
  46                 return $parser->transform($text);
  47         }
  48
  49         /**
  50          * Configuration variables
  51          */
  52
        /**
         * Suffix appended to empty elements (such as <hr and <br) when they are
         * emitted. Change to ">" for HTML output.
         * @var string
         */
        public $empty_element_suffix = " />";

        /**
         * Tab width, in columns. In this file it bounds the leading indentation
         * (at most tab_width - 1 spaces) accepted before link definitions and
         * HTML blocks.
         * NOTE(review): presumably also drives tab expansion in detab(); confirm.
         * @var int
         */
        public $tab_width = 4;

        /**
         * Change to `true` to disallow markup. When set, hashHTMLBlocks()
         * returns its input untouched instead of protecting block-level HTML.
         * @var boolean
         */
        public $no_markup   = false;

        /**
         * Change to `true` to disallow entities.
         * @var boolean
         */
        public $no_entities = false;


        /**
         * Change to `true` to turn every newline into a hard line break, even
         * without two trailing spaces before it.
         * @var boolean
         */
        public $hard_wrap = false;

        /**
         * Predefined URLs for reference links and images, keyed by link id.
         * Copied into the working table at the start of each transformation.
         * Reference lookups lower-case the id before searching the table.
         * @var array
         */
        public $predef_urls   = array();

        /**
         * Predefined titles for reference links and images, keyed by link id.
         * Copied into the working table at the start of each transformation.
         * @var array
         */
        public $predef_titles = array();

        /**
         * Optional filter function for URLs
         * @var callable
         */
        public $url_filter_func = null;

        /**
         * Optional header id="" generation callback function.
         * @var callable
         */
        public $header_id_func = null;

        /**
         * Optional function for converting code block content to HTML
         * @var callable
         */
        public $code_block_content_func = null;

        /**
         * Optional function for converting code span content to HTML.
         * @var callable
         */
        public $code_span_content_func = null;

        /**
         * Class attribute to toggle "enhanced ordered list" behaviour.
         * Setting this to true will allow ordered lists to start from the index
         * number that is defined first.
         *
         * For example:
         * 2. List item two
         * 3. List item three
         *
         * Becomes:
         * <ol start="2">
         * <li>List item two</li>
         * <li>List item three</li>
         * </ol>
         *
         * @var bool
         */
        public $enhanced_ordered_list = false;
 128
 129         /**
 130          * Parser implementation
 131          */
 132
        /**
         * Maximum nesting depth of balanced [brackets] that the generated regex
         * can match. The regex is built by repetition in the constructor, so a
         * finite maximum bracket depth is needed.
         * @var int
         */
        protected $nested_brackets_depth = 6;

        /**
         * Regex fragment matching balanced [brackets]; built by the constructor.
         * @var string
         */
        protected $nested_brackets_re;

        /**
         * Maximum nesting depth of parentheses matched inside a URL.
         * @var int
         */
        protected $nested_url_parenthesis_depth = 4;

        /**
         * Regex fragment matching a URL with balanced parentheses; built by the
         * constructor.
         * @var string
         */
        protected $nested_url_parenthesis_re;

        /**
         * Characters that can be escaped. The constructor quotes this list and
         * wraps it in a character class stored in $escape_chars_re.
         * @var string
         */
        protected $escape_chars = '\`*_{}[]()>#+-.!';

        /**
         * Character-class regex built from $escape_chars by the constructor.
         * @var string
         */
        protected $escape_chars_re;
 150
 151         /**
 152          * Constructor function. Initialize appropriate member variables.
 153          * @return void
 154          */
 155         public function __construct() {
 156                 $this->_initDetab();
 157                 $this->prepareItalicsAndBold();
 158
 159                 $this->nested_brackets_re =
 160                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 161                         str_repeat('\])*', $this->nested_brackets_depth);
 162
 163                 $this->nested_url_parenthesis_re =
 164                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
 165                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
 166
 167                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
 168
 169                 // Sort document, block, and span gamut in ascendent priority order.
 170                 asort($this->document_gamut);
 171                 asort($this->block_gamut);
 172                 asort($this->span_gamut);
 173         }
 174
 175
        /**
         * Working table of reference-link URLs, keyed by link id. Reset from
         * $predef_urls by setup() and filled while link definitions are stripped.
         * @var array
         */
        protected $urls        = array();

        /**
         * Working table of reference-link titles, keyed by link id. Reset from
         * $predef_titles by setup().
         * @var array
         */
        protected $titles      = array();

        /**
         * Text replaced by tokens during the transformation, keyed by token
         * (see hashPart()).
         * @var array
         */
        protected $html_hashes = array();

        /**
         * Status flag to avoid invalid nesting: true while doAnchors() is
         * running, so that a nested call returns its input unchanged.
         * @var boolean
         */
        protected $in_anchor = false;
 189
 190         /**
 191          * Called before the transformation process starts to setup parser states.
 192          * @return void
 193          */
 194         protected function setup() {
 195                 // Clear global hashes.
 196                 $this->urls        = $this->predef_urls;
 197                 $this->titles      = $this->predef_titles;
 198                 $this->html_hashes = array();
 199                 $this->in_anchor   = false;
 200         }
 201
 202         /**
 203          * Called after the transformation process to clear any variable which may
 204          * be taking up memory unnecessarly.
 205          * @return void
 206          */
 207         protected function teardown() {
 208                 $this->urls        = array();
 209                 $this->titles      = array();
 210                 $this->html_hashes = array();
 211         }
 212
 213         /**
 214          * Main function. Performs some preprocessing on the input text and pass
 215          * it through the document gamut.
 216          *
 217          * @api
 218          *
 219          * @param  string $text
 220          * @return string
 221          */
 222         public function transform($text) {
 223                 $this->setup();
 224
 225                 # Remove UTF-8 BOM and marker character in input, if present.
 226                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 227
 228                 # Standardize line endings:
 229                 #   DOS to Unix and Mac to Unix
 230                 $text = preg_replace('{\r\n?}', "\n", $text);
 231
 232                 # Make sure $text ends with a couple of newlines:
 233                 $text .= "\n\n";
 234
 235                 # Convert all tabs to spaces.
 236                 $text = $this->detab($text);
 237
 238                 # Turn block-level HTML blocks into hash entries
 239                 $text = $this->hashHTMLBlocks($text);
 240
 241                 # Strip any lines consisting only of spaces and tabs.
 242                 # This makes subsequent regexen easier to write, because we can
 243                 # match consecutive blank lines with /\n+/ instead of something
 244                 # contorted like /[ ]*\n+/ .
 245                 $text = preg_replace('/^[ ]+$/m', '', $text);
 246
 247                 # Run document gamut methods.
 248                 foreach ($this->document_gamut as $method => $priority) {
 249                         $text = $this->$method($text);
 250                 }
 251
 252                 $this->teardown();
 253
 254                 return $text . "\n";
 255         }
 256
        /**
         * Define the document gamut: method name => priority. The constructor
         * sorts it by ascending priority and transform() calls each method in
         * that order, feeding the output of one into the next.
         * @var array
         */
        protected $document_gamut = array(
                // Strip link definitions, store in hashes.
                "stripLinkDefinitions" => 20,
                // Then run the block-level transformations on the whole document.
                "runBasicBlockGamut"   => 30,
        );
 266
 267         /**
 268          * Strips link definitions from text, stores the URLs and titles in
 269          * hash references
 270          * @param  string $text
 271          * @return string
 272          */
 273         protected function stripLinkDefinitions($text) {
 274
 275                 $less_than_tab = $this->tab_width - 1;
 276
 277                 // Link defs are in the form: ^[id]: url "optional title"
 278                 $text = preg_replace_callback('{
 279                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 280                                                           [ ]*
 281                                                           \n?                           # maybe *one* newline
 282                                                           [ ]*
 283                                                         (?:
 284                                                           <(.+?)>                       # url = $2
 285                                                         |
 286                                                           (\S+?)                        # url = $3
 287                                                         )
 288                                                           [ ]*
 289                                                           \n?                           # maybe one newline
 290                                                           [ ]*
 291                                                         (?:
 292                                                                 (?<=\s)                 # lookbehind for whitespace
 293                                                                 ["(]
 294                                                                 (.*?)                   # title = $4
 295                                                                 [")]
 296                                                                 [ ]*
 297                                                         )?      # title is optional
 298                                                         (?:\n+|\Z)
 299                         }xm',
 300                         array($this, '_stripLinkDefinitions_callback'),
 301                         $text
 302                 );
 303                 return $text;
 304         }
 305
 306         /**
 307          * The callback to strip link definitions
 308          * @param  array $matches
 309          * @return string
 310          */
 311         protected function _stripLinkDefinitions_callback($matches) {
 312                 $link_id = strtolower($matches[1]);
 313                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
 314                 $this->urls[$link_id] = $url;
 315                 $this->titles[$link_id] =& $matches[4];
 316                 return ''; // String that will replace the block
 317         }
 318
 319         /**
 320          * Hashify HTML blocks
 321          * @param  string $text
 322          * @return string
 323          */
 324         protected function hashHTMLBlocks($text) {
 325                 if ($this->no_markup) {
 326                         return $text;
 327                 }
 328
 329                 $less_than_tab = $this->tab_width - 1;
 330
 331                 /**
 332                  * Hashify HTML blocks:
 333                  *
 334                  * We only want to do this for block-level HTML tags, such as headers,
 335                  * lists, and tables. That's because we still want to wrap <p>s around
 336                  * "paragraphs" that are wrapped in non-block-level tags, such as
 337                  * anchors, phrase emphasis, and spans. The list of tags we're looking
 338                  * for is hard-coded:
 339                  *
 340                  * *  List "a" is made of tags which can be both inline or block-level.
 341                  *    These will be treated block-level when the start tag is alone on
 342                  *    its line, otherwise they're not matched here and will be taken as
 343                  *    inline later.
 344                  * *  List "b" is made of tags which are always block-level;
 345                  */
 346                 $block_tags_a_re = 'ins|del';
 347                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 348                                                    'script|noscript|style|form|fieldset|iframe|math|svg|'.
 349                                                    'article|section|nav|aside|hgroup|header|footer|'.
 350                                                    'figure';
 351
 352                 // Regular expression for the content of a block tag.
 353                 $nested_tags_level = 4;
 354                 $attr = '
 355                         (?>                             # optional tag attributes
 356                           \s                    # starts with whitespace
 357                           (?>
 358                                 [^>"/]+         # text outside quotes
 359                           |
 360                                 /+(?!>)         # slash not followed by ">"
 361                           |
 362                                 "[^"]*"         # text inside double quotes (tolerate ">")
 363                           |
 364                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
 365                           )*
 366                         )?
 367                         ';
 368                 $content =
 369                         str_repeat('
 370                                 (?>
 371                                   [^<]+                 # content without tag
 372                                 |
 373                                   <\2                   # nested opening tag
 374                                         '.$attr.'       # attributes
 375                                         (?>
 376                                           />
 377                                         |
 378                                           >', $nested_tags_level).      // end of opening tag
 379                                           '.*?'.                                        // last level nested tag content
 380                         str_repeat('
 381                                           </\2\s*>      # closing nested tag
 382                                         )
 383                                   |
 384                                         <(?!/\2\s*>     # other tags with a different name
 385                                   )
 386                                 )*',
 387                                 $nested_tags_level);
 388                 $content2 = str_replace('\2', '\3', $content);
 389
 390                 /**
 391                  * First, look for nested blocks, e.g.:
 392                  *      <div>
 393                  *              <div>
 394                  *              tags for inner block must be indented.
 395                  *              </div>
 396                  *      </div>
 397                  *
 398                  * The outermost tags must start at the left margin for this to match,
 399                  * and the inner nested divs must be indented.
 400                  * We need to do this before the next, more liberal match, because the
 401                  * next match will start at the first `<div>` and stop at the
 402                  * first `</div>`.
 403                  */
 404                 $text = preg_replace_callback('{(?>
 405                         (?>
 406                                 (?<=\n)                 # Starting on its own line
 407                                 |                               # or
 408                                 \A\n?                   # the at beginning of the doc
 409                         )
 410                         (                                               # save in $1
 411
 412                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 413                           # in between.
 414
 415                                                 [ ]{0,'.$less_than_tab.'}
 416                                                 <('.$block_tags_b_re.')# start tag = $2
 417                                                 '.$attr.'>                      # attributes followed by > and \n
 418                                                 '.$content.'            # content, support nesting
 419                                                 </\2>                           # the matching end tag
 420                                                 [ ]*                            # trailing spaces/tabs
 421                                                 (?=\n+|\Z)      # followed by a newline or end of document
 422
 423                         | # Special version for tags of group a.
 424
 425                                                 [ ]{0,'.$less_than_tab.'}
 426                                                 <('.$block_tags_a_re.')# start tag = $3
 427                                                 '.$attr.'>[ ]*\n        # attributes followed by >
 428                                                 '.$content2.'           # content, support nesting
 429                                                 </\3>                           # the matching end tag
 430                                                 [ ]*                            # trailing spaces/tabs
 431                                                 (?=\n+|\Z)      # followed by a newline or end of document
 432
 433                         | # Special case just for <hr />. It was easier to make a special
 434                           # case than to make the other regex more complicated.
 435
 436                                                 [ ]{0,'.$less_than_tab.'}
 437                                                 <(hr)                           # start tag = $2
 438                                                 '.$attr.'                       # attributes
 439                                                 /?>                                     # the matching end tag
 440                                                 [ ]*
 441                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
 442
 443                         | # Special case for standalone HTML comments:
 444
 445                                         [ ]{0,'.$less_than_tab.'}
 446                                         (?s:
 447                                                 <!-- .*? -->
 448                                         )
 449                                         [ ]*
 450                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 451
 452                         | # PHP and ASP-style processor instructions (<? and <%)
 453
 454                                         [ ]{0,'.$less_than_tab.'}
 455                                         (?s:
 456                                                 <([?%])                 # $2
 457                                                 .*?
 458                                                 \2>
 459                                         )
 460                                         [ ]*
 461                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 462
 463                         )
 464                         )}Sxmi',
 465                         array($this, '_hashHTMLBlocks_callback'),
 466                         $text
 467                 );
 468
 469                 return $text;
 470         }
 471
 472         /**
 473          * The callback for hashing HTML blocks
 474          * @param  string $matches
 475          * @return string
 476          */
 477         protected function _hashHTMLBlocks_callback($matches) {
 478                 $text = $matches[1];
 479                 $key  = $this->hashBlock($text);
 480                 return "\n\n$key\n\n";
 481         }
 482
 483         /**
 484          * Called whenever a tag must be hashed when a function insert an atomic
 485          * element in the text stream. Passing $text to through this function gives
 486          * a unique text-token which will be reverted back when calling unhash.
 487          *
 488          * The $boundary argument specify what character should be used to surround
 489          * the token. By convension, "B" is used for block elements that needs not
 490          * to be wrapped into paragraph tags at the end, ":" is used for elements
 491          * that are word separators and "X" is used in the general case.
 492          *
 493          * @param  string $text
 494          * @param  string $boundary
 495          * @return string
 496          */
 497         protected function hashPart($text, $boundary = 'X') {
 498                 // Swap back any tag hash found in $text so we do not have to `unhash`
 499                 // multiple times at the end.
 500                 $text = $this->unhash($text);
 501
 502                 // Then hash the block.
 503                 static $i = 0;
 504                 $key = "$boundary\x1A" . ++$i . $boundary;
 505                 $this->html_hashes[$key] = $text;
 506                 return $key; // String that will replace the tag.
 507         }
 508
        /**
         * Shortcut function for hashPart with block-level boundaries: the token
         * is built with the "B" boundary character.
         * @param  string $text
         * @return string  token standing for $text
         */
        protected function hashBlock($text) {
                return $this->hashPart($text, 'B');
        }
 517
        /**
         * Define the block gamut - these are all the transformations that form
         * block-level tags like paragraphs, headers, and list items.
         *
         * Maps method name => priority. The constructor sorts it by ascending
         * priority and runBasicBlockGamut() calls the methods in that order.
         * @var array
         */
        protected $block_gamut = array(
                "doHeaders"         => 10,
                "doHorizontalRules" => 20,
                "doLists"           => 40,
                "doCodeBlocks"      => 50,
                "doBlockQuotes"     => 60,
        );
 530
 531         /**
 532          * Run block gamut tranformations.
 533          *
 534          * We need to escape raw HTML in Markdown source before doing anything
 535          * else. This need to be done for each block, and not only at the
 536          * begining in the Markdown function since hashed blocks can be part of
 537          * list items and could have been indented. Indented blocks would have
 538          * been seen as a code block in a previous pass of hashHTMLBlocks.
 539          *
 540          * @param  string $text
 541          * @return string
 542          */
 543         protected function runBlockGamut($text) {
 544                 $text = $this->hashHTMLBlocks($text);
 545                 return $this->runBasicBlockGamut($text);
 546         }
 547
 548         /**
 549          * Run block gamut tranformations, without hashing HTML blocks. This is
 550          * useful when HTML blocks are known to be already hashed, like in the first
 551          * whole-document pass.
 552          *
 553          * @param  string $text
 554          * @return string
 555          */
 556         protected function runBasicBlockGamut($text) {
 557
 558                 foreach ($this->block_gamut as $method => $priority) {
 559                         $text = $this->$method($text);
 560                 }
 561
 562                 // Finally form paragraph and restore hashed blocks.
 563                 $text = $this->formParagraphs($text);
 564
 565                 return $text;
 566         }
 567
 568         /**
 569          * Convert horizontal rules
 570          * @param  string $text
 571          * @return string
 572          */
 573         protected function doHorizontalRules($text) {
 574                 return preg_replace(
 575                         '{
 576                                 ^[ ]{0,3}       # Leading space
 577                                 ([-*_])         # $1: First marker
 578                                 (?>                     # Repeated marker group
 579                                         [ ]{0,2}        # Zero, one, or two spaces.
 580                                         \1                      # Marker character
 581                                 ){2,}           # Group repeated at least twice
 582                                 [ ]*            # Tailing spaces
 583                                 $                       # End of line.
 584                         }mx',
 585                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 586                         $text
 587                 );
 588         }
 589
        /**
         * These are all the transformations that occur *within* block-level
         * tags like paragraphs, headers, and list items.
         *
         * Maps method name => priority. The constructor sorts it by ascending
         * priority and runSpanGamut() calls the methods in that order.
         * @var array
         */
        protected $span_gamut = array(
                // Process character escapes, code spans, and inline HTML
                // in one shot.
                "parseSpan"           => -30,
                // Process anchor and image tags. Images must come first,
                // because ![foo][f] looks like an anchor.
                "doImages"            =>  10,
                "doAnchors"           =>  20,
                // Make links out of things like `<https://example.com/>`
                // Must come after doAnchors, because you can use < and >
                // delimiters in inline links like [this](<url>).
                "doAutoLinks"         =>  30,
                "encodeAmpsAndAngles" =>  40,
                "doItalicsAndBold"    =>  50,
                "doHardBreaks"        =>  60,
        );
 611
 612         /**
 613          * Run span gamut transformations
 614          * @param  string $text
 615          * @return string
 616          */
 617         protected function runSpanGamut($text) {
 618                 foreach ($this->span_gamut as $method => $priority) {
 619                         $text = $this->$method($text);
 620                 }
 621
 622                 return $text;
 623         }
 624
 625         /**
 626          * Do hard breaks
 627          * @param  string $text
 628          * @return string
 629          */
 630         protected function doHardBreaks($text) {
 631                 if ($this->hard_wrap) {
 632                         return preg_replace_callback('/ *\n/',
 633                                 array($this, '_doHardBreaks_callback'), $text);
 634                 } else {
 635                         return preg_replace_callback('/ {2,}\n/',
 636                                 array($this, '_doHardBreaks_callback'), $text);
 637                 }
 638         }
 639
 640         /**
 641          * Trigger part hashing for the hard break (callback method)
 642          * @param  array $matches
 643          * @return string
 644          */
 645         protected function _doHardBreaks_callback($matches) {
 646                 return $this->hashPart("<br$this->empty_element_suffix\n");
 647         }
 648
 649         /**
 650          * Turn Markdown link shortcuts into XHTML <a> tags.
 651          * @param  string $text
 652          * @return string
 653          */
 654         protected function doAnchors($text) {
 655                 if ($this->in_anchor) {
 656                         return $text;
 657                 }
 658                 $this->in_anchor = true;
 659
 660                 // First, handle reference-style links: [link text] [id]
 661                 $text = preg_replace_callback('{
 662                         (                                       # wrap whole match in $1
 663                           \[
 664                                 ('.$this->nested_brackets_re.') # link text = $2
 665                           \]
 666
 667                           [ ]?                          # one optional space
 668                           (?:\n[ ]*)?           # one optional newline followed by spaces
 669
 670                           \[
 671                                 (.*?)           # id = $3
 672                           \]
 673                         )
 674                         }xs',
 675                         array($this, '_doAnchors_reference_callback'), $text);
 676
 677                 // Next, inline-style links: [link text](url "optional title")
 678                 $text = preg_replace_callback('{
 679                         (                               # wrap whole match in $1
 680                           \[
 681                                 ('.$this->nested_brackets_re.') # link text = $2
 682                           \]
 683                           \(                    # literal paren
 684                                 [ \n]*
 685                                 (?:
 686                                         <(.+?)> # href = $3
 687                                 |
 688                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 689                                 )
 690                                 [ \n]*
 691                                 (                       # $5
 692                                   ([\'"])       # quote char = $6
 693                                   (.*?)         # Title = $7
 694                                   \6            # matching quote
 695                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 696                                 )?                      # title is optional
 697                           \)
 698                         )
 699                         }xs',
 700                         array($this, '_doAnchors_inline_callback'), $text);
 701
 702                 // Last, handle reference-style shortcuts: [link text]
 703                 // These must come last in case you've also got [link text][1]
 704                 // or [link text](/foo)
 705                 $text = preg_replace_callback('{
 706                         (                                       # wrap whole match in $1
 707                           \[
 708                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 709                           \]
 710                         )
 711                         }xs',
 712                         array($this, '_doAnchors_reference_callback'), $text);
 713
 714                 $this->in_anchor = false;
 715                 return $text;
 716         }
 717
 718         /**
 719          * Callback method to parse referenced anchors
 720          * @param  string $matches
 721          * @return string
 722          */
 723         protected function _doAnchors_reference_callback($matches) {
 724                 $whole_match =  $matches[1];
 725                 $link_text   =  $matches[2];
 726                 $link_id     =& $matches[3];
 727
 728                 if ($link_id == "") {
 729                         // for shortcut links like [this][] or [this].
 730                         $link_id = $link_text;
 731                 }
 732
 733                 // lower-case and turn embedded newlines into spaces
 734                 $link_id = strtolower($link_id);
 735                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 736
 737                 if (isset($this->urls[$link_id])) {
 738                         $url = $this->urls[$link_id];
 739                         $url = $this->encodeURLAttribute($url);
 740
 741                         $result = "<a href=\"$url\"";
 742                         if ( isset( $this->titles[$link_id] ) ) {
 743                                 $title = $this->titles[$link_id];
 744                                 $title = $this->encodeAttribute($title);
 745                                 $result .=  " title=\"$title\"";
 746                         }
 747
 748                         $link_text = $this->runSpanGamut($link_text);
 749                         $result .= ">$link_text</a>";
 750                         $result = $this->hashPart($result);
 751                 } else {
 752                         $result = $whole_match;
 753                 }
 754                 return $result;
 755         }
 756
 757         /**
 758          * Callback method to parse inline anchors
 759          * @param  string $matches
 760          * @return string
 761          */
 762         protected function _doAnchors_inline_callback($matches) {
 763                 $whole_match    =  $matches[1];
 764                 $link_text              =  $this->runSpanGamut($matches[2]);
 765                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 766                 $title                  =& $matches[7];
 767
 768                 // If the URL was of the form <s p a c e s> it got caught by the HTML
 769                 // tag parser and hashed. Need to reverse the process before using
 770                 // the URL.
 771                 $unhashed = $this->unhash($url);
 772                 if ($unhashed != $url)
 773                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 774
 775                 $url = $this->encodeURLAttribute($url);
 776
 777                 $result = "<a href=\"$url\"";
 778                 if (isset($title)) {
 779                         $title = $this->encodeAttribute($title);
 780                         $result .=  " title=\"$title\"";
 781                 }
 782
 783                 $link_text = $this->runSpanGamut($link_text);
 784                 $result .= ">$link_text</a>";
 785
 786                 return $this->hashPart($result);
 787         }
 788
 789         /**
 790          * Turn Markdown image shortcuts into <img> tags.
 791          * @param  string $text
 792          * @return string
 793          */
 794         protected function doImages($text) {
 795                 // First, handle reference-style labeled images: ![alt text][id]
 796                 $text = preg_replace_callback('{
 797                         (                               # wrap whole match in $1
 798                           !\[
 799                                 ('.$this->nested_brackets_re.')         # alt text = $2
 800                           \]
 801
 802                           [ ]?                          # one optional space
 803                           (?:\n[ ]*)?           # one optional newline followed by spaces
 804
 805                           \[
 806                                 (.*?)           # id = $3
 807                           \]
 808
 809                         )
 810                         }xs',
 811                         array($this, '_doImages_reference_callback'), $text);
 812
 813                 // Next, handle inline images:  ![alt text](url "optional title")
 814                 // Don't forget: encode * and _
 815                 $text = preg_replace_callback('{
 816                         (                               # wrap whole match in $1
 817                           !\[
 818                                 ('.$this->nested_brackets_re.')         # alt text = $2
 819                           \]
 820                           \s?                   # One optional whitespace character
 821                           \(                    # literal paren
 822                                 [ \n]*
 823                                 (?:
 824                                         <(\S*)> # src url = $3
 825                                 |
 826                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 827                                 )
 828                                 [ \n]*
 829                                 (                       # $5
 830                                   ([\'"])       # quote char = $6
 831                                   (.*?)         # title = $7
 832                                   \6            # matching quote
 833                                   [ \n]*
 834                                 )?                      # title is optional
 835                           \)
 836                         )
 837                         }xs',
 838                         array($this, '_doImages_inline_callback'), $text);
 839
 840                 return $text;
 841         }
 842
 843         /**
 844          * Callback to parse references image tags
 845          * @param  array $matches
 846          * @return string
 847          */
 848         protected function _doImages_reference_callback($matches) {
 849                 $whole_match = $matches[1];
 850                 $alt_text    = $matches[2];
 851                 $link_id     = strtolower($matches[3]);
 852
 853                 if ($link_id == "") {
 854                         $link_id = strtolower($alt_text); // for shortcut links like ![this][].
 855                 }
 856
 857                 $alt_text = $this->encodeAttribute($alt_text);
 858                 if (isset($this->urls[$link_id])) {
 859                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
 860                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 861                         if (isset($this->titles[$link_id])) {
 862                                 $title = $this->titles[$link_id];
 863                                 $title = $this->encodeAttribute($title);
 864                                 $result .=  " title=\"$title\"";
 865                         }
 866                         $result .= $this->empty_element_suffix;
 867                         $result = $this->hashPart($result);
 868                 } else {
 869                         // If there's no such link ID, leave intact:
 870                         $result = $whole_match;
 871                 }
 872
 873                 return $result;
 874         }
 875
 876         /**
 877          * Callback to parse inline image tags
 878          * @param  array $matches
 879          * @return string
 880          */
 881         protected function _doImages_inline_callback($matches) {
 882                 $whole_match    = $matches[1];
 883                 $alt_text               = $matches[2];
 884                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 885                 $title                  =& $matches[7];
 886
 887                 $alt_text = $this->encodeAttribute($alt_text);
 888                 $url = $this->encodeURLAttribute($url);
 889                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 890                 if (isset($title)) {
 891                         $title = $this->encodeAttribute($title);
 892                         $result .=  " title=\"$title\""; // $title already quoted
 893                 }
 894                 $result .= $this->empty_element_suffix;
 895
 896                 return $this->hashPart($result);
 897         }
 898
 899         /**
 900          * Parse Markdown heading elements to HTML
 901          * @param  string $text
 902          * @return string
 903          */
 904         protected function doHeaders($text) {
 905                 /**
 906                  * Setext-style headers:
 907                  *        Header 1
 908                  *        ========
 909                  *
 910                  *        Header 2
 911                  *        --------
 912                  */
 913                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 914                         array($this, '_doHeaders_callback_setext'), $text);
 915
 916                 /**
 917                  * atx-style headers:
 918                  *   # Header 1
 919                  *   ## Header 2
 920                  *   ## Header 2 with closing hashes ##
 921                  *   ...
 922                  *   ###### Header 6
 923                  */
 924                 $text = preg_replace_callback('{
 925                                 ^(\#{1,6})      # $1 = string of #\'s
 926                                 [ ]*
 927                                 (.+?)           # $2 = Header text
 928                                 [ ]*
 929                                 \#*                     # optional closing #\'s (not counted)
 930                                 \n+
 931                         }xm',
 932                         array($this, '_doHeaders_callback_atx'), $text);
 933
 934                 return $text;
 935         }
 936
 937         /**
 938          * Setext header parsing callback
 939          * @param  array $matches
 940          * @return string
 941          */
 942         protected function _doHeaders_callback_setext($matches) {
 943                 // Terrible hack to check we haven't found an empty list item.
 944                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
 945                         return $matches[0];
 946                 }
 947
 948                 $level = $matches[2]{0} == '=' ? 1 : 2;
 949
 950                 // ID attribute generation
 951                 $idAtt = $this->_generateIdFromHeaderValue($matches[1]);
 952
 953                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
 954                 return "\n" . $this->hashBlock($block) . "\n\n";
 955         }
 956
 957         /**
 958          * ATX header parsing callback
 959          * @param  array $matches
 960          * @return string
 961          */
 962         protected function _doHeaders_callback_atx($matches) {
 963                 // ID attribute generation
 964                 $idAtt = $this->_generateIdFromHeaderValue($matches[2]);
 965
 966                 $level = strlen($matches[1]);
 967                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>";
 968                 return "\n" . $this->hashBlock($block) . "\n\n";
 969         }
 970
 971         /**
 972          * If a header_id_func property is set, we can use it to automatically
 973          * generate an id attribute.
 974          *
 975          * This method returns a string in the form id="foo", or an empty string
 976          * otherwise.
 977          * @param  string $headerValue
 978          * @return string
 979          */
 980         protected function _generateIdFromHeaderValue($headerValue) {
 981                 if (!is_callable($this->header_id_func)) {
 982                         return "";
 983                 }
 984
 985                 $idValue = call_user_func($this->header_id_func, $headerValue);
 986                 if (!$idValue) {
 987                         return "";
 988                 }
 989
 990                 return ' id="' . $this->encodeAttribute($idValue) . '"';
 991         }
 992
        /**
         * Form HTML ordered (numbered) and unordered (bulleted) lists.
         *
         * The text is scanned twice, first for lists whose first item uses a
         * bullet marker and then for lists whose first item uses a number
         * marker. Every whole list found is handed to _doLists_callback().
         *
         * @param  string $text
         * @return string
         */
        protected function doLists($text) {
                // The first marker of a list may be indented by at most
                // tab_width - 1 spaces.
                $less_than_tab = $this->tab_width - 1;

                // Re-usable patterns to match list item bullets and number markers:
                $marker_ul_re  = '[*+-]';
                $marker_ol_re  = '\d+[\.]';

                // Maps each marker pattern to the pattern of the other list kind.
                $markers_relist = array(
                        $marker_ul_re => $marker_ol_re,
                        $marker_ol_re => $marker_ul_re,
                        );

                foreach ($markers_relist as $marker_re => $other_marker_re) {
                        // Re-usable pattern to match any entire ul or ol list. A list
                        // runs until the end of the text, until blank lines followed
                        // by something other than another item of the same kind, or
                        // until a line with the same indentation that starts a list
                        // of the other kind.
                        $whole_list_re = '
                                (                                                               # $1 = whole list
                                  (                                                             # $2
                                        ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
                                        ('.$marker_re.')                        # $4 = first list item marker
                                        [ ]+
                                  )
                                  (?s:.+?)
                                  (                                                             # $5
                                          \z
                                        |
                                          \n{2,}
                                          (?=\S)
                                          (?!                                           # Negative lookahead for another list item marker
                                                [ ]*
                                                '.$marker_re.'[ ]+
                                          )
                                        |
                                          (?=                                           # Lookahead for another kind of list
                                            \n
                                                \3                                              # Must have the same indentation
                                                '.$other_marker_re.'[ ]+
                                          )
                                  )
                                )
                        '; // embedded below in patterns that use the m and x modifiers

                        // We use a different prefix before nested lists than top-level lists.
                        // See the extended comment in processListItems().

                        if ($this->list_level) {
                                // Inside a list: a nested list may start at any line start.
                                $text = preg_replace_callback('{
                                                ^
                                                '.$whole_list_re.'
                                        }mx',
                                        array($this, '_doLists_callback'), $text);
                        } else {
                                // Top level: the list must follow a blank line or begin
                                // the text.
                                $text = preg_replace_callback('{
                                                (?:(?<=\n)\n|\A\n?) # Must eat the newline
                                                '.$whole_list_re.'
                                        }mx',
                                        array($this, '_doLists_callback'), $text);
                        }
                }

                return $text;
        }
1059
1060         /**
1061          * List parsing callback
1062          * @param  array $matches
1063          * @return string
1064          */
1065         protected function _doLists_callback($matches) {
1066                 // Re-usable patterns to match list item bullets and number markers:
1067                 $marker_ul_re  = '[*+-]';
1068                 $marker_ol_re  = '\d+[\.]';
1069                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1070                 $marker_ol_start_re = '[0-9]+';
1071
1072                 $list = $matches[1];
1073                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
1074
1075                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1076
1077                 $list .= "\n";
1078                 $result = $this->processListItems($list, $marker_any_re);
1079
1080                 $ol_start = 1;
1081                 if ($this->enhanced_ordered_list) {
1082                         // Get the start number for ordered list.
1083                         if ($list_type == 'ol') {
1084                                 $ol_start_array = array();
1085                                 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
1086                                 if ($ol_start_check){
1087                                         $ol_start = $ol_start_array[0];
1088                                 }
1089                         }
1090                 }
1091
1092                 if ($ol_start > 1 && $list_type == 'ol'){
1093                         $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
1094                 } else {
1095                         $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1096                 }
1097                 return "\n". $result ."\n\n";
1098         }
1099
        /**
         * Nesting tracker for list levels.
         *
         * processListItems() increments it on entry and decrements it before
         * returning. doLists() reads it to choose between the nested-list and
         * the top-level list pattern.
         * @var integer
         */
        protected $list_level = 0;
1105
        /**
         * Process the contents of a single ordered or unordered list, splitting it
         * into individual list items.
         *
         * Each item found is converted by _processListItems_callback().
         * $this->list_level is incremented for the duration of the call.
         *
         * @param  string $list_str      Source text of the whole list
         * @param  string $marker_any_re Regex fragment matching this list's markers
         * @return string
         */
        protected function processListItems($list_str, $marker_any_re) {
                /**
                 * The $this->list_level property keeps track of when we're inside a
                 * list. Each time we enter a list, we increment it; when we leave a
                 * list, we decrement. If it's zero, we're not in a list anymore.
                 *
                 * We do this because when we're not inside a list, we want to treat
                 * something like this:
                 *
                 *              I recommend upgrading to version
                 *              8. Oops, now this line is treated
                 *              as a sub-list.
                 *
                 * As a single paragraph, despite the fact that the second line starts
                 * with a digit-period-space sequence.
                 *
                 * Whereas when we're inside a list (or sub-list), that line will be
                 * treated as the start of a sub-list. What a kludge, huh? This is
                 * an aspect of Markdown's syntax that's hard to parse perfectly
                 * without resorting to mind-reading. Perhaps the solution is to
                 * change the syntax rules such that sub-lists must start with a
                 * starting cardinal number; e.g. "1." or "a.".
                 */
                $this->list_level++;

                // Trim trailing blank lines:
                $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

                // An item ends where the text ends or where the next marker appears
                // at the same leading whitespace (the \2 backreference).
                $list_str = preg_replace_callback('{
                        (\n)?                                                   # leading line = $1
                        (^[ ]*)                                                 # leading whitespace = $2
                        ('.$marker_any_re.'                             # list marker and space = $3
                                (?:[ ]+|(?=\n)) # space only required if item is not empty
                        )
                        ((?s:.*?))                                              # list item text   = $4
                        (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
                        (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
                        }xm',
                        array($this, '_processListItems_callback'), $list_str);

                $this->list_level--;
                return $list_str;
        }
1156
1157         /**
1158          * List item parsing callback
1159          * @param  array $matches
1160          * @return string
1161          */
1162         protected function _processListItems_callback($matches) {
1163                 $item = $matches[4];
1164                 $leading_line =& $matches[1];
1165                 $leading_space =& $matches[2];
1166                 $marker_space = $matches[3];
1167                 $tailing_blank_line =& $matches[5];
1168
1169                 if ($leading_line || $tailing_blank_line ||
1170                         preg_match('/\n{2,}/', $item))
1171                 {
1172                         // Replace marker with the appropriate whitespace indentation
1173                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1174                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1175                 } else {
1176                         // Recursion for sub-lists:
1177                         $item = $this->doLists($this->outdent($item));
1178                         $item = $this->formParagraphs($item, false);
1179                 }
1180
1181                 return "<li>" . $item . "</li>\n";
1182         }
1183
1184         /**
1185          * Process Markdown `<pre><code>` blocks.
1186          * @param  string $text
1187          * @return string
1188          */
1189         protected function doCodeBlocks($text) {
1190                 $text = preg_replace_callback('{
1191                                 (?:\n\n|\A\n?)
1192                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1193                                   (?>
1194                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1195                                         .*\n+
1196                                   )+
1197                                 )
1198                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1199                         }xm',
1200                         array($this, '_doCodeBlocks_callback'), $text);
1201
1202                 return $text;
1203         }
1204
1205         /**
1206          * Code block parsing callback
1207          * @param  array $matches
1208          * @return string
1209          */
1210         protected function _doCodeBlocks_callback($matches) {
1211                 $codeblock = $matches[1];
1212
1213                 $codeblock = $this->outdent($codeblock);
1214                 if ($this->code_block_content_func) {
1215                         $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
1216                 } else {
1217                         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1218                 }
1219
1220                 # trim leading newlines and trailing newlines
1221                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1222
1223                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1224                 return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
1225         }
1226
1227         /**
1228          * Create a code span markup for $code. Called from handleSpanToken.
1229          * @param  string $code
1230          * @return string
1231          */
1232         protected function makeCodeSpan($code) {
1233                 if ($this->code_span_content_func) {
1234                         $code = call_user_func($this->code_span_content_func, $code);
1235                 } else {
1236                         $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1237                 }
1238                 return $this->hashPart("<code>$code</code>");
1239         }
1240
        /**
         * Define the emphasis operators with their regex matches.
         *
         * Keys are the emphasis marker currently open ('' when none is open).
         * The '' pattern matches a single * or _ that is not followed by
         * whitespace (optionally after one of . , : ;). The '*' and '_' patterns
         * match that same marker when it is not preceded by whitespace.
         * @var array
         */
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?!_)',
        );
1250
        /**
         * Define the strong operators with their regex matches.
         *
         * Keys are the strong marker currently open ('' when none is open).
         * The '' pattern matches a ** or __ that is not followed by whitespace
         * (optionally after one of . , : ;). The '**' and '__' patterns match
         * that same marker when it is not preceded by whitespace.
         * @var array
         */
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?!_)',
        );
1260
        /**
         * Define the emphasis + strong operators with their regex matches.
         *
         * Keys are the concatenation of the open emphasis and strong markers:
         * '' when neither is open, '***' or '___' when both are open with the
         * same character. prepareItalicsAndBold() adds a three-character token
         * only for states that have an entry here.
         * @var array
         */
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?!_)',
        );
1270
        /**
         * Container for prepared regular expressions.
         *
         * Filled by prepareItalicsAndBold(): one complete token regex per
         * combination of an $em_relist key and a $strong_relist key, indexed by
         * the two keys concatenated.
         * @var array
         */
        protected $em_strong_prepared_relist;
1276
1277         /**
1278          * Prepare regular expressions for searching emphasis tokens in any
1279          * context.
1280          * @return void
1281          */
1282         protected function prepareItalicsAndBold() {
1283                 foreach ($this->em_relist as $em => $em_re) {
1284                         foreach ($this->strong_relist as $strong => $strong_re) {
1285                                 // Construct list of allowed token expressions.
1286                                 $token_relist = array();
1287                                 if (isset($this->em_strong_relist["$em$strong"])) {
1288                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1289                                 }
1290                                 $token_relist[] = $em_re;
1291                                 $token_relist[] = $strong_re;
1292
1293                                 // Construct master expression from list.
1294                                 $token_re = '{(' . implode('|', $token_relist) . ')}';
1295                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1296                         }
1297                 }
1298         }
1299
1300         /**
1301          * Convert Markdown italics (emphasis) and bold (strong) to HTML
1302          * @param  string $text
1303          * @return string
1304          */
1305         protected function doItalicsAndBold($text) {
1306                 $token_stack = array('');
1307                 $text_stack = array('');
1308                 $em = '';
1309                 $strong = '';
1310                 $tree_char_em = false;
1311
1312                 while (1) {
                        // Get prepared regular expression for searching emphasis tokens
                        // in current context.
1315                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1316
1317                         // Each loop iteration search for the next emphasis token.
1318                         // Each token is then passed to handleSpanToken.
1319                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1320                         $text_stack[0] .= $parts[0];
1321                         $token =& $parts[1];
1322                         $text =& $parts[2];
1323
1324                         if (empty($token)) {
                                // Reached end of text span: empty stack without emitting
                                // any more emphasis.
1327                                 while ($token_stack[0]) {
1328                                         $text_stack[1] .= array_shift($token_stack);
1329                                         $text_stack[0] .= array_shift($text_stack);
1330                                 }
1331                                 break;
1332                         }
1333
1334                         $token_len = strlen($token);
1335                         if ($tree_char_em) {
1336                                 // Reached closing marker while inside a three-char emphasis.
1337                                 if ($token_len == 3) {
1338                                         // Three-char closing marker, close em and strong.
1339                                         array_shift($token_stack);
1340                                         $span = array_shift($text_stack);
1341                                         $span = $this->runSpanGamut($span);
1342                                         $span = "<strong><em>$span</em></strong>";
1343                                         $text_stack[0] .= $this->hashPart($span);
1344                                         $em = '';
1345                                         $strong = '';
1346                                 } else {
1347                                         // Other closing marker: close one em or strong and
1348                                         // change current token state to match the other
1349                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1350                                         $tag = $token_len == 2 ? "strong" : "em";
1351                                         $span = $text_stack[0];
1352                                         $span = $this->runSpanGamut($span);
1353                                         $span = "<$tag>$span</$tag>";
1354                                         $text_stack[0] = $this->hashPart($span);
1355                                         $$tag = ''; // $$tag stands for $em or $strong
1356                                 }
1357                                 $tree_char_em = false;
1358                         } else if ($token_len == 3) {
1359                                 if ($em) {
1360                                         // Reached closing marker for both em and strong.
1361                                         // Closing strong marker:
1362                                         for ($i = 0; $i < 2; ++$i) {
1363                                                 $shifted_token = array_shift($token_stack);
1364                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1365                                                 $span = array_shift($text_stack);
1366                                                 $span = $this->runSpanGamut($span);
1367                                                 $span = "<$tag>$span</$tag>";
1368                                                 $text_stack[0] .= $this->hashPart($span);
1369                                                 $$tag = ''; // $$tag stands for $em or $strong
1370                                         }
1371                                 } else {
1372                                         // Reached opening three-char emphasis marker. Push on token
1373                                         // stack; will be handled by the special condition above.
1374                                         $em = $token{0};
1375                                         $strong = "$em$em";
1376                                         array_unshift($token_stack, $token);
1377                                         array_unshift($text_stack, '');
1378                                         $tree_char_em = true;
1379                                 }
1380                         } else if ($token_len == 2) {
1381                                 if ($strong) {
1382                                         // Unwind any dangling emphasis marker:
1383                                         if (strlen($token_stack[0]) == 1) {
1384                                                 $text_stack[1] .= array_shift($token_stack);
1385                                                 $text_stack[0] .= array_shift($text_stack);
1386                                         }
1387                                         // Closing strong marker:
1388                                         array_shift($token_stack);
1389                                         $span = array_shift($text_stack);
1390                                         $span = $this->runSpanGamut($span);
1391                                         $span = "<strong>$span</strong>";
1392                                         $text_stack[0] .= $this->hashPart($span);
1393                                         $strong = '';
1394                                 } else {
1395                                         array_unshift($token_stack, $token);
1396                                         array_unshift($text_stack, '');
1397                                         $strong = $token;
1398                                 }
1399                         } else {
1400                                 // Here $token_len == 1
1401                                 if ($em) {
1402                                         if (strlen($token_stack[0]) == 1) {
1403                                                 // Closing emphasis marker:
1404                                                 array_shift($token_stack);
1405                                                 $span = array_shift($text_stack);
1406                                                 $span = $this->runSpanGamut($span);
1407                                                 $span = "<em>$span</em>";
1408                                                 $text_stack[0] .= $this->hashPart($span);
1409                                                 $em = '';
1410                                         } else {
1411                                                 $text_stack[0] .= $token;
1412                                         }
1413                                 } else {
1414                                         array_unshift($token_stack, $token);
1415                                         array_unshift($text_stack, '');
1416                                         $em = $token;
1417                                 }
1418                         }
1419                 }
1420                 return $text_stack[0];
1421         }
1422
1423         /**
1424          * Parse Markdown blockquotes to HTML
1425          * @param  string $text
1426          * @return string
1427          */
1428         protected function doBlockQuotes($text) {
1429                 $text = preg_replace_callback('/
1430                           (                                                             # Wrap whole match in $1
1431                                 (?>
1432                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1433                                         .+\n                                    # rest of the first line
1434                                   (.+\n)*                                       # subsequent consecutive lines
1435                                   \n*                                           # blanks
1436                                 )+
1437                           )
1438                         /xm',
1439                         array($this, '_doBlockQuotes_callback'), $text);
1440
1441                 return $text;
1442         }
1443
1444         /**
1445          * Blockquote parsing callback
1446          * @param  array $matches
1447          * @return string
1448          */
1449         protected function _doBlockQuotes_callback($matches) {
1450                 $bq = $matches[1];
1451                 // trim one level of quoting - trim whitespace-only lines
1452                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1453                 $bq = $this->runBlockGamut($bq); // recurse
1454
1455                 $bq = preg_replace('/^/m', "  ", $bq);
1456                 // These leading spaces cause problem with <pre> content,
1457                 // so we need to fix that:
1458                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1459                         array($this, '_doBlockQuotes_callback2'), $bq);
1460
1461                 return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
1462         }
1463
1464         /**
1465          * Blockquote parsing callback
1466          * @param  array $matches
1467          * @return string
1468          */
1469         protected function _doBlockQuotes_callback2($matches) {
1470                 $pre = $matches[1];
1471                 $pre = preg_replace('/^  /m', '', $pre);
1472                 return $pre;
1473         }
1474
1475         /**
1476          * Parse paragraphs
1477          *
1478          * @param  string $text String to process in paragraphs
1479          * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1480          * @return string
1481          */
1482         protected function formParagraphs($text, $wrap_in_p = true) {
1483                 // Strip leading and trailing lines:
1484                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1485
1486                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1487
1488                 // Wrap <p> tags and unhashify HTML blocks
1489                 foreach ($grafs as $key => $value) {
1490                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1491                                 // Is a paragraph.
1492                                 $value = $this->runSpanGamut($value);
1493                                 if ($wrap_in_p) {
1494                                         $value = preg_replace('/^([ ]*)/', "<p>", $value);
1495                                         $value .= "</p>";
1496                                 }
1497                                 $grafs[$key] = $this->unhash($value);
1498                         } else {
1499                                 // Is a block.
1500                                 // Modify elements of @grafs in-place...
1501                                 $graf = $value;
1502                                 $block = $this->html_hashes[$graf];
1503                                 $graf = $block;
1504 //                              if (preg_match('{
1505 //                                      \A
1506 //                                      (                                                       # $1 = <div> tag
1507 //                                        <div  \s+
1508 //                                        [^>]*
1509 //                                        \b
1510 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1511 //                                        1
1512 //                                        \2
1513 //                                        [^>]*
1514 //                                        >
1515 //                                      )
1516 //                                      (                                                       # $3 = contents
1517 //                                      .*
1518 //                                      )
1519 //                                      (</div>)                                        # $4 = closing tag
1520 //                                      \z
1521 //                                      }xs', $block, $matches))
1522 //                              {
1523 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1524 //
1525 //                                      // We can't call Markdown(), because that resets the hash;
1526 //                                      // that initialization code should be pulled into its own sub, though.
1527 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1528 //
1529 //                                      // Run document gamut methods on the content.
1530 //                                      foreach ($this->document_gamut as $method => $priority) {
1531 //                                              $div_content = $this->$method($div_content);
1532 //                                      }
1533 //
1534 //                                      $div_open = preg_replace(
1535 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1536 //
1537 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1538 //                              }
1539                                 $grafs[$key] = $graf;
1540                         }
1541                 }
1542
1543                 return implode("\n\n", $grafs);
1544         }
1545
1546         /**
1547          * Encode text for a double-quoted HTML attribute. This function
1548          * is *not* suitable for attributes enclosed in single quotes.
1549          * @param  string $text
1550          * @return string
1551          */
1552         protected function encodeAttribute($text) {
1553                 $text = $this->encodeAmpsAndAngles($text);
1554                 $text = str_replace('"', '&quot;', $text);
1555                 return $text;
1556         }
1557
1558         /**
1559          * Encode text for a double-quoted HTML attribute containing a URL,
1560          * applying the URL filter if set. Also generates the textual
1561          * representation for the URL (removing mailto: or tel:) storing it in $text.
1562          * This function is *not* suitable for attributes enclosed in single quotes.
1563          *
1564          * @param  string $url
1565          * @param  string &$text Passed by reference
1566          * @return string        URL
1567          */
1568         protected function encodeURLAttribute($url, &$text = null) {
1569                 if ($this->url_filter_func) {
1570                         $url = call_user_func($this->url_filter_func, $url);
1571                 }
1572
1573                 if (preg_match('{^mailto:}i', $url)) {
1574                         $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1575                 } else if (preg_match('{^tel:}i', $url)) {
1576                         $url = $this->encodeAttribute($url);
1577                         $text = substr($url, 4);
1578                 } else {
1579                         $url = $this->encodeAttribute($url);
1580                         $text = $url;
1581                 }
1582
1583                 return $url;
1584         }
1585
1586         /**
1587          * Smart processing for ampersands and angle brackets that need to
1588          * be encoded. Valid character entities are left alone unless the
1589          * no-entities mode is set.
1590          * @param  string $text
1591          * @return string
1592          */
1593         protected function encodeAmpsAndAngles($text) {
1594                 if ($this->no_entities) {
1595                         $text = str_replace('&', '&amp;', $text);
1596                 } else {
1597                         // Ampersand-encoding based entirely on Nat Irons's Amputator
1598                         // MT plugin: <http://bumppo.net/projects/amputator/>
1599                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1600                                                                 '&amp;', $text);
1601                 }
1602                 // Encode remaining <'s
1603                 $text = str_replace('<', '&lt;', $text);
1604
1605                 return $text;
1606         }
1607
1608         /**
1609          * Parse Markdown automatic links to anchor HTML tags
1610          * @param  string $text
1611          * @return string
1612          */
1613         protected function doAutoLinks($text) {
1614                 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1615                         array($this, '_doAutoLinks_url_callback'), $text);
1616
1617                 // Email addresses: <address@domain.foo>
1618                 $text = preg_replace_callback('{
1619                         <
1620                         (?:mailto:)?
1621                         (
1622                                 (?:
1623                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1624                                 |
1625                                         ".*?"
1626                                 )
1627                                 \@
1628                                 (?:
1629                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1630                                 |
1631                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1632                                 )
1633                         )
1634                         >
1635                         }xi',
1636                         array($this, '_doAutoLinks_email_callback'), $text);
1637
1638                 return $text;
1639         }
1640
1641         /**
1642          * Parse URL callback
1643          * @param  array $matches
1644          * @return string
1645          */
1646         protected function _doAutoLinks_url_callback($matches) {
1647                 $url = $this->encodeURLAttribute($matches[1], $text);
1648                 $link = "<a href=\"$url\">$text</a>";
1649                 return $this->hashPart($link);
1650         }
1651
1652         /**
1653          * Parse email address callback
1654          * @param  array $matches
1655          * @return string
1656          */
1657         protected function _doAutoLinks_email_callback($matches) {
1658                 $addr = $matches[1];
1659                 $url = $this->encodeURLAttribute("mailto:$addr", $text);
1660                 $link = "<a href=\"$url\">$text</a>";
1661                 return $this->hashPart($link);
1662         }
1663
1664         /**
1665          * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1666          *
1667          * Output: the same text but with most characters encoded as either a
1668          *         decimal or hex entity, in the hopes of foiling most address
1669          *         harvesting spam bots. E.g.:
1670          *
1671          *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1672          *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1673          *        &#x6d;
1674          *
1675          * Note: the additional output $tail is assigned the same value as the
1676          * ouput, minus the number of characters specified by $head_length.
1677          *
1678          * Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1679          * With some optimizations by Milian Wolff. Forced encoding of HTML
1680          * attribute special characters by Allan Odgaard.
1681          *
1682          * @param  string  $text
1683          * @param  string  &$tail
1684          * @param  integer $head_length
1685          * @return string
1686          */
1687         protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1688                 if ($text == "") {
1689                         return $tail = "";
1690                 }
1691
1692                 $chars = preg_split('/(?<!^)(?!$)/', $text);
1693                 $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.
1694
1695                 foreach ($chars as $key => $char) {
1696                         $ord = ord($char);
1697                         // Ignore non-ascii chars.
1698                         if ($ord < 128) {
1699                                 $r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
1700                                 // roughly 10% raw, 45% hex, 45% dec
1701                                 // '@' *must* be encoded. I insist.
1702                                 // '"' and '>' have to be encoded inside the attribute
1703                                 if ($r > 90 && strpos('@"&>', $char) === false) {
1704                                         /* do nothing */
1705                                 } else if ($r < 45) {
1706                                         $chars[$key] = '&#x'.dechex($ord).';';
1707                                 } else {
1708                                         $chars[$key] = '&#'.$ord.';';
1709                                 }
1710                         }
1711                 }
1712
1713                 $text = implode('', $chars);
1714                 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1715
1716                 return $text;
1717         }
1718
1719         /**
1720          * Take the string $str and parse it into tokens, hashing embeded HTML,
1721          * escaped characters and handling code spans.
1722          * @param  string $str
1723          * @return string
1724          */
1725         protected function parseSpan($str) {
1726                 $output = '';
1727
1728                 $span_re = '{
1729                                 (
1730                                         \\\\'.$this->escape_chars_re.'
1731                                 |
1732                                         (?<![`\\\\])
1733                                         `+                                              # code span marker
1734                         '.( $this->no_markup ? '' : '
1735                                 |
1736                                         <!--    .*?     -->             # comment
1737                                 |
1738                                         <\?.*?\?> | <%.*?%>             # processing instruction
1739                                 |
1740                                         <[!$]?[-a-zA-Z0-9:_]+   # regular tags
1741                                         (?>
1742                                                 \s
1743                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1744                                         )?
1745                                         >
1746                                 |
1747                                         <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1748                                 |
1749                                         </[-a-zA-Z0-9:_]+\s*> # closing tag
1750                         ').'
1751                                 )
1752                                 }xs';
1753
1754                 while (1) {
1755                         // Each loop iteration seach for either the next tag, the next
1756                         // openning code span marker, or the next escaped character.
1757                         // Each token is then passed to handleSpanToken.
1758                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1759
1760                         // Create token from text preceding tag.
1761                         if ($parts[0] != "") {
1762                                 $output .= $parts[0];
1763                         }
1764
1765                         // Check if we reach the end.
1766                         if (isset($parts[1])) {
1767                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1768                                 $str = $parts[2];
1769                         } else {
1770                                 break;
1771                         }
1772                 }
1773
1774                 return $output;
1775         }
1776
1777         /**
1778          * Handle $token provided by parseSpan by determining its nature and
1779          * returning the corresponding value that should replace it.
1780          * @param  string $token
1781          * @param  string &$str
1782          * @return string
1783          */
1784         protected function handleSpanToken($token, &$str) {
1785                 switch ($token{0}) {
1786                         case "\\":
1787                                 return $this->hashPart("&#". ord($token{1}). ";");
1788                         case "`":
1789                                 // Search for end marker in remaining text.
1790                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1791                                         $str, $matches))
1792                                 {
1793                                         $str = $matches[2];
1794                                         $codespan = $this->makeCodeSpan($matches[1]);
1795                                         return $this->hashPart($codespan);
1796                                 }
1797                                 return $token; // Return as text since no ending marker found.
1798                         default:
1799                                 return $this->hashPart($token);
1800                 }
1801         }
1802
1803         /**
1804          * Remove one level of line-leading tabs or spaces
1805          * @param  string $text
1806          * @return string
1807          */
1808         protected function outdent($text) {
1809                 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
1810         }
1811
1812
        /**
         * String length function used by the detab callback. It starts out as
         * the name of a function; `_initDetab` replaces it with a closure that
         * counts UTF-8 characters when no function of that name exists.
         * @var string|callable
         */
        protected $utf8_strlen = 'mb_strlen';
1819
1820         /**
1821          * Replace tabs with the appropriate amount of spaces.
1822          *
1823          * For each line we separate the line in blocks delemited by tab characters.
1824          * Then we reconstruct every line by adding the  appropriate number of space
1825          * between each blocks.
1826          *
1827          * @param  string $text
1828          * @return string
1829          */
1830         protected function detab($text) {
1831                 $text = preg_replace_callback('/^.*\t.*$/m',
1832                         array($this, '_detab_callback'), $text);
1833
1834                 return $text;
1835         }
1836
1837         /**
1838          * Replace tabs callback
1839          * @param  string $matches
1840          * @return string
1841          */
1842         protected function _detab_callback($matches) {
1843                 $line = $matches[0];
1844                 $strlen = $this->utf8_strlen; // strlen function for UTF-8.
1845
1846                 // Split in blocks.
1847                 $blocks = explode("\t", $line);
1848                 // Add each blocks to the line.
1849                 $line = $blocks[0];
1850                 unset($blocks[0]); // Do not add first block twice.
1851                 foreach ($blocks as $block) {
1852                         // Calculate amount of space, insert spaces, insert block.
1853                         $amount = $this->tab_width -
1854                                 $strlen($line, 'UTF-8') % $this->tab_width;
1855                         $line .= str_repeat(" ", $amount) . $block;
1856                 }
1857                 return $line;
1858         }
1859
1860         /**
1861          * Check for the availability of the function in the `utf8_strlen` property
1862          * (initially `mb_strlen`). If the function is not available, create a
1863          * function that will loosely count the number of UTF-8 characters with a
1864          * regular expression.
1865          * @return void
1866          */
1867         protected function _initDetab() {
1868
1869                 if (function_exists($this->utf8_strlen)) {
1870                         return;
1871                 }
1872
1873                 $this->utf8_strlen = function($text) {
1874                         return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
1875                 };
1876         }
1877
1878         /**
1879          * Swap back in all the tags hashed by _HashHTMLBlocks.
1880          * @param  string $text
1881          * @return string
1882          */
1883         protected function unhash($text) {
1884                 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1885                         array($this, '_unhash_callback'), $text);
1886         }
1887
1888         /**
1889          * Unhashing callback
1890          * @param  array $matches
1891          * @return string
1892          */
1893         protected function _unhash_callback($matches) {
1894                 return $this->html_hashes[$matches[0]];
1895         }
1896 }