lib/markdown.php

   1 <?php
   2 #
   3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown & Extra
   6 # Copyright (c) 2004-2009 Michel Fortin
   7 # <http://michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13
  14
  15 define( 'MARKDOWN_VERSION',  "1.0.1n" ); # Sat 10 Oct 2009
  16 define( 'MARKDOWNEXTRA_VERSION',  "1.2.4" ); # Sat 10 Oct 2009
  17
  18
  19 #
  20 # Global default settings:
  21 #
  22
  23 # Change to ">" for HTML output
  24 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  25
  26 # Define the width of a tab for code blocks.
  27 @define( 'MARKDOWN_TAB_WIDTH',     4 );
  28
  29 # Optional title attribute for footnote links and backlinks.
  30 @define( 'MARKDOWN_FN_LINK_TITLE',         "" );
  31 @define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
  32
  33 # Optional class attribute for footnote links and backlinks.
  34 @define( 'MARKDOWN_FN_LINK_CLASS',         "" );
  35 @define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
  36
  37
  38 #
  39 # WordPress settings:
  40 #
  41
  42 # Change to false to remove Markdown from posts and/or comments.
  43 @define( 'MARKDOWN_WP_POSTS',      true );
  44 @define( 'MARKDOWN_WP_COMMENTS',   true );
  45
  46
  47
  48 ### Standard Function Interface ###
  49
  50 @define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
  51
  52 function Markdown($text) {
  53 #
  54 # Initialize the parser and return the result of its transform method.
  55 #
  56         # Setup static parser variable.
  57         static $parser;
  58         if (!isset($parser)) {
  59                 $parser_class = MARKDOWN_PARSER_CLASS;
  60                 $parser = new $parser_class;
  61         }
  62
  63         # Transform text using parser.
  64         return $parser->transform($text);
  65 }
  66
  67
  68 ### WordPress Plugin Interface ###
  69
  70 /*
  71 Plugin Name: Markdown Extra
  72 Plugin URI: http://michelf.com/projects/php-markdown/
  73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
  74 Version: 1.2.4
  75 Author: Michel Fortin
  76 Author URI: http://michelf.com/
  77 */
  78
  79 if (isset($wp_version)) {
  80         # More details about how it works here:
  81         # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
  82
  83         # Post content and excerpts
  84         # - Remove WordPress paragraph generator.
  85         # - Run Markdown on excerpt, then remove all tags.
  86         # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
  87         if (MARKDOWN_WP_POSTS) {
  88                 remove_filter('the_content',     'wpautop');
  89         remove_filter('the_content_rss', 'wpautop');
  90                 remove_filter('the_excerpt',     'wpautop');
  91                 add_filter('the_content',     'mdwp_MarkdownPost', 6);
  92         add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
  93                 add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
  94                 add_filter('get_the_excerpt', 'trim', 7);
  95                 add_filter('the_excerpt',     'mdwp_add_p');
  96                 add_filter('the_excerpt_rss', 'mdwp_strip_p');
  97
  98                 remove_filter('content_save_pre',  'balanceTags', 50);
  99                 remove_filter('excerpt_save_pre',  'balanceTags', 50);
 100                 add_filter('the_content',         'balanceTags', 50);
 101                 add_filter('get_the_excerpt', 'balanceTags', 9);
 102         }
 103
 104         # Add a footnote id prefix to posts when inside a loop.
 105         function mdwp_MarkdownPost($text) {
 106                 static $parser;
 107                 if (!$parser) {
 108                         $parser_class = MARKDOWN_PARSER_CLASS;
 109                         $parser = new $parser_class;
 110                 }
 111                 if (is_single() || is_page() || is_feed()) {
 112                         $parser->fn_id_prefix = "";
 113                 } else {
 114                         $parser->fn_id_prefix = get_the_ID() . ".";
 115                 }
 116                 return $parser->transform($text);
 117         }
 118
 119         # Comments
 120         # - Remove WordPress paragraph generator.
 121         # - Remove WordPress auto-link generator.
 122         # - Scramble important tags before passing them to the kses filter.
 123         # - Run Markdown on excerpt then remove paragraph tags.
 124         if (MARKDOWN_WP_COMMENTS) {
 125                 remove_filter('comment_text', 'wpautop', 30);
 126                 remove_filter('comment_text', 'make_clickable');
 127                 add_filter('pre_comment_content', 'Markdown', 6);
 128                 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
 129                 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
 130                 add_filter('get_comment_text',    'Markdown', 6);
 131                 add_filter('get_comment_excerpt', 'Markdown', 6);
 132                 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
 133
 134                 global $mdwp_hidden_tags, $mdwp_placeholders;
 135                 $mdwp_hidden_tags = explode(' ',
 136                         '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
 137                 $mdwp_placeholders = explode(' ', str_rot13(
 138                         'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
 139                         'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
 140         }
 141
 142         function mdwp_add_p($text) {
 143                 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
 144                         $text = '<p>'.$text.'</p>';
 145                         $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
 146                 }
 147                 return $text;
 148         }
 149
 150         function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
 151
 152         function mdwp_hide_tags($text) {
 153                 global $mdwp_hidden_tags, $mdwp_placeholders;
 154                 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
 155         }
 156         function mdwp_show_tags($text) {
 157                 global $mdwp_hidden_tags, $mdwp_placeholders;
 158                 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
 159         }
 160 }
 161
 162
 163 ### bBlog Plugin Info ###
 164
 165 function identify_modifier_markdown() {
 166         return array(
 167                 'name' => 'markdown',
 168                 'type' => 'modifier',
 169                 'nicename' => 'PHP Markdown Extra',
 170                 'description' => 'A text-to-HTML conversion tool for web writers',
 171                 'authors' => 'Michel Fortin and John Gruber',
 172                 'licence' => 'GPL',
 173                 'version' => MARKDOWNEXTRA_VERSION,
 174                 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>',
 175                 );
 176 }
 177
 178
 179 ### Smarty Modifier Interface ###
 180
 181 function smarty_modifier_markdown($text) {
 182         return Markdown($text);
 183 }
 184
 185
 186 ### Textile Compatibility Mode ###
 187
 188 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
 189
 190 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
 191         # Try to include PHP SmartyPants. Should be in the same directory.
 192         @include_once 'smartypants.php';
 193         # Fake Textile class. It calls Markdown instead.
 194         class Textile {
 195                 function TextileThis($text, $lite='', $encode='') {
 196                         if ($lite == '' && $encode == '')    $text = Markdown($text);
 197                         if (function_exists('SmartyPants'))  $text = SmartyPants($text);
 198                         return $text;
 199                 }
 200                 # Fake restricted version: restrictions are not supported for now.
 201                 function TextileRestricted($text, $lite='', $noimage='') {
 202                         return $this->TextileThis($text, $lite);
 203                 }
 204                 # Workaround to ensure compatibility with TextPattern 4.0.3.
 205                 function blockLite($text) { return $text; }
 206         }
 207 }
 208
 209
 210
 211 #
 212 # Markdown Parser Class
 213 #
 214
 215 class Markdown_Parser {
 216
 217         # Regex to match balanced [brackets].
 218         # Needed to insert a maximum bracket depth while converting to PHP.
 219         var $nested_brackets_depth = 6;
 220         var $nested_brackets_re;
 221
 222         var $nested_url_parenthesis_depth = 4;
 223         var $nested_url_parenthesis_re;
 224
 225         # Table of hash values for escaped characters:
 226         var $escape_chars = '\`*_{}[]()>#+-.!';
 227         var $escape_chars_re;
 228
 229         # Change to ">" for HTML output.
 230         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
 231         var $tab_width = MARKDOWN_TAB_WIDTH;
 232
 233         # Change to `true` to disallow markup or entities.
 234         var $no_markup = false;
 235         var $no_entities = false;
 236
 237         # Predefined urls and titles for reference links and images.
 238         var $predef_urls = array();
 239         var $predef_titles = array();
 240
 241
 242         function Markdown_Parser() {
 243         #
 244         # Constructor function. Initialize appropriate member variables.
 245         #
 246                 $this->_initDetab();
 247                 $this->prepareItalicsAndBold();
 248
 249                 $this->nested_brackets_re =
 250                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 251                         str_repeat('\])*', $this->nested_brackets_depth);
 252
 253                 $this->nested_url_parenthesis_re =
 254                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
 255                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
 256
 257                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
 258
 259                 # Sort document, block, and span gamut in ascendent priority order.
 260                 asort($this->document_gamut);
 261                 asort($this->block_gamut);
 262                 asort($this->span_gamut);
 263         }
 264
 265
 266         # Internal hashes used during transformation.
 267         var $urls = array();
 268         var $titles = array();
 269         var $html_hashes = array();
 270
 271         # Status flag to avoid invalid nesting.
 272         var $in_anchor = false;
 273
 274
 275         function setup() {
 276         #
 277         # Called before the transformation process starts to setup parser
 278         # states.
 279         #
 280                 # Clear global hashes.
 281                 $this->urls = $this->predef_urls;
 282                 $this->titles = $this->predef_titles;
 283                 $this->html_hashes = array();
 284
 285                 $in_anchor = false;
 286         }
 287
 288         function teardown() {
 289         #
 290         # Called after the transformation process to clear any variable
 291         # which may be taking up memory unnecessarly.
 292         #
 293                 $this->urls = array();
 294                 $this->titles = array();
 295                 $this->html_hashes = array();
 296         }
 297
 298
 299         function transform($text) {
 300         #
 301         # Main function. Performs some preprocessing on the input text
 302         # and pass it through the document gamut.
 303         #
 304                 $this->setup();
 305
 306                 # Remove UTF-8 BOM and marker character in input, if present.
 307                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 308
 309                 # Standardize line endings:
 310                 #   DOS to Unix and Mac to Unix
 311                 $text = preg_replace('{\r\n?}', "\n", $text);
 312
 313                 # Make sure $text ends with a couple of newlines:
 314                 $text .= "\n\n";
 315
 316                 # Convert all tabs to spaces.
 317                 $text = $this->detab($text);
 318
 319                 # Turn block-level HTML blocks into hash entries
 320                 $text = $this->hashHTMLBlocks($text);
 321
 322                 # Strip any lines consisting only of spaces and tabs.
 323                 # This makes subsequent regexen easier to write, because we can
 324                 # match consecutive blank lines with /\n+/ instead of something
 325                 # contorted like /[ ]*\n+/ .
 326                 $text = preg_replace('/^[ ]+$/m', '', $text);
 327
 328                 # Run document gamut methods.
 329                 foreach ($this->document_gamut as $method => $priority) {
 330                         $text = $this->$method($text);
 331                 }
 332
 333                 $this->teardown();
 334
 335                 return $text . "\n";
 336         }
 337
 338         var $document_gamut = array(
 339                 # Strip link definitions, store in hashes.
 340                 "stripLinkDefinitions" => 20,
 341
 342                 "runBasicBlockGamut"   => 30,
 343                 );
 344
 345
 346         function stripLinkDefinitions($text) {
 347         #
 348         # Strips link definitions from text, stores the URLs and titles in
 349         # hash references.
 350         #
 351                 $less_than_tab = $this->tab_width - 1;
 352
 353                 # Link defs are in the form: ^[id]: url "optional title"
 354                 $text = preg_replace_callback('{
 355                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 356                                                           [ ]*
 357                                                           \n?                           # maybe *one* newline
 358                                                           [ ]*
 359                                                         (?:
 360                                                           <(.+?)>                       # url = $2
 361                                                         |
 362                                                           (\S+?)                        # url = $3
 363                                                         )
 364                                                           [ ]*
 365                                                           \n?                           # maybe one newline
 366                                                           [ ]*
 367                                                         (?:
 368                                                                 (?<=\s)                 # lookbehind for whitespace
 369                                                                 ["(]
 370                                                                 (.*?)                   # title = $4
 371                                                                 [")]
 372                                                                 [ ]*
 373                                                         )?      # title is optional
 374                                                         (?:\n+|\Z)
 375                         }xm',
 376                         array(&$this, '_stripLinkDefinitions_callback'),
 377                         $text);
 378                 return $text;
 379         }
 380         function _stripLinkDefinitions_callback($matches) {
 381                 $link_id = strtolower($matches[1]);
 382                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
 383                 $this->urls[$link_id] = $url;
 384                 $this->titles[$link_id] =& $matches[4];
 385                 return ''; # String that will replace the block
 386         }
 387
 388
 389         function hashHTMLBlocks($text) {
 390                 if ($this->no_markup)  return $text;
 391
 392                 $less_than_tab = $this->tab_width - 1;
 393
 394                 # Hashify HTML blocks:
 395                 # We only want to do this for block-level HTML tags, such as headers,
 396                 # lists, and tables. That's because we still want to wrap <p>s around
 397                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 398                 # phrase emphasis, and spans. The list of tags we're looking for is
 399                 # hard-coded:
 400                 #
 401                 # *  List "a" is made of tags which can be both inline or block-level.
 402                 #    These will be treated block-level when the start tag is alone on
 403                 #    its line, otherwise they're not matched here and will be taken as
 404                 #    inline later.
 405                 # *  List "b" is made of tags which are always block-level;
 406                 #
 407                 $block_tags_a_re = 'ins|del';
 408                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 409                                                    'script|noscript|form|fieldset|iframe|math';
 410
 411                 # Regular expression for the content of a block tag.
 412                 $nested_tags_level = 4;
 413                 $attr = '
 414                         (?>                             # optional tag attributes
 415                           \s                    # starts with whitespace
 416                           (?>
 417                                 [^>"/]+         # text outside quotes
 418                           |
 419                                 /+(?!>)         # slash not followed by ">"
 420                           |
 421                                 "[^"]*"         # text inside double quotes (tolerate ">")
 422                           |
 423                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
 424                           )*
 425                         )?
 426                         ';
 427                 $content =
 428                         str_repeat('
 429                                 (?>
 430                                   [^<]+                 # content without tag
 431                                 |
 432                                   <\2                   # nested opening tag
 433                                         '.$attr.'       # attributes
 434                                         (?>
 435                                           />
 436                                         |
 437                                           >', $nested_tags_level).      # end of opening tag
 438                                           '.*?'.                                        # last level nested tag content
 439                         str_repeat('
 440                                           </\2\s*>      # closing nested tag
 441                                         )
 442                                   |
 443                                         <(?!/\2\s*>     # other tags with a different name
 444                                   )
 445                                 )*',
 446                                 $nested_tags_level);
 447                 $content2 = str_replace('\2', '\3', $content);
 448
 449                 # First, look for nested blocks, e.g.:
 450                 #       <div>
 451                 #               <div>
 452                 #               tags for inner block must be indented.
 453                 #               </div>
 454                 #       </div>
 455                 #
 456                 # The outermost tags must start at the left margin for this to match, and
 457                 # the inner nested divs must be indented.
 458                 # We need to do this before the next, more liberal match, because the next
 459                 # match will start at the first `<div>` and stop at the first `</div>`.
 460                 $text = preg_replace_callback('{(?>
 461                         (?>
 462                                 (?<=\n\n)               # Starting after a blank line
 463                                 |                               # or
 464                                 \A\n?                   # the beginning of the doc
 465                         )
 466                         (                                               # save in $1
 467
 468                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 469                           # in between.
 470
 471                                                 [ ]{0,'.$less_than_tab.'}
 472                                                 <('.$block_tags_b_re.')# start tag = $2
 473                                                 '.$attr.'>                      # attributes followed by > and \n
 474                                                 '.$content.'            # content, support nesting
 475                                                 </\2>                           # the matching end tag
 476                                                 [ ]*                            # trailing spaces/tabs
 477                                                 (?=\n+|\Z)      # followed by a newline or end of document
 478
 479                         | # Special version for tags of group a.
 480
 481                                                 [ ]{0,'.$less_than_tab.'}
 482                                                 <('.$block_tags_a_re.')# start tag = $3
 483                                                 '.$attr.'>[ ]*\n        # attributes followed by >
 484                                                 '.$content2.'           # content, support nesting
 485                                                 </\3>                           # the matching end tag
 486                                                 [ ]*                            # trailing spaces/tabs
 487                                                 (?=\n+|\Z)      # followed by a newline or end of document
 488
 489                         | # Special case just for <hr />. It was easier to make a special
 490                           # case than to make the other regex more complicated.
 491
 492                                                 [ ]{0,'.$less_than_tab.'}
 493                                                 <(hr)                           # start tag = $2
 494                                                 '.$attr.'                       # attributes
 495                                                 /?>                                     # the matching end tag
 496                                                 [ ]*
 497                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
 498
 499                         | # Special case for standalone HTML comments:
 500
 501                                         [ ]{0,'.$less_than_tab.'}
 502                                         (?s:
 503                                                 <!-- .*? -->
 504                                         )
 505                                         [ ]*
 506                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 507
 508                         | # PHP and ASP-style processor instructions (<? and <%)
 509
 510                                         [ ]{0,'.$less_than_tab.'}
 511                                         (?s:
 512                                                 <([?%])                 # $2
 513                                                 .*?
 514                                                 \2>
 515                                         )
 516                                         [ ]*
 517                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 518
 519                         )
 520                         )}Sxmi',
 521                         array(&$this, '_hashHTMLBlocks_callback'),
 522                         $text);
 523
 524                 return $text;
 525         }
 526         function _hashHTMLBlocks_callback($matches) {
 527                 $text = $matches[1];
 528                 $key  = $this->hashBlock($text);
 529                 return "\n\n$key\n\n";
 530         }
 531
 532
 533         function hashPart($text, $boundary = 'X') {
 534         #
 535         # Called whenever a tag must be hashed when a function insert an atomic
 536         # element in the text stream. Passing $text to through this function gives
 537         # a unique text-token which will be reverted back when calling unhash.
 538         #
 539         # The $boundary argument specify what character should be used to surround
 540         # the token. By convension, "B" is used for block elements that needs not
 541         # to be wrapped into paragraph tags at the end, ":" is used for elements
 542         # that are word separators and "X" is used in the general case.
 543         #
 544                 # Swap back any tag hash found in $text so we do not have to `unhash`
 545                 # multiple times at the end.
 546                 $text = $this->unhash($text);
 547
 548                 # Then hash the block.
 549                 static $i = 0;
 550                 $key = "$boundary\x1A" . ++$i . $boundary;
 551                 $this->html_hashes[$key] = $text;
 552                 return $key; # String that will replace the tag.
 553         }
 554
 555
 556         function hashBlock($text) {
 557         #
 558         # Shortcut function for hashPart with block-level boundaries.
 559         #
 560                 return $this->hashPart($text, 'B');
 561         }
 562
 563
 564         var $block_gamut = array(
 565         #
 566         # These are all the transformations that form block-level
 567         # tags like paragraphs, headers, and list items.
 568         #
 569                 "doHeaders"         => 10,
 570                 "doHorizontalRules" => 20,
 571
 572                 "doLists"           => 40,
 573                 "doCodeBlocks"      => 50,
 574                 "doBlockQuotes"     => 60,
 575                 );
 576
 577         function runBlockGamut($text) {
 578         #
 579         # Run block gamut tranformations.
 580         #
 581                 # We need to escape raw HTML in Markdown source before doing anything
 582                 # else. This need to be done for each block, and not only at the
 583                 # begining in the Markdown function since hashed blocks can be part of
 584                 # list items and could have been indented. Indented blocks would have
 585                 # been seen as a code block in a previous pass of hashHTMLBlocks.
 586                 $text = $this->hashHTMLBlocks($text);
 587
 588                 return $this->runBasicBlockGamut($text);
 589         }
 590
 591         function runBasicBlockGamut($text) {
 592         #
 593         # Run block gamut tranformations, without hashing HTML blocks. This is
 594         # useful when HTML blocks are known to be already hashed, like in the first
 595         # whole-document pass.
 596         #
 597                 foreach ($this->block_gamut as $method => $priority) {
 598                         $text = $this->$method($text);
 599                 }
 600
 601                 # Finally form paragraph and restore hashed blocks.
 602                 $text = $this->formParagraphs($text);
 603
 604                 return $text;
 605         }
 606
 607
 608         function doHorizontalRules($text) {
 609                 # Do Horizontal Rules:
 610                 return preg_replace(
 611                         '{
 612                                 ^[ ]{0,3}       # Leading space
 613                                 ([-*_])         # $1: First marker
 614                                 (?>                     # Repeated marker group
 615                                         [ ]{0,2}        # Zero, one, or two spaces.
 616                                         \1                      # Marker character
 617                                 ){2,}           # Group repeated at least twice
 618                                 [ ]*            # Tailing spaces
 619                                 $                       # End of line.
 620                         }mx',
 621                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 622                         $text);
 623         }
 624
 625
 626         var $span_gamut = array(
 627         #
 628         # These are all the transformations that occur *within* block-level
 629         # tags like paragraphs, headers, and list items.
 630         #
 631                 # Process character escapes, code spans, and inline HTML
 632                 # in one shot.
 633                 "parseSpan"           => -30,
 634
 635                 # Process anchor and image tags. Images must come first,
 636                 # because ![foo][f] looks like an anchor.
 637                 "doImages"            =>  10,
 638                 "doAnchors"           =>  20,
 639
 640                 # Make links out of things like `<http://example.com/>`
 641                 # Must come after doAnchors, because you can use < and >
 642                 # delimiters in inline links like [this](<url>).
 643                 "doAutoLinks"         =>  30,
 644                 "encodeAmpsAndAngles" =>  40,
 645
 646                 "doItalicsAndBold"    =>  50,
 647                 "doHardBreaks"        =>  60,
 648                 );
 649
 650         function runSpanGamut($text) {
 651         #
 652         # Run span gamut tranformations.
 653         #
 654                 foreach ($this->span_gamut as $method => $priority) {
 655                         $text = $this->$method($text);
 656                 }
 657
 658                 return $text;
 659         }
 660
 661
 662         function doHardBreaks($text) {
 663                 # Do hard breaks:
 664                 return preg_replace_callback('/ {2,}\n/',
 665                         array(&$this, '_doHardBreaks_callback'), $text);
 666         }
 667         function _doHardBreaks_callback($matches) {
 668                 return $this->hashPart("<br$this->empty_element_suffix\n");
 669         }
 670
 671
 672         function doAnchors($text) {
 673         #
 674         # Turn Markdown link shortcuts into XHTML <a> tags.
 675         #
 676                 if ($this->in_anchor) return $text;
 677                 $this->in_anchor = true;
 678
 679                 #
 680                 # First, handle reference-style links: [link text] [id]
 681                 #
 682                 $text = preg_replace_callback('{
 683                         (                                       # wrap whole match in $1
 684                           \[
 685                                 ('.$this->nested_brackets_re.') # link text = $2
 686                           \]
 687
 688                           [ ]?                          # one optional space
 689                           (?:\n[ ]*)?           # one optional newline followed by spaces
 690
 691                           \[
 692                                 (.*?)           # id = $3
 693                           \]
 694                         )
 695                         }xs',
 696                         array(&$this, '_doAnchors_reference_callback'), $text);
 697
 698                 #
 699                 # Next, inline-style links: [link text](url "optional title")
 700                 #
 701                 $text = preg_replace_callback('{
 702                         (                               # wrap whole match in $1
 703                           \[
 704                                 ('.$this->nested_brackets_re.') # link text = $2
 705                           \]
 706                           \(                    # literal paren
 707                                 [ \n]*
 708                                 (?:
 709                                         <(.+?)> # href = $3
 710                                 |
 711                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
 712                                 )
 713                                 [ \n]*
 714                                 (                       # $5
 715                                   ([\'"])       # quote char = $6
 716                                   (.*?)         # Title = $7
 717                                   \6            # matching quote
 718                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
 719                                 )?                      # title is optional
 720                           \)
 721                         )
 722                         }xs',
 723                         array(&$this, '_doAnchors_inline_callback'), $text);
 724
 725                 #
 726                 # Last, handle reference-style shortcuts: [link text]
 727                 # These must come last in case you've also got [link text][1]
 728                 # or [link text](/foo)
 729                 #
 730                 $text = preg_replace_callback('{
 731                         (                                       # wrap whole match in $1
 732                           \[
 733                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 734                           \]
 735                         )
 736                         }xs',
 737                         array(&$this, '_doAnchors_reference_callback'), $text);
 738
 739                 $this->in_anchor = false;
 740                 return $text;
 741         }
 742         function _doAnchors_reference_callback($matches) {
 743                 $whole_match =  $matches[1];
 744                 $link_text   =  $matches[2];
 745                 $link_id     =& $matches[3];
 746
 747                 if ($link_id == "") {
 748                         # for shortcut links like [this][] or [this].
 749                         $link_id = $link_text;
 750                 }
 751
 752                 # lower-case and turn embedded newlines into spaces
 753                 $link_id = strtolower($link_id);
 754                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 755
 756                 if (isset($this->urls[$link_id])) {
 757                         $url = $this->urls[$link_id];
 758                         $url = $this->encodeAttribute($url);
 759
 760                         $result = "<a href=\"$url\"";
 761                         if ( isset( $this->titles[$link_id] ) ) {
 762                                 $title = $this->titles[$link_id];
 763                                 $title = $this->encodeAttribute($title);
 764                                 $result .=  " title=\"$title\"";
 765                         }
 766
 767                         $link_text = $this->runSpanGamut($link_text);
 768                         $result .= ">$link_text</a>";
 769                         $result = $this->hashPart($result);
 770                 }
 771                 else {
 772                         $result = $whole_match;
 773                 }
 774                 return $result;
 775         }
 776         function _doAnchors_inline_callback($matches) {
 777                 $whole_match    =  $matches[1];
 778                 $link_text              =  $this->runSpanGamut($matches[2]);
 779                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 780                 $title                  =& $matches[7];
 781
 782                 $url = $this->encodeAttribute($url);
 783
 784                 $result = "<a href=\"$url\"";
 785                 if (isset($title)) {
 786                         $title = $this->encodeAttribute($title);
 787                         $result .=  " title=\"$title\"";
 788                 }
 789
 790                 $link_text = $this->runSpanGamut($link_text);
 791                 $result .= ">$link_text</a>";
 792
 793                 return $this->hashPart($result);
 794         }
 795
 796
 797         function doImages($text) {
 798         #
 799         # Turn Markdown image shortcuts into <img> tags.
 800         #
 801                 #
 802                 # First, handle reference-style labeled images: ![alt text][id]
 803                 #
 804                 $text = preg_replace_callback('{
 805                         (                               # wrap whole match in $1
 806                           !\[
 807                                 ('.$this->nested_brackets_re.')         # alt text = $2
 808                           \]
 809
 810                           [ ]?                          # one optional space
 811                           (?:\n[ ]*)?           # one optional newline followed by spaces
 812
 813                           \[
 814                                 (.*?)           # id = $3
 815                           \]
 816
 817                         )
 818                         }xs',
 819                         array(&$this, '_doImages_reference_callback'), $text);
 820
 821                 #
 822                 # Next, handle inline images:  ![alt text](url "optional title")
 823                 # Don't forget: encode * and _
 824                 #
 825                 $text = preg_replace_callback('{
 826                         (                               # wrap whole match in $1
 827                           !\[
 828                                 ('.$this->nested_brackets_re.')         # alt text = $2
 829                           \]
 830                           \s?                   # One optional whitespace character
 831                           \(                    # literal paren
 832                                 [ \n]*
 833                                 (?:
 834                                         <(\S*)> # src url = $3
 835                                 |
 836                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
 837                                 )
 838                                 [ \n]*
 839                                 (                       # $5
 840                                   ([\'"])       # quote char = $6
 841                                   (.*?)         # title = $7
 842                                   \6            # matching quote
 843                                   [ \n]*
 844                                 )?                      # title is optional
 845                           \)
 846                         )
 847                         }xs',
 848                         array(&$this, '_doImages_inline_callback'), $text);
 849
 850                 return $text;
 851         }
 852         function _doImages_reference_callback($matches) {
 853                 $whole_match = $matches[1];
 854                 $alt_text    = $matches[2];
 855                 $link_id     = strtolower($matches[3]);
 856
 857                 if ($link_id == "") {
 858                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 859                 }
 860
 861                 $alt_text = $this->encodeAttribute($alt_text);
 862                 if (isset($this->urls[$link_id])) {
 863                         $url = $this->encodeAttribute($this->urls[$link_id]);
 864                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 865                         if (isset($this->titles[$link_id])) {
 866                                 $title = $this->titles[$link_id];
 867                                 $title = $this->encodeAttribute($title);
 868                                 $result .=  " title=\"$title\"";
 869                         }
 870                         $result .= $this->empty_element_suffix;
 871                         $result = $this->hashPart($result);
 872                 }
 873                 else {
 874                         # If there's no such link ID, leave intact:
 875                         $result = $whole_match;
 876                 }
 877
 878                 return $result;
 879         }
 880         function _doImages_inline_callback($matches) {
 881                 $whole_match    = $matches[1];
 882                 $alt_text               = $matches[2];
 883                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 884                 $title                  =& $matches[7];
 885
 886                 $alt_text = $this->encodeAttribute($alt_text);
 887                 $url = $this->encodeAttribute($url);
 888                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 889                 if (isset($title)) {
 890                         $title = $this->encodeAttribute($title);
 891                         $result .=  " title=\"$title\""; # $title already quoted
 892                 }
 893                 $result .= $this->empty_element_suffix;
 894
 895                 return $this->hashPart($result);
 896         }
 897
 898
 899         function doHeaders($text) {
 900                 # Setext-style headers:
 901                 #         Header 1
 902                 #         ========
 903                 #
 904                 #         Header 2
 905                 #         --------
 906                 #
 907                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 908                         array(&$this, '_doHeaders_callback_setext'), $text);
 909
 910                 # atx-style headers:
 911                 #       # Header 1
 912                 #       ## Header 2
 913                 #       ## Header 2 with closing hashes ##
 914                 #       ...
 915                 #       ###### Header 6
 916                 #
 917                 $text = preg_replace_callback('{
 918                                 ^(\#{1,6})      # $1 = string of #\'s
 919                                 [ ]*
 920                                 (.+?)           # $2 = Header text
 921                                 [ ]*
 922                                 \#*                     # optional closing #\'s (not counted)
 923                                 \n+
 924                         }xm',
 925                         array(&$this, '_doHeaders_callback_atx'), $text);
 926
 927                 return $text;
 928         }
 929         function _doHeaders_callback_setext($matches) {
 930                 # Terrible hack to check we haven't found an empty list item.
 931                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 932                         return $matches[0];
 933
 934                 $level = $matches[2]{0} == '=' ? 1 : 2;
 935                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
 936                 return "\n" . $this->hashBlock($block) . "\n\n";
 937         }
 938         function _doHeaders_callback_atx($matches) {
 939                 $level = strlen($matches[1]);
 940                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 941                 return "\n" . $this->hashBlock($block) . "\n\n";
 942         }
 943
 944
 945         function doLists($text) {
 946         #
 947         # Form HTML ordered (numbered) and unordered (bulleted) lists.
 948         #
 949                 $less_than_tab = $this->tab_width - 1;
 950
 951                 # Re-usable patterns to match list item bullets and number markers:
 952                 $marker_ul_re  = '[*+-]';
 953                 $marker_ol_re  = '\d+[.]';
 954                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 955
 956                 $markers_relist = array(
 957                         $marker_ul_re => $marker_ol_re,
 958                         $marker_ol_re => $marker_ul_re,
 959                         );
 960
 961                 foreach ($markers_relist as $marker_re => $other_marker_re) {
 962                         # Re-usable pattern to match any entirel ul or ol list:
 963                         $whole_list_re = '
 964                                 (                                                               # $1 = whole list
 965                                   (                                                             # $2
 966                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
 967                                         ('.$marker_re.')                        # $4 = first list item marker
 968                                         [ ]+
 969                                   )
 970                                   (?s:.+?)
 971                                   (                                                             # $5
 972                                           \z
 973                                         |
 974                                           \n{2,}
 975                                           (?=\S)
 976                                           (?!                                           # Negative lookahead for another list item marker
 977                                                 [ ]*
 978                                                 '.$marker_re.'[ ]+
 979                                           )
 980                                         |
 981                                           (?=                                           # Lookahead for another kind of list
 982                                             \n
 983                                                 \3                                              # Must have the same indentation
 984                                                 '.$other_marker_re.'[ ]+
 985                                           )
 986                                   )
 987                                 )
 988                         '; // mx
 989
 990                         # We use a different prefix before nested lists than top-level lists.
 991                         # See extended comment in _ProcessListItems().
 992
 993                         if ($this->list_level) {
 994                                 $text = preg_replace_callback('{
 995                                                 ^
 996                                                 '.$whole_list_re.'
 997                                         }mx',
 998                                         array(&$this, '_doLists_callback'), $text);
 999                         }
1000                         else {
1001                                 $text = preg_replace_callback('{
1002                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
1003                                                 '.$whole_list_re.'
1004                                         }mx',
1005                                         array(&$this, '_doLists_callback'), $text);
1006                         }
1007                 }
1008
1009                 return $text;
1010         }
1011         function _doLists_callback($matches) {
1012                 # Re-usable patterns to match list item bullets and number markers:
1013                 $marker_ul_re  = '[*+-]';
1014                 $marker_ol_re  = '\d+[.]';
1015                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1016
1017                 $list = $matches[1];
1018                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
1019
1020                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1021
1022                 $list .= "\n";
1023                 $result = $this->processListItems($list, $marker_any_re);
1024
1025                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1026                 return "\n". $result ."\n\n";
1027         }
1028
1029         var $list_level = 0;
1030
1031         function processListItems($list_str, $marker_any_re) {
1032         #
1033         #       Process the contents of a single ordered or unordered list, splitting it
1034         #       into individual list items.
1035         #
1036                 # The $this->list_level global keeps track of when we're inside a list.
1037                 # Each time we enter a list, we increment it; when we leave a list,
1038                 # we decrement. If it's zero, we're not in a list anymore.
1039                 #
1040                 # We do this because when we're not inside a list, we want to treat
1041                 # something like this:
1042                 #
1043                 #               I recommend upgrading to version
1044                 #               8. Oops, now this line is treated
1045                 #               as a sub-list.
1046                 #
1047                 # As a single paragraph, despite the fact that the second line starts
1048                 # with a digit-period-space sequence.
1049                 #
1050                 # Whereas when we're inside a list (or sub-list), that line will be
1051                 # treated as the start of a sub-list. What a kludge, huh? This is
1052                 # an aspect of Markdown's syntax that's hard to parse perfectly
1053                 # without resorting to mind-reading. Perhaps the solution is to
1054                 # change the syntax rules such that sub-lists must start with a
1055                 # starting cardinal number; e.g. "1." or "a.".
1056
1057                 $this->list_level++;
1058
1059                 # trim trailing blank lines:
1060                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1061
1062                 $list_str = preg_replace_callback('{
1063                         (\n)?                                                   # leading line = $1
1064                         (^[ ]*)                                                 # leading whitespace = $2
1065                         ('.$marker_any_re.'                             # list marker and space = $3
1066                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
1067                         )
1068                         ((?s:.*?))                                              # list item text   = $4
1069                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
1070                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1071                         }xm',
1072                         array(&$this, '_processListItems_callback'), $list_str);
1073
1074                 $this->list_level--;
1075                 return $list_str;
1076         }
1077         function _processListItems_callback($matches) {
1078                 $item = $matches[4];
1079                 $leading_line =& $matches[1];
1080                 $leading_space =& $matches[2];
1081                 $marker_space = $matches[3];
1082                 $tailing_blank_line =& $matches[5];
1083
1084                 if ($leading_line || $tailing_blank_line ||
1085                         preg_match('/\n{2,}/', $item))
1086                 {
1087                         # Replace marker with the appropriate whitespace indentation
1088                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1089                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1090                 }
1091                 else {
1092                         # Recursion for sub-lists:
1093                         $item = $this->doLists($this->outdent($item));
1094                         $item = preg_replace('/\n+$/', '', $item);
1095                         $item = $this->runSpanGamut($item);
1096                 }
1097
1098                 return "<li>" . $item . "</li>\n";
1099         }
1100
1101
1102         function doCodeBlocks($text) {
1103         #
1104         #       Process Markdown `<pre><code>` blocks.
1105         #
1106                 $text = preg_replace_callback('{
1107                                 (?:\n\n|\A\n?)
1108                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1109                                   (?>
1110                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1111                                         .*\n+
1112                                   )+
1113                                 )
1114                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1115                         }xm',
1116                         array(&$this, '_doCodeBlocks_callback'), $text);
1117
1118                 return $text;
1119         }
1120         function _doCodeBlocks_callback($matches) {
1121                 $codeblock = $matches[1];
1122
1123                 $codeblock = $this->outdent($codeblock);
1124                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1125
1126                 # trim leading newlines and trailing newlines
1127                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1128
1129                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1130                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1131         }
1132
1133
1134         function makeCodeSpan($code) {
1135         #
1136         # Create a code span markup for $code. Called from handleSpanToken.
1137         #
1138                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1139                 return $this->hashPart("<code>$code</code>");
1140         }
1141
1142
1143         var $em_relist = array(
1144                 ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![.,:;]\s)',
1145                 '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
1146                 '_' => '(?<=\S|^)(?<!_)_(?!_)',
1147                 );
1148         var $strong_relist = array(
1149                 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![.,:;]\s)',
1150                 '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
1151                 '__' => '(?<=\S|^)(?<!_)__(?!_)',
1152                 );
1153         var $em_strong_relist = array(
1154                 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![.,:;]\s)',
1155                 '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
1156                 '___' => '(?<=\S|^)(?<!_)___(?!_)',
1157                 );
1158         var $em_strong_prepared_relist;
1159
1160         function prepareItalicsAndBold() {
1161         #
1162         # Prepare regular expressions for searching emphasis tokens in any
1163         # context.
1164         #
1165                 foreach ($this->em_relist as $em => $em_re) {
1166                         foreach ($this->strong_relist as $strong => $strong_re) {
1167                                 # Construct list of allowed token expressions.
1168                                 $token_relist = array();
1169                                 if (isset($this->em_strong_relist["$em$strong"])) {
1170                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1171                                 }
1172                                 $token_relist[] = $em_re;
1173                                 $token_relist[] = $strong_re;
1174
1175                                 # Construct master expression from list.
1176                                 $token_re = '{('. implode('|', $token_relist) .')}';
1177                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1178                         }
1179                 }
1180         }
1181
1182         function doItalicsAndBold($text) {
1183                 $token_stack = array('');
1184                 $text_stack = array('');
1185                 $em = '';
1186                 $strong = '';
1187                 $tree_char_em = false;
1188
1189                 while (1) {
1190                         #
1191                         # Get prepared regular expression for seraching emphasis tokens
1192                         # in current context.
1193                         #
1194                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1195
1196                         #
1197                         # Each loop iteration search for the next emphasis token.
1198                         # Each token is then passed to handleSpanToken.
1199                         #
1200                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1201                         $text_stack[0] .= $parts[0];
1202                         $token =& $parts[1];
1203                         $text =& $parts[2];
1204
1205                         if (empty($token)) {
1206                                 # Reached end of text span: empty stack without emitting.
1207                                 # any more emphasis.
1208                                 while ($token_stack[0]) {
1209                                         $text_stack[1] .= array_shift($token_stack);
1210                                         $text_stack[0] .= array_shift($text_stack);
1211                                 }
1212                                 break;
1213                         }
1214
1215                         $token_len = strlen($token);
1216                         if ($tree_char_em) {
1217                                 # Reached closing marker while inside a three-char emphasis.
1218                                 if ($token_len == 3) {
1219                                         # Three-char closing marker, close em and strong.
1220                                         array_shift($token_stack);
1221                                         $span = array_shift($text_stack);
1222                                         $span = $this->runSpanGamut($span);
1223                                         $span = "<strong><em>$span</em></strong>";
1224                                         $text_stack[0] .= $this->hashPart($span);
1225                                         $em = '';
1226                                         $strong = '';
1227                                 } else {
1228                                         # Other closing marker: close one em or strong and
1229                                         # change current token state to match the other
1230                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1231                                         $tag = $token_len == 2 ? "strong" : "em";
1232                                         $span = $text_stack[0];
1233                                         $span = $this->runSpanGamut($span);
1234                                         $span = "<$tag>$span</$tag>";
1235                                         $text_stack[0] = $this->hashPart($span);
1236                                         $$tag = ''; # $$tag stands for $em or $strong
1237                                 }
1238                                 $tree_char_em = false;
1239                         } else if ($token_len == 3) {
1240                                 if ($em) {
1241                                         # Reached closing marker for both em and strong.
1242                                         # Closing strong marker:
1243                                         for ($i = 0; $i < 2; ++$i) {
1244                                                 $shifted_token = array_shift($token_stack);
1245                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1246                                                 $span = array_shift($text_stack);
1247                                                 $span = $this->runSpanGamut($span);
1248                                                 $span = "<$tag>$span</$tag>";
1249                                                 $text_stack[0] .= $this->hashPart($span);
1250                                                 $$tag = ''; # $$tag stands for $em or $strong
1251                                         }
1252                                 } else {
1253                                         # Reached opening three-char emphasis marker. Push on token
1254                                         # stack; will be handled by the special condition above.
1255                                         $em = $token{0};
1256                                         $strong = "$em$em";
1257                                         array_unshift($token_stack, $token);
1258                                         array_unshift($text_stack, '');
1259                                         $tree_char_em = true;
1260                                 }
1261                         } else if ($token_len == 2) {
1262                                 if ($strong) {
1263                                         # Unwind any dangling emphasis marker:
1264                                         if (strlen($token_stack[0]) == 1) {
1265                                                 $text_stack[1] .= array_shift($token_stack);
1266                                                 $text_stack[0] .= array_shift($text_stack);
1267                                         }
1268                                         # Closing strong marker:
1269                                         array_shift($token_stack);
1270                                         $span = array_shift($text_stack);
1271                                         $span = $this->runSpanGamut($span);
1272                                         $span = "<strong>$span</strong>";
1273                                         $text_stack[0] .= $this->hashPart($span);
1274                                         $strong = '';
1275                                 } else {
1276                                         array_unshift($token_stack, $token);
1277                                         array_unshift($text_stack, '');
1278                                         $strong = $token;
1279                                 }
1280                         } else {
1281                                 # Here $token_len == 1
1282                                 if ($em) {
1283                                         if (strlen($token_stack[0]) == 1) {
1284                                                 # Closing emphasis marker:
1285                                                 array_shift($token_stack);
1286                                                 $span = array_shift($text_stack);
1287                                                 $span = $this->runSpanGamut($span);
1288                                                 $span = "<em>$span</em>";
1289                                                 $text_stack[0] .= $this->hashPart($span);
1290                                                 $em = '';
1291                                         } else {
1292                                                 $text_stack[0] .= $token;
1293                                         }
1294                                 } else {
1295                                         array_unshift($token_stack, $token);
1296                                         array_unshift($text_stack, '');
1297                                         $em = $token;
1298                                 }
1299                         }
1300                 }
1301                 return $text_stack[0];
1302         }
1303
1304
1305         function doBlockQuotes($text) {
1306                 $text = preg_replace_callback('/
1307                           (                                                             # Wrap whole match in $1
1308                                 (?>
1309                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1310                                         .+\n                                    # rest of the first line
1311                                   (.+\n)*                                       # subsequent consecutive lines
1312                                   \n*                                           # blanks
1313                                 )+
1314                           )
1315                         /xm',
1316                         array(&$this, '_doBlockQuotes_callback'), $text);
1317
1318                 return $text;
1319         }
1320         function _doBlockQuotes_callback($matches) {
1321                 $bq = $matches[1];
1322                 # trim one level of quoting - trim whitespace-only lines
1323                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1324                 $bq = $this->runBlockGamut($bq);                # recurse
1325
1326                 $bq = preg_replace('/^/m', "  ", $bq);
1327                 # These leading spaces cause problem with <pre> content,
1328                 # so we need to fix that:
1329                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1330                         array(&$this, '_doBlockQuotes_callback2'), $bq);
1331
1332                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1333         }
1334         function _doBlockQuotes_callback2($matches) {
1335                 $pre = $matches[1];
1336                 $pre = preg_replace('/^  /m', '', $pre);
1337                 return $pre;
1338         }
1339
1340
1341         function formParagraphs($text) {
1342         #
1343         #       Params:
1344         #               $text - string to process with html <p> tags
1345         #
1346                 # Strip leading and trailing lines:
1347                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1348
1349                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1350
1351                 #
1352                 # Wrap <p> tags and unhashify HTML blocks
1353                 #
1354                 foreach ($grafs as $key => $value) {
1355                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1356                                 # Is a paragraph.
1357                                 $value = $this->runSpanGamut($value);
1358                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1359                                 $value .= "</p>";
1360                                 $grafs[$key] = $this->unhash($value);
1361                         }
1362                         else {
1363                                 # Is a block.
1364                                 # Modify elements of @grafs in-place...
1365                                 $graf = $value;
1366                                 $block = $this->html_hashes[$graf];
1367                                 $graf = $block;
1368 //                              if (preg_match('{
1369 //                                      \A
1370 //                                      (                                                       # $1 = <div> tag
1371 //                                        <div  \s+
1372 //                                        [^>]*
1373 //                                        \b
1374 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1375 //                                        1
1376 //                                        \2
1377 //                                        [^>]*
1378 //                                        >
1379 //                                      )
1380 //                                      (                                                       # $3 = contents
1381 //                                      .*
1382 //                                      )
1383 //                                      (</div>)                                        # $4 = closing tag
1384 //                                      \z
1385 //                                      }xs', $block, $matches))
1386 //                              {
1387 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1388 //
1389 //                                      # We can't call Markdown(), because that resets the hash;
1390 //                                      # that initialization code should be pulled into its own sub, though.
1391 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1392 //
1393 //                                      # Run document gamut methods on the content.
1394 //                                      foreach ($this->document_gamut as $method => $priority) {
1395 //                                              $div_content = $this->$method($div_content);
1396 //                                      }
1397 //
1398 //                                      $div_open = preg_replace(
1399 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1400 //
1401 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1402 //                              }
1403                                 $grafs[$key] = $graf;
1404                         }
1405                 }
1406
1407                 return implode("\n\n", $grafs);
1408         }
1409
1410
1411         function encodeAttribute($text) {
1412         #
1413         # Encode text for a double-quoted HTML attribute. This function
1414         # is *not* suitable for attributes enclosed in single quotes.
1415         #
1416                 $text = $this->encodeAmpsAndAngles($text);
1417                 $text = str_replace('"', '&quot;', $text);
1418                 return $text;
1419         }
1420
1421
1422         function encodeAmpsAndAngles($text) {
1423         #
1424         # Smart processing for ampersands and angle brackets that need to
1425         # be encoded. Valid character entities are left alone unless the
1426         # no-entities mode is set.
1427         #
1428                 if ($this->no_entities) {
1429                         $text = str_replace('&', '&amp;', $text);
1430                 } else {
1431                         # Ampersand-encoding based entirely on Nat Irons's Amputator
1432                         # MT plugin: <http://bumppo.net/projects/amputator/>
1433                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1434                                                                 '&amp;', $text);;
1435                 }
1436                 # Encode remaining <'s
1437                 $text = str_replace('<', '&lt;', $text);
1438
1439                 return $text;
1440         }
1441
1442
1443         function doAutoLinks($text) {
1444                 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
1445                         array(&$this, '_doAutoLinks_url_callback'), $text);
1446
1447                 # Email addresses: <address@domain.foo>
1448                 $text = preg_replace_callback('{
1449                         <
1450                         (?:mailto:)?
1451                         (
1452                                 (?:
1453                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1454                                 |
1455                                         ".*?"
1456                                 )
1457                                 \@
1458                                 (?:
1459                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1460                                 |
1461                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1462                                 )
1463                         )
1464                         >
1465                         }xi',
1466                         array(&$this, '_doAutoLinks_email_callback'), $text);
1467
1468                 return $text;
1469         }
1470         function _doAutoLinks_url_callback($matches) {
1471                 $url = $this->encodeAttribute($matches[1]);
1472                 $link = "<a href=\"$url\">$url</a>";
1473                 return $this->hashPart($link);
1474         }
1475         function _doAutoLinks_email_callback($matches) {
1476                 $address = $matches[1];
1477                 $link = $this->encodeEmailAddress($address);
1478                 return $this->hashPart($link);
1479         }
1480
1481
1482         function encodeEmailAddress($addr) {
1483         #
1484         #       Input: an email address, e.g. "foo@example.com"
1485         #
1486         #       Output: the email address as a mailto link, with each character
1487         #               of the address encoded as either a decimal or hex entity, in
1488         #               the hopes of foiling most address harvesting spam bots. E.g.:
1489         #
1490         #         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1491         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1492         #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1493         #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1494         #
1495         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1496         #   With some optimizations by Milian Wolff.
1497         #
1498                 $addr = "mailto:" . $addr;
1499                 $chars = preg_split('/(?<!^)(?!$)/', $addr);
1500                 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1501
1502                 foreach ($chars as $key => $char) {
1503                         $ord = ord($char);
1504                         # Ignore non-ascii chars.
1505                         if ($ord < 128) {
1506                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1507                                 # roughly 10% raw, 45% hex, 45% dec
1508                                 # '@' *must* be encoded. I insist.
1509                                 if ($r > 90 && $char != '@') /* do nothing */;
1510                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1511                                 else              $chars[$key] = '&#'.$ord.';';
1512                         }
1513                 }
1514
1515                 $addr = implode('', $chars);
1516                 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1517                 $addr = "<a href=\"$addr\">$text</a>";
1518
1519                 return $addr;
1520         }
1521
1522
1523         function parseSpan($str) {
1524         #
1525         # Take the string $str and parse it into tokens, hashing embeded HTML,
1526         # escaped characters and handling code spans.
1527         #
1528                 $output = '';
1529
1530                 $span_re = '{
1531                                 (
1532                                         \\\\'.$this->escape_chars_re.'
1533                                 |
1534                                         (?<![`\\\\])
1535                                         `+                                              # code span marker
1536                         '.( $this->no_markup ? '' : '
1537                                 |
1538                                         <!--    .*?     -->             # comment
1539                                 |
1540                                         <\?.*?\?> | <%.*?%>             # processing instruction
1541                                 |
1542                                         <[/!$]?[-a-zA-Z0-9:_]+  # regular tags
1543                                         (?>
1544                                                 \s
1545                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1546                                         )?
1547                                         >
1548                         ').'
1549                                 )
1550                                 }xs';
1551
1552                 while (1) {
1553                         #
1554                         # Each loop iteration seach for either the next tag, the next
1555                         # openning code span marker, or the next escaped character.
1556                         # Each token is then passed to handleSpanToken.
1557                         #
1558                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1559
1560                         # Create token from text preceding tag.
1561                         if ($parts[0] != "") {
1562                                 $output .= $parts[0];
1563                         }
1564
1565                         # Check if we reach the end.
1566                         if (isset($parts[1])) {
1567                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1568                                 $str = $parts[2];
1569                         }
1570                         else {
1571                                 break;
1572                         }
1573                 }
1574
1575                 return $output;
1576         }
1577
1578
1579         function handleSpanToken($token, &$str) {
1580         #
1581         # Handle $token provided by parseSpan by determining its nature and
1582         # returning the corresponding value that should replace it.
1583         #
1584                 switch ($token{0}) {
1585                         case "\\":
1586                                 return $this->hashPart("&#". ord($token{1}). ";");
1587                         case "`":
1588                                 # Search for end marker in remaining text.
1589                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1590                                         $str, $matches))
1591                                 {
1592                                         $str = $matches[2];
1593                                         $codespan = $this->makeCodeSpan($matches[1]);
1594                                         return $this->hashPart($codespan);
1595                                 }
1596                                 return $token; // return as text since no ending marker found.
1597                         default:
1598                                 return $this->hashPart($token);
1599                 }
1600         }
1601
1602
1603         function outdent($text) {
1604         #
1605         # Remove one level of line-leading tabs or spaces
1606         #
1607                 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1608         }
1609
1610
        # String length function for detab. `_initDetab` will create a function to
        # handle UTF-8 if the default function does not exist.
1613         var $utf8_strlen = 'mb_strlen';
1614
1615         function detab($text) {
1616         #
1617         # Replace tabs with the appropriate amount of space.
1618         #
1619                 # For each line we separate the line in blocks delemited by
1620                 # tab characters. Then we reconstruct every line by adding the
1621                 # appropriate number of space between each blocks.
1622
1623                 $text = preg_replace_callback('/^.*\t.*$/m',
1624                         array(&$this, '_detab_callback'), $text);
1625
1626                 return $text;
1627         }
1628         function _detab_callback($matches) {
1629                 $line = $matches[0];
1630                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1631
1632                 # Split in blocks.
1633                 $blocks = explode("\t", $line);
1634                 # Add each blocks to the line.
1635                 $line = $blocks[0];
1636                 unset($blocks[0]); # Do not add first block twice.
1637                 foreach ($blocks as $block) {
1638                         # Calculate amount of space, insert spaces, insert block.
1639                         $amount = $this->tab_width -
1640                                 $strlen($line, 'UTF-8') % $this->tab_width;
1641                         $line .= str_repeat(" ", $amount) . $block;
1642                 }
1643                 return $line;
1644         }
1645         function _initDetab() {
1646         #
1647         # Check for the availability of the function in the `utf8_strlen` property
1648         # (initially `mb_strlen`). If the function is not available, create a
1649         # function that will loosely count the number of UTF-8 characters with a
1650         # regular expression.
1651         #
1652                 if (function_exists($this->utf8_strlen)) return;
1653                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1654                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1655                         $text, $m);');
1656         }
1657
1658
1659         function unhash($text) {
1660         #
1661         # Swap back in all the tags hashed by _HashHTMLBlocks.
1662         #
1663                 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1664                         array(&$this, '_unhash_callback'), $text);
1665         }
1666         function _unhash_callback($matches) {
1667                 return $this->html_hashes[$matches[0]];
1668         }
1669
1670 }
1671
1672
1673 #
1674 # Markdown Extra Parser Class
1675 #
1676
1677 class MarkdownExtra_Parser extends Markdown_Parser {
1678
1679         # Prefix for footnote ids.
1680         var $fn_id_prefix = "";
1681
1682         # Optional title attribute for footnote links and backlinks.
1683         var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
1684         var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1685
1686         # Optional class attribute for footnote links and backlinks.
1687         var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
1688         var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
1689
1690         # Predefined abbreviations.
1691         var $predef_abbr = array();
1692
1693
1694         function MarkdownExtra_Parser() {
1695         #
1696         # Constructor function. Initialize the parser object.
1697         #
1698                 # Add extra escapable characters before parent constructor
1699                 # initialize the table.
1700                 $this->escape_chars .= ':|';
1701
1702                 # Insert extra document, block, and span transformations.
1703                 # Parent constructor will do the sorting.
1704                 $this->document_gamut += array(
1705                         "doFencedCodeBlocks" => 5,
1706                         "stripFootnotes"     => 15,
1707                         "stripAbbreviations" => 25,
1708                         "appendFootnotes"    => 50,
1709                         );
1710                 $this->block_gamut += array(
1711                         "doFencedCodeBlocks" => 5,
1712                         "doTables"           => 15,
1713                         "doDefLists"         => 45,
1714                         );
1715                 $this->span_gamut += array(
1716                         "doFootnotes"        => 5,
1717                         "doAbbreviations"    => 70,
1718                         );
1719
1720                 parent::Markdown_Parser();
1721         }
1722
1723
1724         # Extra variables used during extra transformations.
1725         var $footnotes = array();
1726         var $footnotes_ordered = array();
1727         var $abbr_desciptions = array();
1728         var $abbr_word_re = '';
1729
1730         # Give the current footnote number.
1731         var $footnote_counter = 1;
1732
1733
1734         function setup() {
1735         #
1736         # Setting up Extra-specific variables.
1737         #
1738                 parent::setup();
1739
1740                 $this->footnotes = array();
1741                 $this->footnotes_ordered = array();
1742                 $this->abbr_desciptions = array();
1743                 $this->abbr_word_re = '';
1744                 $this->footnote_counter = 1;
1745
1746                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1747                         if ($this->abbr_word_re)
1748                                 $this->abbr_word_re .= '|';
1749                         $this->abbr_word_re .= preg_quote($abbr_word);
1750                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1751                 }
1752         }
1753
1754         function teardown() {
1755         #
1756         # Clearing Extra-specific variables.
1757         #
1758                 $this->footnotes = array();
1759                 $this->footnotes_ordered = array();
1760                 $this->abbr_desciptions = array();
1761                 $this->abbr_word_re = '';
1762
1763                 parent::teardown();
1764         }
1765
1766
1767         ### HTML Block Parser ###
1768
1769         # Tags that are always treated as block tags:
1770         var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1771
        # Tags treated as block tags only if the opening tag is alone on its line:
1773         var $context_block_tags_re = 'script|noscript|math|ins|del';
1774
1775         # Tags where markdown="1" default to span mode:
1776         var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1777
1778         # Tags which must not have their contents modified, no matter where
1779         # they appear:
1780         var $clean_tags_re = 'script|math';
1781
1782         # Tags that do not need to be closed.
1783         var $auto_close_tags_re = 'hr|img';
1784
1785
1786         function hashHTMLBlocks($text) {
1787         #
1788         # Hashify HTML Blocks and "clean tags".
1789         #
1790         # We only want to do this for block-level HTML tags, such as headers,
1791         # lists, and tables. That's because we still want to wrap <p>s around
1792         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1793         # phrase emphasis, and spans. The list of tags we're looking for is
1794         # hard-coded.
1795         #
1796         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1797         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1798         # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
1799         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1800         # These two functions are calling each other. It's recursive!
1801         #
1802                 #
1803                 # Call the HTML-in-Markdown hasher.
1804                 #
1805                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1806
1807                 return $text;
1808         }
1809         function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1810                                                                                 $enclosing_tag_re = '', $span = false)
1811         {
1812         #
1813         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1814         #
1815         # *   $indent is the number of space to be ignored when checking for code
1816         #     blocks. This is important because if we don't take the indent into
1817         #     account, something like this (which looks right) won't work as expected:
1818         #
1819         #     <div>
1820         #         <div markdown="1">
1821         #         Hello World.  <-- Is this a Markdown code block or text?
1822         #         </div>  <-- Is this a Markdown code block or a real tag?
1823         #     <div>
1824         #
1825         #     If you don't like this, just don't indent the tag on which
1826         #     you apply the markdown="1" attribute.
1827         #
1828         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
1829         #     tag with that name. Nested tags supported.
1830         #
1831         # *   If $span is true, text inside must treated as span. So any double
1832         #     newline will be replaced by a single newline so that it does not create
1833         #     paragraphs.
1834         #
1835         # Returns an array of that form: ( processed text , remaining text )
1836         #
1837                 if ($text === '') return array('', '');
1838
1839                 # Regex to check for the presense of newlines around a block tag.
1840                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
1841                 $newline_after_re =
1842                         '{
1843                                 ^                                               # Start of text following the tag.
1844                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
1845                                 [ ]*\n                                  # Must be followed by newline.
1846                         }xs';
1847
1848                 # Regex to match any tag.
1849                 $block_tag_re =
1850                         '{
1851                                 (                                       # $2: Capture hole tag.
1852                                         </?                                     # Any opening or closing tag.
1853                                                 (?>                             # Tag name.
1854                                                         '.$this->block_tags_re.'                        |
1855                                                         '.$this->context_block_tags_re.'        |
1856                                                         '.$this->clean_tags_re.'                |
1857                                                         (?!\s)'.$enclosing_tag_re.'
1858                                                 )
1859                                                 (?:
1860                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1861                                                         (?>
1862                                                                 ".*?"           |       # Double quotes (can contain `>`)
1863                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
1864                                                                 .+?                             # Anything but quotes and `>`.
1865                                                         )*?
1866                                                 )?
1867                                         >                                       # End of tag.
1868                                 |
1869                                         <!--    .*?     -->     # HTML Comment
1870                                 |
1871                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1872                                 |
1873                                         <!\[CDATA\[.*?\]\]>     # CData Block
1874                                 |
1875                                         # Code span marker
1876                                         `+
1877                                 '. ( !$span ? ' # If not in span.
1878                                 |
1879                                         # Indented code block
1880                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
1881                                         [ ]{'.($indent+4).'}[^\n]* \n
1882                                         (?>
1883                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1884                                         )*
1885                                 |
1886                                         # Fenced code block marker
1887                                         (?> ^ | \n )
1888                                         [ ]{'.($indent).'}~~~+[ ]*\n
1889                                 ' : '' ). ' # End (if not is span).
1890                                 )
1891                         }xs';
1892
1893
1894                 $depth = 0;             # Current depth inside the tag tree.
1895                 $parsed = "";   # Parsed text that will be returned.
1896
1897                 #
1898                 # Loop through every tag until we find the closing tag of the parent
1899                 # or loop until reaching the end of text if no parent tag specified.
1900                 #
1901                 do {
1902                         #
1903                         # Split the text using the first $tag_match pattern found.
1904                         # Text before  pattern will be first in the array, text after
1905                         # pattern will be at the end, and between will be any catches made
1906                         # by the pattern.
1907                         #
1908                         $parts = preg_split($block_tag_re, $text, 2,
1909                                                                 PREG_SPLIT_DELIM_CAPTURE);
1910
1911                         # If in Markdown span mode, add a empty-string span-level hash
1912                         # after each newline to prevent triggering any block element.
1913                         if ($span) {
1914                                 $void = $this->hashPart("", ':');
1915                                 $newline = "$void\n";
1916                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1917                         }
1918
1919                         $parsed .= $parts[0]; # Text before current tag.
1920
1921                         # If end of $text has been reached. Stop loop.
1922                         if (count($parts) < 3) {
1923                                 $text = "";
1924                                 break;
1925                         }
1926
1927                         $tag  = $parts[1]; # Tag to handle.
1928                         $text = $parts[2]; # Remaining text after current tag.
1929                         $tag_re = preg_quote($tag); # For use in a regular expression.
1930
1931                         #
1932                         # Check for: Code span marker
1933                         #
1934                         if ($tag{0} == "`") {
1935                                 # Find corresponding end marker.
1936                                 $tag_re = preg_quote($tag);
1937                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1938                                         $text, $matches))
1939                                 {
1940                                         # End marker found: pass text unchanged until marker.
1941                                         $parsed .= $tag . $matches[0];
1942                                         $text = substr($text, strlen($matches[0]));
1943                                 }
1944                                 else {
1945                                         # Unmatched marker: just skip it.
1946                                         $parsed .= $tag;
1947                                 }
1948                         }
1949                         #
1950                         # Check for: Indented code block.
1951                         #
1952                         else if ($tag{0} == "\n" || $tag{0} == " ") {
1953                                 # Indented code block: pass it unchanged, will be handled
1954                                 # later.
1955                                 $parsed .= $tag;
1956                         }
1957                         #
1958                         # Check for: Fenced code block marker.
1959                         #
1960                         else if ($tag{0} == "~") {
1961                                 # Fenced code block marker: find matching end marker.
1962                                 $tag_re = preg_quote(trim($tag));
1963                                 if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text,
1964                                         $matches))
1965                                 {
1966                                         # End marker found: pass text unchanged until marker.
1967                                         $parsed .= $tag . $matches[0];
1968                                         $text = substr($text, strlen($matches[0]));
1969                                 }
1970                                 else {
1971                                         # No end marker: just skip it.
1972                                         $parsed .= $tag;
1973                                 }
1974                         }
1975                         #
1976                         # Check for: Opening Block level tag or
1977                         #            Opening Context Block tag (like ins and del)
1978                         #               used as a block tag (tag is alone on it's line).
1979                         #
1980                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1981                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1982                                         preg_match($newline_before_re, $parsed) &&
1983                                         preg_match($newline_after_re, $text)    )
1984                                 )
1985                         {
1986                                 # Need to parse tag and following text using the HTML parser.
1987                                 list($block_text, $text) =
1988                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1989
1990                                 # Make sure it stays outside of any paragraph by adding newlines.
1991                                 $parsed .= "\n\n$block_text\n\n";
1992                         }
1993                         #
1994                         # Check for: Clean tag (like script, math)
1995                         #            HTML Comments, processing instructions.
1996                         #
1997                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1998                                 $tag{1} == '!' || $tag{1} == '?')
1999                         {
2000                                 # Need to parse tag and following text using the HTML parser.
2001                                 # (don't check for markdown attribute)
2002                                 list($block_text, $text) =
2003                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
2004
2005                                 $parsed .= $block_text;
2006                         }
2007                         #
2008                         # Check for: Tag with same name as enclosing tag.
2009                         #
2010                         else if ($enclosing_tag_re !== '' &&
2011                                 # Same name as enclosing tag.
2012                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2013                         {
2014                                 #
2015                                 # Increase/decrease nested tag count.
2016                                 #
2017                                 if ($tag{1} == '/')                                             $depth--;
2018                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
2019
2020                                 if ($depth < 0) {
2021                                         #
2022                                         # Going out of parent element. Clean up and break so we
2023                                         # return to the calling function.
2024                                         #
2025                                         $text = $tag . $text;
2026                                         break;
2027                                 }
2028
2029                                 $parsed .= $tag;
2030                         }
2031                         else {
2032                                 $parsed .= $tag;
2033                         }
2034                 } while ($depth >= 0);
2035
2036                 return array($parsed, $text);
2037         }
2038         function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2039         #
2040         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2041         #
2042         # *   Calls $hash_method to convert any blocks.
2043         # *   Stops when the first opening tag closes.
2044         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2045         #     (it is not inside clean tags)
2046         #
2047         # Returns an array of that form: ( processed text , remaining text )
2048         #
2049                 if ($text === '') return array('', '');
2050
2051                 # Regex to match `markdown` attribute inside of a tag.
2052                 $markdown_attr_re = '
2053                         {
2054                                 \s*                     # Eat whitespace before the `markdown` attribute
2055                                 markdown
2056                                 \s*=\s*
2057                                 (?>
2058                                         (["\'])         # $1: quote delimiter
2059                                         (.*?)           # $2: attribute value
2060                                         \1                      # matching delimiter
2061                                 |
2062                                         ([^\s>]*)       # $3: unquoted attribute value
2063                                 )
2064                                 ()                              # $4: make $3 always defined (avoid warnings)
2065                         }xs';
2066
2067                 # Regex to match any tag.
2068                 $tag_re = '{
2069                                 (                                       # $2: Capture hole tag.
2070                                         </?                                     # Any opening or closing tag.
2071                                                 [\w:$]+                 # Tag name.
2072                                                 (?:
2073                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
2074                                                         (?>
2075                                                                 ".*?"           |       # Double quotes (can contain `>`)
2076                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
2077                                                                 .+?                             # Anything but quotes and `>`.
2078                                                         )*?
2079                                                 )?
2080                                         >                                       # End of tag.
2081                                 |
2082                                         <!--    .*?     -->     # HTML Comment
2083                                 |
2084                                         <\?.*?\?> | <%.*?%>     # Processing instruction
2085                                 |
2086                                         <!\[CDATA\[.*?\]\]>     # CData Block
2087                                 )
2088                         }xs';
2089
2090                 $original_text = $text;         # Save original text in case of faliure.
2091
2092                 $depth          = 0;    # Current depth inside the tag tree.
2093                 $block_text     = "";   # Temporary text holder for current text.
2094                 $parsed         = "";   # Parsed text that will be returned.
2095
2096                 #
2097                 # Get the name of the starting tag.
2098                 # (This pattern makes $base_tag_name_re safe without quoting.)
2099                 #
2100                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2101                         $base_tag_name_re = $matches[1];
2102
2103                 #
2104                 # Loop through every tag until we find the corresponding closing tag.
2105                 #
2106                 do {
2107                         #
2108                         # Split the text using the first $tag_match pattern found.
2109                         # Text before  pattern will be first in the array, text after
2110                         # pattern will be at the end, and between will be any catches made
2111                         # by the pattern.
2112                         #
2113                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2114
2115                         if (count($parts) < 3) {
2116                                 #
2117                                 # End of $text reached with unbalenced tag(s).
2118                                 # In that case, we return original text unchanged and pass the
2119                                 # first character as filtered to prevent an infinite loop in the
2120                                 # parent function.
2121                                 #
2122                                 return array($original_text{0}, substr($original_text, 1));
2123                         }
2124
2125                         $block_text .= $parts[0]; # Text before current tag.
2126                         $tag         = $parts[1]; # Tag to handle.
2127                         $text        = $parts[2]; # Remaining text after current tag.
2128
2129                         #
2130                         # Check for: Auto-close tag (like <hr/>)
2131                         #                        Comments and Processing Instructions.
2132                         #
2133                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2134                                 $tag{1} == '!' || $tag{1} == '?')
2135                         {
2136                                 # Just add the tag to the block as if it was text.
2137                                 $block_text .= $tag;
2138                         }
2139                         else {
2140                                 #
2141                                 # Increase/decrease nested tag count. Only do so if
2142                                 # the tag's name match base tag's.
2143                                 #
2144                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2145                                         if ($tag{1} == '/')                                             $depth--;
2146                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
2147                                 }
2148
2149                                 #
2150                                 # Check for `markdown="1"` attribute and handle it.
2151                                 #
2152                                 if ($md_attr &&
2153                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
2154                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2155                                 {
2156                                         # Remove `markdown` attribute from opening tag.
2157                                         $tag = preg_replace($markdown_attr_re, '', $tag);
2158
2159                                         # Check if text inside this tag must be parsed in span mode.
2160                                         $this->mode = $attr_m[2] . $attr_m[3];
2161                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2162                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2163
2164                                         # Calculate indent before tag.
2165                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2166                                                 $strlen = $this->utf8_strlen;
2167                                                 $indent = $strlen($matches[1], 'UTF-8');
2168                                         } else {
2169                                                 $indent = 0;
2170                                         }
2171
2172                                         # End preceding block with this tag.
2173                                         $block_text .= $tag;
2174                                         $parsed .= $this->$hash_method($block_text);
2175
2176                                         # Get enclosing tag name for the ParseMarkdown function.
2177                                         # (This pattern makes $tag_name_re safe without quoting.)
2178                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2179                                         $tag_name_re = $matches[1];
2180
2181                                         # Parse the content using the HTML-in-Markdown parser.
2182                                         list ($block_text, $text)
2183                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
2184                                                         $tag_name_re, $span_mode);
2185
2186                                         # Outdent markdown text.
2187                                         if ($indent > 0) {
2188                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2189                                                                                                         $block_text);
2190                                         }
2191
2192                                         # Append tag content to parsed text.
2193                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
2194                                         else                            $parsed .= "$block_text";
2195
2196                                         # Start over a new block.
2197                                         $block_text = "";
2198                                 }
2199                                 else $block_text .= $tag;
2200                         }
2201
2202                 } while ($depth > 0);
2203
2204                 #
2205                 # Hash last block text that wasn't processed inside the loop.
2206                 #
2207                 $parsed .= $this->$hash_method($block_text);
2208
2209                 return array($parsed, $text);
2210         }
2211
2212
2213         function hashClean($text) {
2214         #
2215         # Called whenever a tag must be hashed when a function insert a "clean" tag
2216         # in $text, it pass through this function and is automaticaly escaped,
2217         # blocking invalid nested overlap.
2218         #
2219                 return $this->hashPart($text, 'C');
2220         }
2221
2222
2223         function doHeaders($text) {
2224         #
2225         # Redefined to add id attribute support.
2226         #
2227                 # Setext-style headers:
2228                 #         Header 1  {#header1}
2229                 #         ========
2230                 #
2231                 #         Header 2  {#header2}
2232                 #         --------
2233                 #
2234                 $text = preg_replace_callback(
2235                         '{
2236                                 (^.+?)                                                          # $1: Header text
2237                                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?        # $2: Id attribute
2238                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
2239                         }mx',
2240                         array(&$this, '_doHeaders_callback_setext'), $text);
2241
2242                 # atx-style headers:
2243                 #       # Header 1        {#header1}
2244                 #       ## Header 2       {#header2}
2245                 #       ## Header 2 with closing hashes ##  {#header3}
2246                 #       ...
2247                 #       ###### Header 6   {#header2}
2248                 #
2249                 $text = preg_replace_callback('{
2250                                 ^(\#{1,6})      # $1 = string of #\'s
2251                                 [ ]*
2252                                 (.+?)           # $2 = Header text
2253                                 [ ]*
2254                                 \#*                     # optional closing #\'s (not counted)
2255                                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
2256                                 [ ]*
2257                                 \n+
2258                         }xm',
2259                         array(&$this, '_doHeaders_callback_atx'), $text);
2260
2261                 return $text;
2262         }
2263         function _doHeaders_attr($attr) {
2264                 if (empty($attr))  return "";
2265                 return " id=\"$attr\"";
2266         }
2267         function _doHeaders_callback_setext($matches) {
2268                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2269                         return $matches[0];
2270                 $level = $matches[3]{0} == '=' ? 1 : 2;
2271                 $attr  = $this->_doHeaders_attr($id =& $matches[2]);
2272                 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2273                 return "\n" . $this->hashBlock($block) . "\n\n";
2274         }
2275         function _doHeaders_callback_atx($matches) {
2276                 $level = strlen($matches[1]);
2277                 $attr  = $this->_doHeaders_attr($id =& $matches[3]);
2278                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2279                 return "\n" . $this->hashBlock($block) . "\n\n";
2280         }
2281
2282
2283         function doTables($text) {
2284         #
2285         # Form HTML tables.
2286         #
2287                 $less_than_tab = $this->tab_width - 1;
2288                 #
2289                 # Find tables with leading pipe.
2290                 #
2291                 #       | Header 1 | Header 2
2292                 #       | -------- | --------
2293                 #       | Cell 1   | Cell 2
2294                 #       | Cell 3   | Cell 4
2295                 #
2296                 $text = preg_replace_callback('
2297                         {
2298                                 ^                                                       # Start of a line
2299                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2300                                 [|]                                                     # Optional leading pipe (present)
2301                                 (.+) \n                                         # $1: Header row (at least one pipe)
2302
2303                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2304                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
2305
2306                                 (                                                       # $3: Cells
2307                                         (?>
2308                                                 [ ]*                            # Allowed whitespace.
2309                                                 [|] .* \n                       # Row content.
2310                                         )*
2311                                 )
2312                                 (?=\n|\Z)                                       # Stop at final double newline.
2313                         }xm',
2314                         array(&$this, '_doTable_leadingPipe_callback'), $text);
2315
2316                 #
2317                 # Find tables without leading pipe.
2318                 #
2319                 #       Header 1 | Header 2
2320                 #       -------- | --------
2321                 #       Cell 1   | Cell 2
2322                 #       Cell 3   | Cell 4
2323                 #
2324                 $text = preg_replace_callback('
2325                         {
2326                                 ^                                                       # Start of a line
2327                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2328                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
2329
2330                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2331                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
2332
2333                                 (                                                       # $3: Cells
2334                                         (?>
2335                                                 .* [|] .* \n            # Row content
2336                                         )*
2337                                 )
2338                                 (?=\n|\Z)                                       # Stop at final double newline.
2339                         }xm',
2340                         array(&$this, '_DoTable_callback'), $text);
2341
2342                 return $text;
2343         }
2344         function _doTable_leadingPipe_callback($matches) {
2345                 $head           = $matches[1];
2346                 $underline      = $matches[2];
2347                 $content        = $matches[3];
2348
2349                 # Remove leading pipe for each row.
2350                 $content        = preg_replace('/^ *[|]/m', '', $content);
2351
2352                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2353         }
2354         function _doTable_callback($matches) {
2355                 $head           = $matches[1];
2356                 $underline      = $matches[2];
2357                 $content        = $matches[3];
2358
2359                 # Remove any tailing pipes for each line.
2360                 $head           = preg_replace('/[|] *$/m', '', $head);
2361                 $underline      = preg_replace('/[|] *$/m', '', $underline);
2362                 $content        = preg_replace('/[|] *$/m', '', $content);
2363
2364                 # Reading alignement from header underline.
2365                 $separators     = preg_split('/ *[|] */', $underline);
2366                 foreach ($separators as $n => $s) {
2367                         if (preg_match('/^ *-+: *$/', $s))              $attr[$n] = ' align="right"';
2368                         else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
2369                         else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
2370                         else                                                                    $attr[$n] = '';
2371                 }
2372
2373                 # Parsing span elements, including code spans, character escapes,
2374                 # and inline HTML tags, so that pipes inside those gets ignored.
2375                 $head           = $this->parseSpan($head);
2376                 $headers        = preg_split('/ *[|] */', $head);
2377                 $col_count      = count($headers);
2378
2379                 # Write column headers.
2380                 $text = "<table>\n";
2381                 $text .= "<thead>\n";
2382                 $text .= "<tr>\n";
2383                 foreach ($headers as $n => $header)
2384                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2385                 $text .= "</tr>\n";
2386                 $text .= "</thead>\n";
2387
2388                 # Split content by row.
2389                 $rows = explode("\n", trim($content, "\n"));
2390
2391                 $text .= "<tbody>\n";
2392                 foreach ($rows as $row) {
2393                         # Parsing span elements, including code spans, character escapes,
2394                         # and inline HTML tags, so that pipes inside those gets ignored.
2395                         $row = $this->parseSpan($row);
2396
2397                         # Split row by cell.
2398                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
2399                         $row_cells = array_pad($row_cells, $col_count, '');
2400
2401                         $text .= "<tr>\n";
2402                         foreach ($row_cells as $n => $cell)
2403                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2404                         $text .= "</tr>\n";
2405                 }
2406                 $text .= "</tbody>\n";
2407                 $text .= "</table>";
2408
2409                 return $this->hashBlock($text) . "\n";
2410         }
2411
2412
2413         function doDefLists($text) {
2414         #
2415         # Form HTML definition lists.
2416         #
2417                 $less_than_tab = $this->tab_width - 1;
2418
2419                 # Re-usable pattern to match any entire dl list:
2420                 $whole_list_re = '(?>
2421                         (                                                               # $1 = whole list
2422                           (                                                             # $2
2423                                 [ ]{0,'.$less_than_tab.'}
2424                                 ((?>.*\S.*\n)+)                         # $3 = defined term
2425                                 \n?
2426                                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2427                           )
2428                           (?s:.+?)
2429                           (                                                             # $4
2430                                   \z
2431                                 |
2432                                   \n{2,}
2433                                   (?=\S)
2434                                   (?!                                           # Negative lookahead for another term
2435                                         [ ]{0,'.$less_than_tab.'}
2436                                         (?: \S.*\n )+?                  # defined term
2437                                         \n?
2438                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2439                                   )
2440                                   (?!                                           # Negative lookahead for another definition
2441                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2442                                   )
2443                           )
2444                         )
2445                 )'; // mx
2446
2447                 $text = preg_replace_callback('{
2448                                 (?>\A\n?|(?<=\n\n))
2449                                 '.$whole_list_re.'
2450                         }mx',
2451                         array(&$this, '_doDefLists_callback'), $text);
2452
2453                 return $text;
2454         }
2455         function _doDefLists_callback($matches) {
2456                 # Re-usable patterns to match list item bullets and number markers:
2457                 $list = $matches[1];
2458
2459                 # Turn double returns into triple returns, so that we can make a
2460                 # paragraph for the last item in a list, if necessary:
2461                 $result = trim($this->processDefListItems($list));
2462                 $result = "<dl>\n" . $result . "\n</dl>";
2463                 return $this->hashBlock($result) . "\n\n";
2464         }
2465
2466
2467         function processDefListItems($list_str) {
2468         #
2469         #       Process the contents of a single definition list, splitting it
2470         #       into individual term and definition list items.
2471         #
2472                 $less_than_tab = $this->tab_width - 1;
2473
2474                 # trim trailing blank lines:
2475                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2476
2477                 # Process definition terms.
2478                 $list_str = preg_replace_callback('{
2479                         (?>\A\n?|\n\n+)                                 # leading line
2480                         (                                                               # definition terms = $1
2481                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
2482                                 (?![:][ ]|[ ])                          # negative lookahead for a definition
2483                                                                                         #   mark (colon) or more whitespace.
2484                                 (?> \S.* \n)+?                          # actual term (not whitespace).
2485                         )
2486                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed
2487                                                                                         #   with a definition mark.
2488                         }xm',
2489                         array(&$this, '_processDefListItems_callback_dt'), $list_str);
2490
2491                 # Process actual definitions.
2492                 $list_str = preg_replace_callback('{
2493                         \n(\n+)?                                                # leading line = $1
2494                         (                                                               # marker space = $2
2495                                 [ ]{0,'.$less_than_tab.'}       # whitespace before colon
2496                                 [:][ ]+                                         # definition mark (colon)
2497                         )
2498                         ((?s:.+?))                                              # definition text = $3
2499                         (?= \n+                                                 # stop at next definition mark,
2500                                 (?:                                                     # next term or end of text
2501                                         [ ]{0,'.$less_than_tab.'} [:][ ]        |
2502                                         <dt> | \z
2503                                 )
2504                         )
2505                         }xm',
2506                         array(&$this, '_processDefListItems_callback_dd'), $list_str);
2507
2508                 return $list_str;
2509         }
2510         function _processDefListItems_callback_dt($matches) {
2511                 $terms = explode("\n", trim($matches[1]));
2512                 $text = '';
2513                 foreach ($terms as $term) {
2514                         $term = $this->runSpanGamut(trim($term));
2515                         $text .= "\n<dt>" . $term . "</dt>";
2516                 }
2517                 return $text . "\n";
2518         }
2519         function _processDefListItems_callback_dd($matches) {
2520                 $leading_line   = $matches[1];
2521                 $marker_space   = $matches[2];
2522                 $def                    = $matches[3];
2523
2524                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
2525                         # Replace marker with the appropriate whitespace indentation
2526                         $def = str_repeat(' ', strlen($marker_space)) . $def;
2527                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2528                         $def = "\n". $def ."\n";
2529                 }
2530                 else {
2531                         $def = rtrim($def);
2532                         $def = $this->runSpanGamut($this->outdent($def));
2533                 }
2534
2535                 return "\n<dd>" . $def . "</dd>\n";
2536         }
2537
2538
2539         function doFencedCodeBlocks($text) {
2540         #
2541         # Adding the fenced code block syntax to regular Markdown:
2542         #
2543         # ~~~
2544         # Code block
2545         # ~~~
2546         #
2547                 $less_than_tab = $this->tab_width;
2548
2549                 $text = preg_replace_callback('{
2550                                 (?:\n|\A)
2551                                 # 1: Opening marker
2552                                 (
2553                                         ~{3,} # Marker: three tilde or more.
2554                                 )
2555                                 [ ]* \n # Whitespace and newline following marker.
2556
2557                                 # 2: Content
2558                                 (
2559                                         (?>
2560                                                 (?!\1 [ ]* \n)  # Not a closing marker.
2561                                                 .*\n+
2562                                         )+
2563                                 )
2564
2565                                 # Closing marker.
2566                                 \1 [ ]* \n
2567                         }xm',
2568                         array(&$this, '_doFencedCodeBlocks_callback'), $text);
2569
2570                 return $text;
2571         }
2572         function _doFencedCodeBlocks_callback($matches) {
2573                 $codeblock = $matches[2];
2574                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2575                 $codeblock = preg_replace_callback('/^\n+/',
2576                         array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
2577                 $codeblock = "<pre><code>$codeblock</code></pre>";
2578                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
2579         }
2580         function _doFencedCodeBlocks_newlines($matches) {
2581                 return str_repeat("<br$this->empty_element_suffix",
2582                         strlen($matches[0]));
2583         }
2584
2585
        #
        # Redefining emphasis markers so that emphasis by underscore does not
        # work in the middle of a word.
        #
        # Each table maps a marker string to a regex fragment:
        #
        #  - The '' entry matches a marker that is followed by non-whitespace
        #    (or `$`) and is not immediately followed by one of . , : ; plus
        #    whitespace. Its underscore form additionally refuses a letter,
        #    digit or underscore right before the marker.
        #  - The other entries match a marker that is preceded by
        #    non-whitespace (or `^`). Their underscore form additionally
        #    refuses a letter, digit or underscore right after the marker.
        #
        # NOTE(review): presumably '' is the pattern used while no emphasis is
        # open and the other keys are used while that marker is open; the code
        # consuming these tables is not in this part of the file -- confirm.
        #
        # Single markers (* and _).
        var $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![.,:;]\s)',
                '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
                '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
                );
        # Double markers (** and __).
        var $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![.,:;]\s)',
                '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
                '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
                );
        # Triple markers (*** and ___).
        var $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![.,:;]\s)',
                '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
                '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
                );
2605
2606
2607         function formParagraphs($text) {
2608         #
2609         #       Params:
2610         #               $text - string to process with html <p> tags
2611         #
2612                 # Strip leading and trailing lines:
2613                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2614
2615                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2616
2617                 #
2618                 # Wrap <p> tags and unhashify HTML blocks
2619                 #
2620                 foreach ($grafs as $key => $value) {
2621                         $value = trim($this->runSpanGamut($value));
2622
2623                         # Check if this should be enclosed in a paragraph.
2624                         # Clean tag hashes & block tag hashes are left alone.
2625                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2626
2627                         if ($is_p) {
2628                                 $value = "<p>$value</p>";
2629                         }
2630                         $grafs[$key] = $value;
2631                 }
2632
2633                 # Join grafs in one text, then unhash HTML tags.
2634                 $text = implode("\n\n", $grafs);
2635
2636                 # Finish by removing any tag hashes still present in $text.
2637                 $text = $this->unhash($text);
2638
2639                 return $text;
2640         }
2641
2642
2643         ### Footnotes
2644
2645         function stripFootnotes($text) {
2646         #
2647         # Strips link definitions from text, stores the URLs and titles in
2648         # hash references.
2649         #
2650                 $less_than_tab = $this->tab_width - 1;
2651
2652                 # Link defs are in the form: [^id]: url "optional title"
2653                 $text = preg_replace_callback('{
2654                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
2655                           [ ]*
2656                           \n?                                   # maybe *one* newline
2657                         (                                               # text = $2 (no blank lines allowed)
2658                                 (?:
2659                                         .+                              # actual text
2660                                 |
2661                                         \n                              # newlines but
2662                                         (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2663                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2664                                                                         # by non-indented content
2665                                 )*
2666                         )
2667                         }xm',
2668                         array(&$this, '_stripFootnotes_callback'),
2669                         $text);
2670                 return $text;
2671         }
2672         function _stripFootnotes_callback($matches) {
2673                 $note_id = $this->fn_id_prefix . $matches[1];
2674                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
2675                 return ''; # String that will replace the block
2676         }
2677
2678
2679         function doFootnotes($text) {
2680         #
2681         # Replace footnote references in $text [^id] with a special text-token
2682         # which will be replaced by the actual footnote marker in appendFootnotes.
2683         #
2684                 if (!$this->in_anchor) {
2685                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2686                 }
2687                 return $text;
2688         }
2689
2690
2691         function appendFootnotes($text) {
2692         #
2693         # Append footnote list to text.
2694         #
2695                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
2696                         array(&$this, '_appendFootnotes_callback'), $text);
2697
2698                 if (!empty($this->footnotes_ordered)) {
2699                         $text .= "\n\n";
2700                         $text .= "<div class=\"footnotes\">\n";
2701                         $text .= "<hr". $this->empty_element_suffix ."\n";
2702                         $text .= "<ol>\n\n";
2703
2704                         $attr = " rev=\"footnote\"";
2705                         if ($this->fn_backlink_class != "") {
2706                                 $class = $this->fn_backlink_class;
2707                                 $class = $this->encodeAttribute($class);
2708                                 $attr .= " class=\"$class\"";
2709                         }
2710                         if ($this->fn_backlink_title != "") {
2711                                 $title = $this->fn_backlink_title;
2712                                 $title = $this->encodeAttribute($title);
2713                                 $attr .= " title=\"$title\"";
2714                         }
2715                         $num = 0;
2716
2717                         while (!empty($this->footnotes_ordered)) {
2718                                 $footnote = reset($this->footnotes_ordered);
2719                                 $note_id = key($this->footnotes_ordered);
2720                                 unset($this->footnotes_ordered[$note_id]);
2721
2722                                 $footnote .= "\n"; # Need to append newline before parsing.
2723                                 $footnote = $this->runBlockGamut("$footnote\n");
2724                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
2725                                         array(&$this, '_appendFootnotes_callback'), $footnote);
2726
2727                                 $attr = str_replace("%%", ++$num, $attr);
2728                                 $note_id = $this->encodeAttribute($note_id);
2729
2730                                 # Add backlink to last paragraph; create new paragraph if needed.
2731                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
2732                                 if (preg_match('{</p>$}', $footnote)) {
2733                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
2734                                 } else {
2735                                         $footnote .= "\n\n<p>$backlink</p>";
2736                                 }
2737
2738                                 $text .= "<li id=\"fn:$note_id\">\n";
2739                                 $text .= $footnote . "\n";
2740                                 $text .= "</li>\n\n";
2741                         }
2742
2743                         $text .= "</ol>\n";
2744                         $text .= "</div>";
2745                 }
2746                 return $text;
2747         }
2748         function _appendFootnotes_callback($matches) {
2749                 $node_id = $this->fn_id_prefix . $matches[1];
2750
2751                 # Create footnote marker only if it has a corresponding footnote *and*
2752                 # the footnote hasn't been used by another marker.
2753                 if (isset($this->footnotes[$node_id])) {
2754                         # Transfert footnote content to the ordered list.
2755                         $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
2756                         unset($this->footnotes[$node_id]);
2757
2758                         $num = $this->footnote_counter++;
2759                         $attr = " rel=\"footnote\"";
2760                         if ($this->fn_link_class != "") {
2761                                 $class = $this->fn_link_class;
2762                                 $class = $this->encodeAttribute($class);
2763                                 $attr .= " class=\"$class\"";
2764                         }
2765                         if ($this->fn_link_title != "") {
2766                                 $title = $this->fn_link_title;
2767                                 $title = $this->encodeAttribute($title);
2768                                 $attr .= " title=\"$title\"";
2769                         }
2770
2771                         $attr = str_replace("%%", $num, $attr);
2772                         $node_id = $this->encodeAttribute($node_id);
2773
2774                         return
2775                                 "<sup id=\"fnref:$node_id\">".
2776                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
2777                                 "</sup>";
2778                 }
2779
2780                 return "[^".$matches[1]."]";
2781         }
2782
2783
2784         ### Abbreviations ###
2785
2786         function stripAbbreviations($text) {
2787         #
2788         # Strips abbreviations from text, stores titles in hash references.
2789         #
2790                 $less_than_tab = $this->tab_width - 1;
2791
2792                 # Link defs are in the form: [id]*: url "optional title"
2793                 $text = preg_replace_callback('{
2794                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
2795                         (.*)                                    # text = $2 (no blank lines allowed)
2796                         }xm',
2797                         array(&$this, '_stripAbbreviations_callback'),
2798                         $text);
2799                 return $text;
2800         }
2801         function _stripAbbreviations_callback($matches) {
2802                 $abbr_word = $matches[1];
2803                 $abbr_desc = $matches[2];
2804                 if ($this->abbr_word_re)
2805                         $this->abbr_word_re .= '|';
2806                 $this->abbr_word_re .= preg_quote($abbr_word);
2807                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
2808                 return ''; # String that will replace the block
2809         }
2810
2811
2812         function doAbbreviations($text) {
2813         #
2814         # Find defined abbreviations in text and wrap them in <abbr> elements.
2815         #
2816                 if ($this->abbr_word_re) {
2817                         // cannot use the /x modifier because abbr_word_re may
2818                         // contain significant spaces:
2819                         $text = preg_replace_callback('{'.
2820                                 '(?<![\w\x1A])'.
2821                                 '(?:'.$this->abbr_word_re.')'.
2822                                 '(?![\w\x1A])'.
2823                                 '}',
2824                                 array(&$this, '_doAbbreviations_callback'), $text);
2825                 }
2826                 return $text;
2827         }
2828         function _doAbbreviations_callback($matches) {
2829                 $abbr = $matches[0];
2830                 if (isset($this->abbr_desciptions[$abbr])) {
2831                         $desc = $this->abbr_desciptions[$abbr];
2832                         if (empty($desc)) {
2833                                 return $this->hashPart("<abbr>$abbr</abbr>");
2834                         } else {
2835                                 $desc = $this->encodeAttribute($desc);
2836                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
2837                         }
2838                 } else {
2839                         return $matches[0];
2840                 }
2841         }
2842
2843 }
2844
2845
2846 /*
2847
2848 PHP Markdown Extra
2849 ==================
2850
2851 Description
2852 -----------
2853
2854 This is a PHP port of the original Markdown formatter written in Perl
2855 by John Gruber. This special "Extra" version of PHP Markdown features
2856 further enhancements to the syntax for making additional constructs
such as tables and definition lists.
2858
2859 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2860 easy-to-write structured text format into HTML. Markdown's text format
2861 is most similar to that of plain text email, and supports features such
2862 as headers, *emphasis*, code blocks, blockquotes, and links.
2863
2864 Markdown's syntax is designed not as a generic markup language, but
2865 specifically to serve as a front-end to (X)HTML. You can use span-level
2866 HTML tags anywhere in a Markdown document, and you can use block level
HTML tags (like <div> and <table>) as well.
2868
2869 For more information about Markdown's syntax, see:
2870
2871 <http://daringfireball.net/projects/markdown/>
2872
2873
2874 Bugs
2875 ----
2876
2877 To file bug reports please send email to:
2878
2879 <michel.fortin@michelf.com>
2880
2881 Please include with your report: (1) the example input; (2) the output you
2882 expected; (3) the output Markdown actually produced.
2883
2884
2885 Version History
2886 ---------------
2887
2888 See the readme file for detailed release notes for this version.
2889
2890
2891 Copyright and License
2892 ---------------------
2893
2894 PHP Markdown & Extra
2895 Copyright (c) 2004-2009 Michel Fortin
2896 <http://michelf.com/>
2897 All rights reserved.
2898
2899 Based on Markdown
2900 Copyright (c) 2003-2006 John Gruber
2901 <http://daringfireball.net/>
2902 All rights reserved.
2903
2904 Redistribution and use in source and binary forms, with or without
2905 modification, are permitted provided that the following conditions are
2906 met:
2907
2908 *       Redistributions of source code must retain the above copyright notice,
2909         this list of conditions and the following disclaimer.
2910
2911 *       Redistributions in binary form must reproduce the above copyright
2912         notice, this list of conditions and the following disclaimer in the
2913         documentation and/or other materials provided with the distribution.
2914
2915 *       Neither the name "Markdown" nor the names of its contributors may
2916         be used to endorse or promote products derived from this software
2917         without specific prior written permission.
2918
2919 This software is provided by the copyright holders and contributors "as
2920 is" and any express or implied warranties, including, but not limited
2921 to, the implied warranties of merchantability and fitness for a
2922 particular purpose are disclaimed. In no event shall the copyright owner
2923 or contributors be liable for any direct, indirect, incidental, special,
2924 exemplary, or consequential damages (including, but not limited to,
2925 procurement of substitute goods or services; loss of use, data, or
2926 profits; or business interruption) however caused and on any theory of
2927 liability, whether in contract, strict liability, or tort (including
2928 negligence or otherwise) arising in any way out of the use of this
2929 software, even if advised of the possibility of such damage.
2930
2931 */
2932 ?>