lib/markdown.php

   1 <?php
   2
   3 #
   4 # Markdown  -  A text-to-HTML conversion tool for web writers
   5 #
   6 # Copyright (c) 2004 John Gruber
   7 # <http://daringfireball.net/projects/markdown/>
   8 #
   9 # Copyright (c) 2004-2005 Michel Fortin - PHP Port
  10 # <http://www.michelf.com/projects/php-markdown/>
  11 #
  12
  13
  14 global    $MarkdownPHPVersion, $MarkdownSyntaxVersion,
  15         $md_empty_element_suffix, $md_tab_width,
  16         $md_nested_brackets_depth, $md_nested_brackets,
  17         $md_escape_table, $md_backslash_escape_table,
  18         $md_list_level;
  19
  20 $MarkdownPHPVersion    = '1.0.1a'; # Fri 15 Apr 2005
  21 $MarkdownSyntaxVersion = '1.0.1';  # Sun 12 Dec 2004
  22
  23
  24 #
  25 # Global default settings:
  26 #
  27 $md_empty_element_suffix = " />";     # Change to ">" for HTML output
  28 $md_tab_width = 4;
  29
  30
  31 # -- WordPress Plugin Interface -----------------------------------------------
  32 /*
  33 Plugin Name: Markdown
  34 Plugin URI: http://www.michelf.com/projects/php-markdown/
  35 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  36 Version: 1.0.1a
  37 Author: Michel Fortin
  38 Author URI: http://www.michelf.com/
  39 */
  40 if (isset($wp_version)) {
  41     # Remove default WordPress auto-paragraph filter.
  42     remove_filter('the_content',  'wpautop');
  43     remove_filter('the_excerpt',  'wpautop');
  44     remove_filter('comment_text', 'wpautop');
  45     # Add Markdown filter with priority 6 (same as Textile).
  46     add_filter('the_content',     'Markdown', 6);
  47     add_filter('the_excerpt',     'Markdown', 6);
  48     add_filter('the_excerpt_rss', 'Markdown', 6);
  49     add_filter('comment_text',    'Markdown', 6);
  50     add_filter('comment_excerpt', 'Markdown', 6);
  51
  52     # Postpone the not-allowed-tag-filter until Markdown has run. For comments,
  53     # it would probably be better to filter with Markdown before they are
  54     # added in the database, but doing this would break older sites.
  55     remove_filter('pre_comment_content', 'wp_filter_kses');
  56     add_filter('comment_text', 'wp_filter_kses', 45);
  57
  58     # Make balenceTags work *after* Markdown. You can still disable
  59     # balanceTags from the admin interface (in Options > Writing).
  60     remove_filter('content_save_pre', 'balanceTags', 50);
  61     remove_filter('excerpt_save_pre', 'balanceTags', 50);
  62     remove_filter('comment_save_pre', 'balanceTags', 50);
  63     add_filter('the_content',     'balanceTags', 50);
  64     add_filter('the_excerpt',     'balanceTags', 50);
  65     add_filter('the_excerpt_rss', 'balanceTags', 50);
  66     add_filter('comment_text',    'balanceTags', 50);
  67     add_filter('comment_excerpt', 'balanceTags', 50);
  68 }
  69
  70
  71 # -- bBlog Plugin Info --------------------------------------------------------
  72 function identify_modifier_markdown() {
  73     global $MarkdownPHPVersion;
  74     return array(
  75         'name'            => 'markdown',
  76         'type'            => 'modifier',
  77         'nicename'        => 'Markdown',
  78         'description'    => 'A text-to-HTML conversion tool for web writers',
  79         'authors'        => 'Michel Fortin and John Gruber',
  80         'licence'        => 'GPL',
  81         'version'        => $MarkdownPHPVersion,
  82         'help'            => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>'
  83     );
  84 }
  85
  86 # -- Smarty Modifier Interface ------------------------------------------------
  87 function smarty_modifier_markdown($text) {
  88     return Markdown($text);
  89 }
  90
  91 # -- Textile Compatibility Mode -----------------------------------------------
  92 # Rename this file to "classTextile.php" and it can replace Textile anywhere.
  93 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
  94     # Try to include PHP SmartyPants. Should be in the same directory.
  95     @include_once 'smartypants.php';
  96     # Fake Textile class. It calls Markdown instead.
  97     class Textile {
  98         function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
  99             if ($lite == '' && $encode == '')   $text = Markdown($text);
 100             if (function_exists('SmartyPants')) $text = SmartyPants($text);
 101             return $text;
 102         }
 103     }
 104 }
 105
 106
 107
 108 #
 109 # Globals:
 110 #
 111
 112 # Regex to match balanced [brackets].
 113 # Needed to insert a maximum bracked depth while converting to PHP.
 114 $md_nested_brackets_depth = 6;
 115 $md_nested_brackets =
 116     str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
 117     str_repeat('\])*', $md_nested_brackets_depth);
 118
 119 # Table of hash values for escaped characters:
 120 $md_escape_table = array(
 121     "\\" => md5("\\"),
 122     "`" => md5("`"),
 123     "*" => md5("*"),
 124     "_" => md5("_"),
 125     "{" => md5("{"),
 126     "}" => md5("}"),
 127     "[" => md5("["),
 128     "]" => md5("]"),
 129     "(" => md5("("),
 130     ")" => md5(")"),
 131     ">" => md5(">"),
 132     "#" => md5("#"),
 133     "+" => md5("+"),
 134     "-" => md5("-"),
 135     "." => md5("."),
 136     "!" => md5("!")
 137 );
 138 # Create an identical table but for escaped characters.
 139 $md_backslash_escape_table;
 140 foreach ($md_escape_table as $key => $char)
 141     $md_backslash_escape_table["\\$key"] = $char;
 142
 143
 144 function Markdown($text) {
 145 #
 146 # Main function. The order in which other subs are called here is
 147 # essential. Link and image substitutions need to happen before
 148 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
 149 # and <img> tags get encoded.
 150 #
 151     # Clear the global hashes. If we don't clear these, you get conflicts
 152     # from other articles when generating a page which contains more than
 153     # one article (e.g. an index page that shows the N most recent
 154     # articles):
 155     global $md_urls, $md_titles, $md_html_blocks;
 156     $md_urls = array();
 157     $md_titles = array();
 158     $md_html_blocks = array();
 159
 160     # Standardize line endings:
 161     #   DOS to Unix and Mac to Unix
 162     $text = str_replace(array("\r\n", "\r"), "\n", $text);
 163
 164     # Make sure $text ends with a couple of newlines:
 165     $text .= "\n\n";
 166
 167     # Convert all tabs to spaces.
 168     $text = _Detab($text);
 169
 170     # Strip any lines consisting only of spaces and tabs.
 171     # This makes subsequent regexen easier to write, because we can
 172     # match consecutive blank lines with /\n+/ instead of something
 173     # contorted like /[ \t]*\n+/ .
 174     $text = preg_replace('/^[ \t]+$/m', '', $text);
 175
 176     # Turn block-level HTML blocks into hash entries
 177     $text = _HashHTMLBlocks($text);
 178
 179     # Strip link definitions, store in hashes.
 180     $text = _StripLinkDefinitions($text);
 181
 182     $text = _RunBlockGamut($text);
 183
 184     $text = _UnescapeSpecialChars($text);
 185
 186     return $text . "\n";
 187 }
 188
 189
 190 function _StripLinkDefinitions($text) {
 191 #
 192 # Strips link definitions from text, stores the URLs and titles in
 193 # hash references.
 194 #
 195     global $md_tab_width;
 196     $less_than_tab = $md_tab_width - 1;
 197
 198     # Link defs are in the form: ^[id]: url "optional title"
 199     $text = preg_replace_callback('{
 200                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\]:    # id = $1
 201                           [ \t]*
 202                           \n?                # maybe *one* newline
 203                           [ \t]*
 204                         <?(\S+?)>?            # url = $2
 205                           [ \t]*
 206                           \n?                # maybe one newline
 207                           [ \t]*
 208                         (?:
 209                             (?<=\s)            # lookbehind for whitespace
 210                             ["(]
 211                             (.+?)            # title = $3
 212                             [")]
 213                             [ \t]*
 214                         )?    # title is optional
 215                         (?:\n+|\Z)
 216         }xm',
 217         '_StripLinkDefinitions_callback',
 218         $text);
 219     return $text;
 220 }
 221 function _StripLinkDefinitions_callback($matches) {
 222     global $md_urls, $md_titles;
 223     $link_id = strtolower($matches[1]);
 224     $md_urls[$link_id] = _EncodeAmpsAndAngles($matches[2]);
 225     if (isset($matches[3]))
 226         $md_titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
 227     return ''; # String that will replace the block
 228 }
 229
 230
 231 function _HashHTMLBlocks($text) {
 232     global $md_tab_width;
 233     $less_than_tab = $md_tab_width - 1;
 234
 235     # Hashify HTML blocks:
 236     # We only want to do this for block-level HTML tags, such as headers,
 237     # lists, and tables. That's because we still want to wrap <p>s around
 238     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 239     # phrase emphasis, and spans. The list of tags we're looking for is
 240     # hard-coded:
 241     $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
 242                     'script|noscript|form|fieldset|iframe|math|ins|del';
 243     $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
 244                     'script|noscript|form|fieldset|iframe|math';
 245
 246     # First, look for nested blocks, e.g.:
 247     #     <div>
 248     #         <div>
 249     #         tags for inner block must be indented.
 250     #         </div>
 251     #     </div>
 252     #
 253     # The outermost tags must start at the left margin for this to match, and
 254     # the inner nested divs must be indented.
 255     # We need to do this before the next, more liberal match, because the next
 256     # match will start at the first `<div>` and stop at the first `</div>`.
 257     $text = preg_replace_callback("{
 258                 (                        # save in $1
 259                     ^                    # start of line  (with /m)
 260                     <($block_tags_a)    # start tag = $2
 261                     \\b                    # word break
 262                     (.*\\n)*?            # any number of lines, minimally matching
 263                     </\\2>                # the matching end tag
 264                     [ \\t]*                # trailing spaces/tabs
 265                     (?=\\n+|\\Z)    # followed by a newline or end of document
 266                 )
 267         }xm",
 268         '_HashHTMLBlocks_callback',
 269         $text);
 270
 271     #
 272     # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 273     #
 274     $text = preg_replace_callback("{
 275                 (                        # save in $1
 276                     ^                    # start of line  (with /m)
 277                     <($block_tags_b)    # start tag = $2
 278                     \\b                    # word break
 279                     (.*\\n)*?            # any number of lines, minimally matching
 280                     .*</\\2>                # the matching end tag
 281                     [ \\t]*                # trailing spaces/tabs
 282                     (?=\\n+|\\Z)    # followed by a newline or end of document
 283                 )
 284         }xm",
 285         '_HashHTMLBlocks_callback',
 286         $text);
 287
 288     # Special case just for <hr />. It was easier to make a special case than
 289     # to make the other regex more complicated.
 290     $text = preg_replace_callback('{
 291                 (?:
 292                     (?<=\n\n)        # Starting after a blank line
 293                     |                # or
 294                     \A\n?            # the beginning of the doc
 295                 )
 296                 (                        # save in $1
 297                     [ ]{0,'.$less_than_tab.'}
 298                     <(hr)                # start tag = $2
 299                     \b                    # word break
 300                     ([^<>])*?            #
 301                     /?>                    # the matching end tag
 302                     [ \t]*
 303                     (?=\n{2,}|\Z)        # followed by a blank line or end of document
 304                 )
 305         }x',
 306         '_HashHTMLBlocks_callback',
 307         $text);
 308
 309     # Special case for standalone HTML comments:
 310     $text = preg_replace_callback('{
 311                 (?:
 312                     (?<=\n\n)        # Starting after a blank line
 313                     |                # or
 314                     \A\n?            # the beginning of the doc
 315                 )
 316                 (                        # save in $1
 317                     [ ]{0,'.$less_than_tab.'}
 318                     (?s:
 319                         <!
 320                         (--.*?--\s*)+
 321                         >
 322                     )
 323                     [ \t]*
 324                     (?=\n{2,}|\Z)        # followed by a blank line or end of document
 325                 )
 326             }x',
 327             '_HashHTMLBlocks_callback',
 328             $text);
 329
 330     return $text;
 331 }
 332 function _HashHTMLBlocks_callback($matches) {
 333     global $md_html_blocks;
 334     $text = $matches[1];
 335     $key = md5($text);
 336     $md_html_blocks[$key] = $text;
 337     return "\n\n$key\n\n"; # String that will replace the block
 338 }
 339
 340
 341 function _RunBlockGamut($text) {
 342 #
 343 # These are all the transformations that form block-level
 344 # tags like paragraphs, headers, and list items.
 345 #
 346     global $md_empty_element_suffix;
 347
 348     $text = _DoHeaders($text);
 349
 350     # Do Horizontal Rules:
 351     $text = preg_replace(
 352         array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
 353               '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
 354               '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
 355         "\n<hr$md_empty_element_suffix\n",
 356         $text);
 357
 358     $text = _DoLists($text);
 359
 360     $text = _DoCodeBlocks($text);
 361
 362     $text = _DoBlockQuotes($text);
 363
 364     # We already ran _HashHTMLBlocks() before, in Markdown(), but that
 365     # was to escape raw HTML in the original Markdown source. This time,
 366     # we're escaping the markup we've just created, so that we don't wrap
 367     # <p> tags around block-level tags.
 368     $text = _HashHTMLBlocks($text);
 369
 370     $text = _FormParagraphs($text);
 371
 372     return $text;
 373 }
 374
 375
 376 function _RunSpanGamut($text) {
 377 #
 378 # These are all the transformations that occur *within* block-level
 379 # tags like paragraphs, headers, and list items.
 380 #
 381     global $md_empty_element_suffix;
 382
 383     $text = _DoCodeSpans($text);
 384
 385     $text = _EscapeSpecialChars($text);
 386
 387     # Process anchor and image tags. Images must come first,
 388     # because ![foo][f] looks like an anchor.
 389     $text = _DoImages($text);
 390     $text = _DoAnchors($text);
 391
 392     # Make links out of things like `<http://example.com/>`
 393     # Must come after _DoAnchors(), because you can use < and >
 394     # delimiters in inline links like [this](<url>).
 395     $text = _DoAutoLinks($text);
 396
 397     # Fix unencoded ampersands and <'s:
 398     $text = _EncodeAmpsAndAngles($text);
 399
 400     $text = _DoItalicsAndBold($text);
 401
 402     # Do hard breaks:
 403     $text = preg_replace('/ {2,}\n/', "<br$md_empty_element_suffix\n", $text);
 404
 405     return $text;
 406 }
 407
 408
 409 function _EscapeSpecialChars($text) {
 410     global $md_escape_table;
 411     $tokens = _TokenizeHTML($text);
 412
 413     $text = '';   # rebuild $text from the tokens
 414 #    $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
 415 #    $tags_to_skip = "!<(/?)(?:pre|code|kbd|script|math)[\s>]!";
 416
 417     foreach ($tokens as $cur_token) {
 418         if ($cur_token[0] == 'tag') {
 419             # Within tags, encode * and _ so they don't conflict
 420             # with their use in Markdown for italics and strong.
 421             # We're replacing each such character with its
 422             # corresponding MD5 checksum value; this is likely
 423             # overkill, but it should prevent us from colliding
 424             # with the escape values by accident.
 425             $cur_token[1] = str_replace(array('*', '_'),
 426                 array($md_escape_table['*'], $md_escape_table['_']),
 427                 $cur_token[1]);
 428             $text .= $cur_token[1];
 429         } else {
 430             $t = $cur_token[1];
 431             $t = _EncodeBackslashEscapes($t);
 432             $text .= $t;
 433         }
 434     }
 435     return $text;
 436 }
 437
 438
 439 function _DoAnchors($text) {
 440 #
 441 # Turn Markdown link shortcuts into XHTML <a> tags.
 442 #
 443     global $md_nested_brackets;
 444     #
 445     # First, handle reference-style links: [link text] [id]
 446     #
 447     $text = preg_replace_callback("{
 448         (                    # wrap whole match in $1
 449           \\[
 450             ($md_nested_brackets)    # link text = $2
 451           \\]
 452
 453           [ ]?                # one optional space
 454           (?:\\n[ ]*)?        # one optional newline followed by spaces
 455
 456           \\[
 457             (.*?)        # id = $3
 458           \\]
 459         )
 460         }xs",
 461         '_DoAnchors_reference_callback', $text);
 462
 463     #
 464     # Next, inline-style links: [link text](url "optional title")
 465     #
 466     $text = preg_replace_callback("{
 467         (                # wrap whole match in $1
 468           \\[
 469             ($md_nested_brackets)    # link text = $2
 470           \\]
 471           \\(            # literal paren
 472             [ \\t]*
 473             <?(.*?)>?    # href = $3
 474             [ \\t]*
 475             (            # $4
 476               (['\"])    # quote char = $5
 477               (.*?)        # Title = $6
 478               \\5        # matching quote
 479             )?            # title is optional
 480           \\)
 481         )
 482         }xs",
 483         '_DoAnchors_inline_callback', $text);
 484
 485     return $text;
 486 }
 487 function _DoAnchors_reference_callback($matches) {
 488     global $md_urls, $md_titles, $md_escape_table;
 489     $whole_match = $matches[1];
 490     $link_text   = $matches[2];
 491     $link_id     = strtolower($matches[3]);
 492
 493     if ($link_id == "") {
 494         $link_id = strtolower($link_text); # for shortcut links like [this][].
 495     }
 496
 497     if (isset($md_urls[$link_id])) {
 498         $url = $md_urls[$link_id];
 499         # We've got to encode these to avoid conflicting with italics/bold.
 500         $url = str_replace(array('*', '_'),
 501                            array($md_escape_table['*'], $md_escape_table['_']),
 502                            $url);
 503         $result = "<a href=\"$url\"";
 504         if ( isset( $md_titles[$link_id] ) ) {
 505             $title = $md_titles[$link_id];
 506             $title = str_replace(array('*',     '_'),
 507                                  array($md_escape_table['*'],
 508                                        $md_escape_table['_']), $title);
 509             $result .=  " title=\"$title\"";
 510         }
 511         $result .= ">$link_text</a>";
 512     }
 513     else {
 514         $result = $whole_match;
 515     }
 516     return $result;
 517 }
 518 function _DoAnchors_inline_callback($matches) {
 519     global $md_escape_table;
 520     $whole_match    = $matches[1];
 521     $link_text        = $matches[2];
 522     $url            = $matches[3];
 523     $title            =& $matches[6];
 524
 525     # We've got to encode these to avoid conflicting with italics/bold.
 526     $url = str_replace(array('*', '_'),
 527                        array($md_escape_table['*'], $md_escape_table['_']),
 528                        $url);
 529     $result = "<a href=\"$url\"";
 530     if (isset($title)) {
 531         $title = str_replace('"', '&quot;', $title);
 532         $title = str_replace(array('*', '_'),
 533                              array($md_escape_table['*'], $md_escape_table['_']),
 534                              $title);
 535         $result .=  " title=\"$title\"";
 536     }
 537
 538     $result .= ">$link_text</a>";
 539
 540     return $result;
 541 }
 542
 543
 544 function _DoImages($text) {
 545 #
 546 # Turn Markdown image shortcuts into <img> tags.
 547 #
 548     #
 549     # First, handle reference-style labeled images: ![alt text][id]
 550     #
 551     $text = preg_replace_callback('{
 552         (                # wrap whole match in $1
 553           !\[
 554             (.*?)        # alt text = $2
 555           \]
 556
 557           [ ]?                # one optional space
 558           (?:\n[ ]*)?        # one optional newline followed by spaces
 559
 560           \[
 561             (.*?)        # id = $3
 562           \]
 563
 564         )
 565         }xs',
 566         '_DoImages_reference_callback', $text);
 567
 568     #
 569     # Next, handle inline images:  ![alt text](url "optional title")
 570     # Don't forget: encode * and _
 571
 572     $text = preg_replace_callback("{
 573         (                # wrap whole match in $1
 574           !\\[
 575             (.*?)        # alt text = $2
 576           \\]
 577           \\(            # literal paren
 578             [ \\t]*
 579             <?(\S+?)>?    # src url = $3
 580             [ \\t]*
 581             (            # $4
 582               (['\"])    # quote char = $5
 583               (.*?)        # title = $6
 584               \\5        # matching quote
 585               [ \\t]*
 586             )?            # title is optional
 587           \\)
 588         )
 589         }xs",
 590         '_DoImages_inline_callback', $text);
 591
 592     return $text;
 593 }
 594 function _DoImages_reference_callback($matches) {
 595     global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;
 596     $whole_match = $matches[1];
 597     $alt_text    = $matches[2];
 598     $link_id     = strtolower($matches[3]);
 599
 600     if ($link_id == "") {
 601         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 602     }
 603
 604     $alt_text = str_replace('"', '&quot;', $alt_text);
 605     if (isset($md_urls[$link_id])) {
 606         $url = $md_urls[$link_id];
 607         # We've got to encode these to avoid conflicting with italics/bold.
 608         $url = str_replace(array('*', '_'),
 609                            array($md_escape_table['*'], $md_escape_table['_']),
 610                            $url);
 611         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 612         if (isset($md_titles[$link_id])) {
 613             $title = $md_titles[$link_id];
 614             $title = str_replace(array('*', '_'),
 615                                  array($md_escape_table['*'],
 616                                        $md_escape_table['_']), $title);
 617             $result .=  " title=\"$title\"";
 618         }
 619         $result .= $md_empty_element_suffix;
 620     }
 621     else {
 622         # If there's no such link ID, leave intact:
 623         $result = $whole_match;
 624     }
 625
 626     return $result;
 627 }
 628 function _DoImages_inline_callback($matches) {
 629     global $md_empty_element_suffix, $md_escape_table;
 630     $whole_match    = $matches[1];
 631     $alt_text        = $matches[2];
 632     $url            = $matches[3];
 633     $title            = '';
 634     if (isset($matches[6])) {
 635         $title        = $matches[6];
 636     }
 637
 638     $alt_text = str_replace('"', '&quot;', $alt_text);
 639     $title    = str_replace('"', '&quot;', $title);
 640     # We've got to encode these to avoid conflicting with italics/bold.
 641     $url = str_replace(array('*', '_'),
 642                        array($md_escape_table['*'], $md_escape_table['_']),
 643                        $url);
 644     $result = "<img src=\"$url\" alt=\"$alt_text\"";
 645     if (isset($title)) {
 646         $title = str_replace(array('*', '_'),
 647                              array($md_escape_table['*'], $md_escape_table['_']),
 648                              $title);
 649         $result .=  " title=\"$title\""; # $title already quoted
 650     }
 651     $result .= $md_empty_element_suffix;
 652
 653     return $result;
 654 }
 655
 656
 657 function _DoHeaders($text) {
 658     # Setext-style headers:
 659     #      Header 1
 660     #      ========
 661     #
 662     #      Header 2
 663     #      --------
 664     #
 665     $text = preg_replace(
 666         array('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }emx',
 667               '{ ^(.+)[ \t]*\n-+[ \t]*\n+ }emx'),
 668         array("'<h1>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h1>\n\n'",
 669               "'<h2>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h2>\n\n'"),
 670         $text);
 671
 672     # atx-style headers:
 673     #    # Header 1
 674     #    ## Header 2
 675     #    ## Header 2 with closing hashes ##
 676     #    ...
 677     #    ###### Header 6
 678     #
 679     $text = preg_replace("{
 680             ^(\\#{1,6})    # $1 = string of #'s
 681             [ \\t]*
 682             (.+?)        # $2 = Header text
 683             [ \\t]*
 684             \\#*            # optional closing #'s (not counted)
 685             \\n+
 686         }xme",
 687         "'<h'.strlen('\\1').'>'._RunSpanGamut(_UnslashQuotes('\\2')).'</h'.strlen('\\1').'>\n\n'",
 688         $text);
 689
 690     return $text;
 691 }
 692
 693
 694 function _DoLists($text) {
 695 #
 696 # Form HTML ordered (numbered) and unordered (bulleted) lists.
 697 #
 698     global $md_tab_width, $md_list_level;
 699     $less_than_tab = $md_tab_width - 1;
 700
 701     # Re-usable patterns to match list item bullets and number markers:
 702     $marker_ul  = '[*+-]';
 703     $marker_ol  = '\d+[.]';
 704     $marker_any = "(?:$marker_ul|$marker_ol)";
 705
 706     # Re-usable pattern to match any entirel ul or ol list:
 707     $whole_list = '
 708         (                                # $1 = whole list
 709           (                                # $2
 710             [ ]{0,'.$less_than_tab.'}
 711             ('.$marker_any.')                # $3 = first list item marker
 712             [ \t]+
 713           )
 714           (?s:.+?)
 715           (                                # $4
 716               \z
 717             |
 718               \n{2,}
 719               (?=\S)
 720               (?!                        # Negative lookahead for another list item marker
 721                 [ \t]*
 722                 '.$marker_any.'[ \t]+
 723               )
 724           )
 725         )
 726     '; // mx
 727
 728     # We use a different prefix before nested lists than top-level lists.
 729     # See extended comment in _ProcessListItems().
 730
 731     if ($md_list_level) {
 732         $text = preg_replace_callback('{
 733                 ^
 734                 '.$whole_list.'
 735             }mx',
 736             '_DoLists_callback', $text);
 737     }
 738     else {
 739         $text = preg_replace_callback('{
 740                 (?:(?<=\n\n)|\A\n?)
 741                 '.$whole_list.'
 742             }mx',
 743             '_DoLists_callback', $text);
 744     }
 745
 746     return $text;
 747 }
 748 function _DoLists_callback($matches) {
 749     # Re-usable patterns to match list item bullets and number markers:
 750     $marker_ul  = '[*+-]';
 751     $marker_ol  = '\d+[.]';
 752     $marker_any = "(?:$marker_ul|$marker_ol)";
 753
 754     $list = $matches[1];
 755     $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
 756     # Turn double returns into triple returns, so that we can make a
 757     # paragraph for the last item in a list, if necessary:
 758     $list = preg_replace("/\n{2,}/", "\n\n\n", $list);
 759     $result = _ProcessListItems($list, $marker_any);
 760     $result = "<$list_type>\n" . $result . "</$list_type>\n";
 761     return $result;
 762 }
 763
 764
 765 function _ProcessListItems($list_str, $marker_any) {
 766 #
 767 #    Process the contents of a single ordered or unordered list, splitting it
 768 #    into individual list items.
 769 #
 770     global $md_list_level;
 771
 772     # The $md_list_level global keeps track of when we're inside a list.
 773     # Each time we enter a list, we increment it; when we leave a list,
 774     # we decrement. If it's zero, we're not in a list anymore.
 775     #
 776     # We do this because when we're not inside a list, we want to treat
 777     # something like this:
 778     #
 779     #        I recommend upgrading to version
 780     #        8. Oops, now this line is treated
 781     #        as a sub-list.
 782     #
 783     # As a single paragraph, despite the fact that the second line starts
 784     # with a digit-period-space sequence.
 785     #
 786     # Whereas when we're inside a list (or sub-list), that line will be
 787     # treated as the start of a sub-list. What a kludge, huh? This is
 788     # an aspect of Markdown's syntax that's hard to parse perfectly
 789     # without resorting to mind-reading. Perhaps the solution is to
 790     # change the syntax rules such that sub-lists must start with a
 791     # starting cardinal number; e.g. "1." or "a.".
 792
 793     $md_list_level++;
 794
 795     # trim trailing blank lines:
 796     $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 797
 798     $list_str = preg_replace_callback('{
 799         (\n)?                            # leading line = $1
 800         (^[ \t]*)                        # leading whitespace = $2
 801         ('.$marker_any.') [ \t]+        # list marker = $3
 802         ((?s:.+?)                        # list item text   = $4
 803         (\n{1,2}))
 804         (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
 805         }xm',
 806         '_ProcessListItems_callback', $list_str);
 807
 808     $md_list_level--;
 809     return $list_str;
 810 }
 811 function _ProcessListItems_callback($matches) {
 812     $item = $matches[4];
 813     $leading_line =& $matches[1];
 814     $leading_space =& $matches[2];
 815
 816     if ($leading_line || preg_match('/\n{2,}/', $item)) {
 817         $item = _RunBlockGamut(_Outdent($item));
 818     }
 819     else {
 820         # Recursion for sub-lists:
 821         $item = _DoLists(_Outdent($item));
 822         $item = preg_replace('/\n+$/', '', $item);
 823         $item = _RunSpanGamut($item);
 824     }
 825
 826     return "<li>" . $item . "</li>\n";
 827 }
 828
 829
 830 function _DoCodeBlocks($text) {
 831 #
 832 #    Process Markdown `<pre><code>` blocks.
 833 #
 834     global $md_tab_width;
 835     $text = preg_replace_callback("{
 836             (?:\\n\\n|\\A)
 837             (                # $1 = the code block -- one or more lines, starting with a space/tab
 838               (?:
 839                 (?:[ ]\{$md_tab_width} | \\t)  # Lines must start with a tab or a tab-width of spaces
 840                 .*\\n+
 841               )+
 842             )
 843             ((?=^[ ]{0,$md_tab_width}\\S)|\\Z)    # Lookahead for non-space at line-start, or end of doc
 844         }xm",
 845         '_DoCodeBlocks_callback', $text);
 846
 847     return $text;
 848 }
 849 function _DoCodeBlocks_callback($matches) {
 850     $codeblock = $matches[1];
 851
 852     $codeblock = _EncodeCode(_Outdent($codeblock));
 853 //    $codeblock = _Detab($codeblock);
 854     # trim leading newlines and trailing whitespace
 855     $codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);
 856
 857     $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
 858
 859     return $result;
 860 }
 861
 862
 863 function _DoCodeSpans($text) {
 864 #
 865 #     *    Backtick quotes are used for <code></code> spans.
 866 #
 867 #     *    You can use multiple backticks as the delimiters if you want to
 868 #         include literal backticks in the code span. So, this input:
 869 #
 870 #          Just type ``foo `bar` baz`` at the prompt.
 871 #
 872 #          Will translate to:
 873 #
 874 #          <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
 875 #
 876 #        There's no arbitrary limit to the number of backticks you
 877 #        can use as delimters. If you need three consecutive backticks
 878 #        in your code, use four for delimiters, etc.
 879 #
 880 #    *    You can use spaces to get literal backticks at the edges:
 881 #
 882 #          ... type `` `bar` `` ...
 883 #
 884 #          Turns to:
 885 #
 886 #          ... type <code>`bar`</code> ...
 887 #
 888     $text = preg_replace_callback("@
 889             (`+)        # $1 = Opening run of `
 890             (.+?)        # $2 = The code block
 891             (?<!`)
 892             \\1
 893             (?!`)
 894         @xs",
 895         '_DoCodeSpans_callback', $text);
 896
 897     return $text;
 898 }
 899 function _DoCodeSpans_callback($matches) {
 900     $c = $matches[2];
 901     $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
 902     $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
 903     $c = _EncodeCode($c);
 904     return "<code>$c</code>";
 905 }
 906
 907
 908 function _EncodeCode($_) {
 909 #
 910 # Encode/escape certain characters inside Markdown code runs.
 911 # The point is that in code, these characters are literals,
 912 # and lose their special Markdown meanings.
 913 #
 914     global $md_escape_table;
 915
 916     # Encode all ampersands; HTML entities are not
 917     # entities within a Markdown code span.
 918     $_ = str_replace('&', '&amp;', $_);
 919
 920     # Do the angle bracket song and dance:
 921     $_ = str_replace(array('<',    '>'),
 922                      array('&lt;', '&gt;'), $_);
 923
 924     # Now, escape characters that are magic in Markdown:
 925     $_ = str_replace(array_keys($md_escape_table),
 926                      array_values($md_escape_table), $_);
 927
 928     return $_;
 929 }
 930
 931
 932 function _DoItalicsAndBold($text) {
 933     # <strong> must go first:
 934     $text = preg_replace('{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }sx',
 935         '<strong>\2</strong>', $text);
 936     # Then <em>:
 937     $text = preg_replace('{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx',
 938         '<em>\2</em>', $text);
 939
 940     return $text;
 941 }
 942
 943
 944 function _DoBlockQuotes($text) {
 945     $text = preg_replace_callback('/
 946           (                                # Wrap whole match in $1
 947             (
 948               ^[ \t]*>[ \t]?            # ">" at the start of a line
 949                 .+\n                    # rest of the first line
 950               (.+\n)*                    # subsequent consecutive lines
 951               \n*                        # blanks
 952             )+
 953           )
 954         /xm',
 955         '_DoBlockQuotes_callback', $text);
 956
 957     return $text;
 958 }
 959 function _DoBlockQuotes_callback($matches) {
 960     $bq = $matches[1];
 961     # trim one level of quoting - trim whitespace-only lines
 962     $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
 963     $bq = _RunBlockGamut($bq);        # recurse
 964
 965     $bq = preg_replace('/^/m', "  ", $bq);
 966     # These leading spaces screw with <pre> content, so we need to fix that:
 967     $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
 968                                 '_DoBlockQuotes_callback2', $bq);
 969
 970     return "<blockquote>\n$bq\n</blockquote>\n\n";
 971 }
 972 function _DoBlockQuotes_callback2($matches) {
 973     $pre = $matches[1];
 974     $pre = preg_replace('/^  /m', '', $pre);
 975     return $pre;
 976 }
 977
 978
 979 function _FormParagraphs($text) {
 980 #
 981 #    Params:
 982 #        $text - string to process with html <p> tags
 983 #
 984     global $md_html_blocks;
 985
 986     # Strip leading and trailing lines:
 987     $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
 988
 989     $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
 990
 991     #
 992     # Wrap <p> tags.
 993     #
 994     foreach ($grafs as $key => $value) {
 995         if (!isset( $md_html_blocks[$value] )) {
 996             $value = _RunSpanGamut($value);
 997             $value = preg_replace('/^([ \t]*)/', '<p>', $value);
 998             $value .= "</p>";
 999             $grafs[$key] = $value;
1000         }
1001     }
1002
1003     #
1004     # Unhashify HTML blocks
1005     #
1006     foreach ($grafs as $key => $value) {
1007         if (isset( $md_html_blocks[$value] )) {
1008             $grafs[$key] = $md_html_blocks[$value];
1009         }
1010     }
1011
1012     return implode("\n\n", $grafs);
1013 }
1014
1015
function _EncodeAmpsAndAngles($text) {
#
#    Smart processing for ampersands and angle brackets that need to be
#    encoded.
#
#    Parameter:  String.
#    Returns:    The string with every "&" that does not start an entity
#                turned into "&amp;", and every "<" that is not followed
#                by a letter, "/", "?", "$" or "!" turned into "&lt;".
#
    # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    $amp_re = '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/';

    # Naked <'s
    $lt_re = '{<(?![a-z/?\$!])}i';

    # The patterns are applied in array order: ampersands, then "<".
    return preg_replace(array($amp_re, $lt_re),
                        array('&amp;', '&lt;'), $text);
}
1029
1030
function _EncodeBackslashEscapes($text) {
#
#    Parameter:  String.
#    Returns:    The string after replacing every key of
#                $md_backslash_escape_table with its table value.
#
    global $md_escape_table, $md_backslash_escape_table;

    # str_replace works through the pairs in table order; escaped
    # backslashes must be processed first, so the table is expected to
    # list that sequence first.
    $sequences    = array_keys($md_backslash_escape_table);
    $placeholders = array_values($md_backslash_escape_table);

    return str_replace($sequences, $placeholders, $text);
}
1042
1043
function _DoAutoLinks($text) {
#
#    Turn <http:...>, <https:...>, <ftp:...> and <address@domain.foo>
#    into HTML links.
#
#    Parameter:  String.
#    Returns:    The string with those automatic links replaced: URLs by
#                an <a> element, email addresses by the return value of
#                _EncodeEmailAddress().
#
    $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
                         '<a href="\1">\1</a>', $text);

    # Email addresses: <address@domain.foo>
    #
    # A callback is used rather than the /e (eval) modifier: PHP 7 removed
    # /e, and with it preg_replace() fails and returns NULL, which would
    # discard the whole text.
    $text = preg_replace_callback('{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
        }xi',
        '_DoAutoLinks_email_callback', $text);

    return $text;
}
function _DoAutoLinks_email_callback($matches) {
#
#    Callback for the email pattern of _DoAutoLinks(). $matches[1] is the
#    address without the angle brackets and without any "mailto:" prefix.
#
    return _EncodeEmailAddress(_UnescapeSpecialChars($matches[1]));
}
1064
1065
function _EncodeEmailAddress($addr) {
#
#    Input: an email address, e.g. "foo@example.com"
#
#    Output: the address as a mailto link. Every character of
#        "mailto:<address>" except the colon is passed through
#        _EncodeEmailAddress_callback(), in the hopes of foiling most
#        address harvesting spam bots. The link text is the same encoded
#        string with everything up to and including the colon removed.
#
#    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
#    mailing list: <http://tinyurl.com/yu7ue>
#
    # Leave ':' alone (to spot mailto: later).
    $encoded = preg_replace_callback('/([^\:])/',
                                     '_EncodeEmailAddress_callback',
                                     "mailto:" . $addr);

    $link = "<a href=\"$encoded\">$encoded</a>";

    # Strip the mailto: from the visible part.
    return preg_replace('/">.+?:/', '">', $link);
}
function _EncodeEmailAddress_callback($matches) {
#
#    Callback for _EncodeEmailAddress(): returns one character
#    ($matches[1]) either unchanged, as a hexadecimal entity or as a
#    decimal entity, chosen with rand().
#
    $char = $matches[1];
    $code = ord($char);
    $roll = rand(0, 100);

    # roughly 10% raw, 45% hex, 45% dec
    # '@' *must* be encoded. I insist.
    if ($roll > 90 && $char != '@') {
        return $char;
    }

    return ($roll < 45) ? '&#x' . dechex($code) . ';'
                        : '&#' . $code . ';';
}
1103
1104
function _UnescapeSpecialChars($text) {
#
#    Swap back in all the special characters we've hidden: every value of
#    $md_escape_table found in the text is replaced by its key.
#
    global $md_escape_table;

    $placeholders = array_values($md_escape_table);
    $characters   = array_keys($md_escape_table);

    return str_replace($placeholders, $characters, $text);
}
1113
1114
1115 # _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
1116 # We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
#
#   Parameter:  String containing HTML markup.
#   Returns:    An array of the tokens comprising the input string, in
#               document order. Each token is a two-element array: the
#               first element is either 'tag' or 'text', the second is
#               the actual value. A tag may contain nested tags inside
#               its quoted attribute values, such as <a href="<MTFoo>">.
#               Note that an empty run of text between two matches is
#               emitted as a 'tag' token with an empty value.
#
#   Regular expression derived from the _tokenize() subroutine in
#   Brad Choate's MTRegex plugin.
#   <http://www.bradchoate.com/past/mtregex.php>
#
    $tag_re = '(?s:<!(?:--.*?--\s*)+>)|'.    # comment
              '(?s:<\?.*?\?>)|'.                # processing instruction
              '(?:</?[\w:$]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)'; # regular tags

    # With PREG_SPLIT_DELIM_CAPTURE the pieces alternate: even offsets hold
    # the text between matches, odd offsets hold the matched markup.
    $pieces = preg_split("{($tag_re)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $tokens = array();
    foreach ($pieces as $offset => $piece) {
        $is_text = ($offset % 2 == 0) && $piece != '';
        $tokens[] = array($is_text ? 'text' : 'tag', $piece);
    }

    return $tokens;
}
endif;
1152
1153
function _Outdent($text) {
#
#    Remove one level of line-leading indentation: at the start of every
#    line, either a single tab or from one up to $md_tab_width spaces.
#
    global $md_tab_width;

    $indent_re = '/^(\t|[ ]{1,' . $md_tab_width . '})/m';

    return preg_replace($indent_re, "", $text);
}
1161
1162
function _Detab($text) {
#
#    Replace tabs with the appropriate amount of space, so that the text
#    following each tab starts on a multiple of $md_tab_width (line length
#    is measured with strlen).
#
#    Returns the converted text; every line, the last one included, ends
#    with a newline.
#
    global $md_tab_width;

    $expanded = array();

    foreach (explode("\n", $text) as $raw_line) {
        # Cut the line at its tabs; the first segment starts the rebuilt
        # line, each later segment is appended after padding.
        $segments = explode("\t", $raw_line);
        $line = array_shift($segments);

        foreach ($segments as $segment) {
            # Spaces needed to reach the next tab stop.
            $pad = $md_tab_width - strlen($line) % $md_tab_width;
            $line .= str_repeat(" ", $pad) . $segment;
        }

        $expanded[] = $line;
    }

    return implode("\n", $expanded) . "\n";
}
1191
1192
function _UnslashQuotes($text) {
#
#    Undo the slashes automatically added before double quotes when
#    preg_replace evaluates a replacement expression.
#
#    Parameter:  String.
#    Returns:    The string with any slash-double-quote (\") sequence
#                replaced by a single double quote.
#
    $slashed_quote = '\\"';

    return str_replace($slashed_quote, '"', $text);
}
1203
1204
1205 /*
1206
1207 PHP Markdown
1208 ============
1209
1210 Description
1211 -----------
1212
1213 This is a PHP translation of the original Markdown formatter written in
1214 Perl by John Gruber.
1215
1216 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1217 easy-to-write structured text format into HTML. Markdown's text format
1218 is most similar to that of plain text email, and supports features such
1219 as headers, *emphasis*, code blocks, blockquotes, and links.
1220
1221 Markdown's syntax is designed not as a generic markup language, but
1222 specifically to serve as a front-end to (X)HTML. You can use span-level
1223 HTML tags anywhere in a Markdown document, and you can use block level
1224 HTML tags (like <div> and <table>) as well.
1225
1226 For more information about Markdown's syntax, see:
1227
1228 <http://daringfireball.net/projects/markdown/>
1229
1230
1231 Bugs
1232 ----
1233
1234 To file bug reports please send email to:
1235
1236 <michel.fortin@michelf.com>
1237
1238 Please include with your report: (1) the example input; (2) the output you
1239 expected; (3) the output Markdown actually produced.
1240
1241
1242 Version History
1243 ---------------
1244
1245 See the readme file for detailed release notes for this version.
1246
1247 1.0.1a - 15 Apr 2005
1248
1249 1.0.1 - 17 Dec 2004
1250
1251 1.0 - 21 Aug 2004
1252
1253
1254 Author & Contributors
1255 ---------------------
1256
1257 Original Perl version by John Gruber
1258 <http://daringfireball.net/>
1259
1260 PHP port and other contributions by Michel Fortin
1261 <http://www.michelf.com/>
1262
1263
1264 Copyright and License
1265 ---------------------
1266
1267 Copyright (c) 2004-2005 Michel Fortin
1268 <http://www.michelf.com/>
1269 All rights reserved.
1270
1271 Copyright (c) 2003-2004 John Gruber
1272 <http://daringfireball.net/>
1273 All rights reserved.
1274
1275 Redistribution and use in source and binary forms, with or without
1276 modification, are permitted provided that the following conditions are
1277 met:
1278
1279 *    Redistributions of source code must retain the above copyright notice,
1280     this list of conditions and the following disclaimer.
1281
1282 *    Redistributions in binary form must reproduce the above copyright
1283     notice, this list of conditions and the following disclaimer in the
1284     documentation and/or other materials provided with the distribution.
1285
1286 *    Neither the name "Markdown" nor the names of its contributors may
1287     be used to endorse or promote products derived from this software
1288     without specific prior written permission.
1289
1290 This software is provided by the copyright holders and contributors "as
1291 is" and any express or implied warranties, including, but not limited
1292 to, the implied warranties of merchantability and fitness for a
1293 particular purpose are disclaimed. In no event shall the copyright owner
1294 or contributors be liable for any direct, indirect, incidental, special,
1295 exemplary, or consequential damages (including, but not limited to,
1296 procurement of substitute goods or services; loss of use, data, or
1297 profits; or business interruption) however caused and on any theory of
1298 liability, whether in contract, strict liability, or tort (including
1299 negligence or otherwise) arising in any way out of the use of this
1300 software, even if advised of the possibility of such damage.
1301
1302 */
1303 ?>