Markdown.pl

   1 #!/usr/bin/env perl
   2
   3 #
   4 # Markdown -- A text-to-HTML conversion tool for web writers
   5 #
   6 # Copyright (C) 2004 John Gruber
   7 # Copyright (C) 2015,2016,2017 Kyle J. McKay
   8 # All rights reserved.
   9 # License is Modified BSD (aka 3-clause BSD) License
  10 # See LICENSE file (or <https://opensource.org/licenses/BSD-3-Clause>)
  11 #
  12
  13 package Markdown;
  14
  15 require 5.008;
  16 use strict;
  17 use warnings;
  18
  19 use Encode;
  20
  21 use vars qw($COPYRIGHT $VERSION @ISA @EXPORT_OK);
  22
  # Package metadata.  Each glob is aliased, during compilation, to a
  # reference to a string literal, which gives $COPYRIGHT and $VERSION
  # their values before any other code in the file runs.
  BEGIN {
      *COPYRIGHT = \"Copyright (C) 2004 John Gruber\nCopyright (C) 2015,2016,2017 Kyle J. McKay\nAll rights reserved.\n";
      *VERSION   = \"1.1.5-PRE";
  }
  30
  31 require Exporter;
  32 use Digest::MD5 qw(md5 md5_hex);
  33 use File::Basename qw(basename);
  34 use Scalar::Util qw(refaddr looks_like_number);
  35 use Pod::Usage;
  # Exporter configuration: Markdown() may be imported on request.
  @ISA       = ('Exporter');
  @EXPORT_OK = ('Markdown');

  # Make sure %INC has an entry keyed by "<package>.pm"; when it is missing,
  # copy the entry recorded under this file's base name.
  unless (exists $INC{__PACKAGE__.'.pm'}) {
      $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)};
  }

  # Release the DATA handle if one is open.
  if (fileno(DATA)) {
      close(DATA);
  }

  # With no caller this file is being run as a program: hand the command
  # line to _main() and exit with its result (0 when it returns false).
  unless (caller) {
      exit(_main(@ARGV) || 0);
  }
  42
  # Decoder used by Markdown() for input that is not valid UTF-8:
  # Windows-1252 when that encoding is available, otherwise ISO-8859-1.
  # Compilation is aborted if neither can be found.
  my $encoder;
  BEGIN {
      $encoder = Encode::find_encoding('Windows-1252');
      $encoder = Encode::find_encoding('ISO-8859-1') unless $encoder;
      $encoder or die "failed to load ISO-8859-1 encoder\n";
  }
  49
  #
  # Global default settings (Markdown() copies these into %opt as the
  # starting values for every call):
  #
  my ($g_style_prefix, $g_empty_element_suffix, $g_indent_width, $g_tab_width);
  BEGIN {
      ($g_style_prefix, $g_empty_element_suffix, $g_indent_width, $g_tab_width) = (
          "_markdown-",   # Prefix for markdown css class styles
          " />",          # Change to ">" for HTML output
          4,              # Number of spaces considered new level
          4,              # Legacy even though it's wrong
      );
  }
  60
  61
  62 #
  63 # Globals:
  64 #
  65
  # Style sheet template
  my $g_style_sheet;

  # Permanent block id table (Markdown() does not clear this one)
  my %g_perm_block_ids;

  # Global hashes, used by various utility routines; Markdown() empties
  # all of them at the start of each call
  my (%g_urls, %g_titles, %g_anchors, %g_block_ids, %g_html_blocks, %g_code_blocks);

  # Options in effect for the current Markdown() call
  my %opt;
  80
  # Build the "block id" placeholder that stands in for a stored block.
  # The id must not contain any characters that the rest of the code could
  # misinterpret.  md5_hex was used originally but is unnecessarily slow, so
  # the id is the refaddr of the hash slot holding the key instead: the slot
  # in the permanent table when the optional second argument is true,
  # otherwise the slot in the global table.  To keep the result from being
  # confused with anything else it is prefixed with one control character
  # and suffixed with another, neither of which is allowed by the XML
  # standard or Unicode.
  sub block_id {
      my $permanent = $_[1];

      # $_[0] is used in place so the (possibly large) block is not copied
      if ($permanent) {
          return "\2" . refaddr(\$g_perm_block_ids{$_[0]}) . "\3";
      }
      return "\5" . refaddr(\$g_block_ids{$_[0]}) . "\6";
  }
  94
  # Pattern for text whose [square brackets] are balanced, nested to any
  # depth.  The pattern refers to itself through a postponed subexpression.
  # See Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
  my $g_nested_brackets;
  BEGIN {
      $g_nested_brackets = qr{
      (?>                                 # Atomic matching
          [^\[\]]+                        # Anything other than brackets
       |
          \[
              (??{ $g_nested_brackets })  # Recursive set of nested brackets
          \]
      )*
      }ox;
  }
 109
 110
 # Table of hash values for escaped characters: every character listed
 # below is mapped to its permanent block id.
 my %g_escape_table;
 BEGIN {
     my @escapable = split //, "\\\`*_~{}[]()>#+-.!";
     @g_escape_table{@escapable} = map { block_id($_, 1) } @escapable;
 }

 # Used to track when we're inside an ordered or unordered list
 # (see _ProcessListItems() for details):
 my $g_list_level;
 BEGIN {
     $g_list_level = 0;
 }
 125
 126
 127 #### Blosxom plug-in interface ##########################################
 # True when $blosxom::version is defined at compile time.
 my $_haveBX;
 BEGIN {
     # the variable belongs to Blosxom and is named only once in this block
     no warnings 'once';
     $_haveBX = defined($blosxom::version);
 }

 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
 # which posts Markdown should process, using a "meta-markup: markdown"
 # header. If it's set to 0 (the default), Markdown will process all
 # entries.
 my $g_blosxom_use_meta;
 BEGIN { $g_blosxom_use_meta = 0 }
 142
 # Part of the Blosxom plug-in interface: unconditionally returns true.
 sub start {
     return 1;
 }
 # Part of the Blosxom plug-in interface.  Replaces the story body (through
 # $body_ref) with its Markdown() rendering.  When $g_blosxom_use_meta is
 # set, the body is converted only if $meta::markup reads "markdown"
 # (case-insensitive, surrounding whitespace ignored).  Always returns 1.
 sub story {
     my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;

     my $wanted = !$g_blosxom_use_meta
         || (defined($meta::markup) && $meta::markup =~ /^\s*markdown\s*$/i);

     ${$body_ref} = Markdown(${$body_ref}) if $wanted;

     return 1;
 }
 154
 155
 156 #### Movable Type plug-in interface #####################################
 # Probe for Movable Type.  Each eval traps the exception that require
 # raises when the module cannot be loaded, so both flags are plain
 # true/false values.
 my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT
 my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0.

 # Register with Movable Type only when it was found.  These guards must be
 # positive tests: the bodies require the MT modules again, and require dies
 # when a module is missing.
 if ($_haveMT) {
     require MT;
     MT->import;
     require MT::Template::Context;
     MT::Template::Context->import;

     if ($_haveMT3) {
         require MT::Plugin;
         MT::Plugin->import;
         my $plugin = MT::Plugin->new({
             name => "Markdown",
             description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
             doc_link => 'http://daringfireball.net/projects/markdown/'
         });
         MT->add_plugin( $plugin );
     }

     # Container tag that records the requested output flavour in the
     # context stash (key 'markdown_output') and then builds its contents.
     MT::Template::Context->add_container_tag(MarkdownOptions => sub {
         my $ctx  = shift;
         my $args = shift;
         my $builder = $ctx->stash('builder');
         my $tokens = $ctx->stash('tokens');

         if (defined ($args->{'output'}) ) {
             $ctx->stash('markdown_output', lc $args->{'output'});
         }

         defined (my $str = $builder->build($ctx, $tokens) )
             or return $ctx->error($builder->errstr);
         $str; # return value
     });

     # Plain Markdown text filter.  The stashed 'markdown_output' value
     # selects HTML-style empty elements ("html...") or no conversion at
     # all ("raw"); anything else restores the XHTML-style suffix.
     MT->add_text_filter('markdown' => {
         label     => 'Markdown',
         docs      => 'http://daringfireball.net/projects/markdown/',
         on_format => sub {
             my $text = shift;
             my $ctx  = shift;
             my $raw  = 0;
             if (defined $ctx) {
                 my $output = $ctx->stash('markdown_output');
                 if (defined $output && $output =~ m/^html/i) {
                     $g_empty_element_suffix = ">";
                     $ctx->stash('markdown_output', '');
                 }
                 elsif (defined $output && $output eq 'raw') {
                     $raw = 1;
                     $ctx->stash('markdown_output', '');
                 }
                 else {
                     $raw = 0;
                     $g_empty_element_suffix = " />";
                 }
             }
             $text = $raw ? $text : Markdown($text);
             $text;
         },
     });

     # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
     my $smartypants;

     {
         no warnings "once";
         $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
     }

     if ($smartypants) {
         MT->add_text_filter('markdown_with_smartypants' => {
             label     => 'Markdown With SmartyPants',
             docs      => 'http://daringfireball.net/projects/markdown/',
             on_format => sub {
                 my $text = shift;
                 my $ctx  = shift;
                 if (defined $ctx) {
                     my $output = $ctx->stash('markdown_output');
                     if (defined $output && $output eq 'html') {
                         $g_empty_element_suffix = ">";
                     }
                     else {
                         $g_empty_element_suffix = " />";
                     }
                 }
                 $text = Markdown($text);
                 # the filter's result is the value of this assignment
                 $text = $smartypants->($text, '1');
             },
         });
     }
 }
 249
 # Normalize whitespace: drop it from both ends of the string and squeeze
 # every remaining run of whitespace to a single space.  An undefined
 # argument is returned as undef.
 sub _strip {
     my ($str) = @_;
     return undef unless defined($str);

     for ($str) {
         s/^\s+//;
         s/\s+$//;
         s/\s+/ /g;
     }
     return $str;
 }
 258
 259 #### BBEdit/command-line text filter interface ##########################
 # Command-line entry point.  Parses the switches at the front of the
 # argument list, converts every remaining input (the named files, or
 # standard input when none are named) with Markdown() and prints the
 # result, optionally preceded by the style sheet and wrapped in an HTML
 # stub page.
 #
 # Arguments: the command line, as a list.
 # Does not return: it finishes with exit 0, or dies on an invalid
 # --tabwidth value.
 sub _main {
     local *ARGV = \@_;  # let GetOptions and <> work on our arguments


     #### Check for command-line switches: #################
     my %options = ();
     my %cli_opts;
     use Getopt::Long;
     Getopt::Long::Configure(qw(bundling require_order pass_through));
     GetOptions(\%cli_opts,
         'help','h',
         'version|V',
         'shortversion|short-version|s',
         'html4tags',
         'deprecated',
         'htmlroot|r=s',
         'imageroot|i=s',
         'tabwidth|tab-width=s',
         'stylesheet|style-sheet',
         'no-stylesheet|no-style-sheet',
         'stub',
     );
     if ($cli_opts{'help'}) {
         pod2usage(-verbose => 2, -exitval => 0);
     }
     if ($cli_opts{'h'}) {
         pod2usage(-verbose => 0, -exitval => 0);
     }
     if ($cli_opts{'version'}) { # Version info
         print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
         print "License is Modified BSD (aka 3-clause BSD) License\n";
         print "<https://opensource.org/licenses/BSD-3-Clause>\n";
         exit 0;
     }
     if ($cli_opts{'shortversion'}) { # Just the version number string.
         print $VERSION;
         exit 0;
     }
     # $stub: 0 = no stub page, 1 = XHTML-style stub, -1 = HTML4-style stub
     my $stub = 0;
     if ($cli_opts{'stub'}) {
         $stub = 1;
     }
     if ($cli_opts{'html4tags'}) {        # Use HTML tag style instead of XHTML
         $options{empty_element_suffix} = ">";
         $stub = -$stub;
     }
     if ($cli_opts{'deprecated'}) {       # Allow <dir> and <menu> tags to pass through
         _SetAllowedTag("dir");
         _SetAllowedTag("menu");
     }
     # Test for presence rather than truth so that a value of 0 is
     # validated (and rejected) instead of being silently ignored.
     if (defined $cli_opts{'tabwidth'}) {
         my $tw = $cli_opts{'tabwidth'};
         die "invalid tab width (must be integer)\n" unless looks_like_number($tw);
         die "invalid tab width (must be >= 2 and <= 32)\n" unless $tw >= 2 && $tw <= 32;
         $options{tab_width} = int(0+$tw);
     }
     if ($cli_opts{'htmlroot'}) {         # Use URL prefix
         $options{url_prefix} = $cli_opts{'htmlroot'};
     }
     if ($cli_opts{'imageroot'}) {        # Use image URL prefix
         $options{img_prefix} = $cli_opts{'imageroot'};
     }
     if ($cli_opts{'stylesheet'}) {  # Display the style sheet
         $options{show_styles} = 1;
     }
     if ($cli_opts{'no-stylesheet'}) {  # Do not display the style sheet
         $options{show_styles} = 0;
     }
     # A stub page includes the style sheet unless explicitly turned off
     $options{show_styles} = 1 if $stub && !defined($options{show_styles});
     $options{tab_width} = 8 unless defined($options{tab_width});

     # Prints everything that precedes the converted text: the stub page
     # opening, an optional <title>, the style sheet and the body opening.
     my $hdr = sub {
         if ($stub > 0) {
             print join("\n",
                 '<!DOCTYPE html>',
                 '<html xmlns="http://www.w3.org/1999/xhtml">',
                 '<head>',
                 '<meta charset="utf-8" />',
                 '<meta http-equiv="content-type" content="text/html; charset=utf-8" />',
                 '');
         } elsif ($stub < 0) {
             print join("\n",
                 '<html>',
                 '<head>',
                 '<meta charset="utf-8">',
                 '<meta http-equiv="content-type" content="text/html; charset=utf-8">',
                 '');
         }
         if ($stub && ($options{title} || $options{h1})) {
             # Prefer an explicit title, fall back to the h1 option
             my $title = $options{title};
             defined($title) && $title ne "" or $title = $options{h1};
             if (defined($title) && $title ne "") {
                 $title =~ s/&/&amp;/g;
                 $title =~ s/</&lt;/g;
                 print "<title>$title</title>\n";
             }
         }
         if ($options{show_styles}) {
             my $stylesheet = $g_style_sheet;
             $stylesheet =~ s/%\(base\)/$g_style_prefix/g;
             print $stylesheet;
         }
         if ($stub) {
             print "</head>\n<body style=\"text-align:center\">\n",
                 "<div style=\"display:inline-block;text-align:left;max-width:42pc\">\n";
         }
     };

     #### Process incoming text: ###########################
     # The header is printed only after the first non-empty conversion:
     # Markdown() can store an h1 value in %options, which the header
     # uses for the page title.
     my $didhdr;
     for (;;) {
         local $_;
         {
             local $/; # Slurp the whole file
             $_ = <>;
         }
         defined($_) or last;
         my $result = Markdown($_, \%options);
         if ($result ne "") {
             if (!$didhdr) {
                 $hdr->();
                 $didhdr = 1;
             }
             print $result;
         }
     }
     $hdr->() unless $didhdr;
     print "</div>\n</body>\n</html>\n" if $stub;

     exit 0;
 }
 391
 392
 sub Markdown {
 #
 # Primary function: converts Markdown text to HTML and returns the
 # result as UTF-8 encoded bytes.  The first argument is the text; any
 # further arguments are options, given either as a single hashref or
 # as a list of name, value pairs.
 #
 # The order in which other subs are called here is essential. Link and
 # image substitutions need to happen before _EscapeSpecialChars(), so
 # that any *'s or _'s in the <a> and <img> tags get encoded.
 #
     my ($source, @rest) = @_;
     $source = '' unless defined $source;

     # Get a character string to work on: input that already is, or can
     # be decoded as, UTF-8 is used as such; anything else goes through
     # the fallback decoder.
     my $text;
     if (Encode::is_utf8($source) || utf8::decode($source)) {
         $text = $source;
     }
     else {
         $text = $encoder->decode($source, Encode::FB_DEFAULT);
     }
     $source = undef;

     # Start from the defaults, then let the caller's options override.
     my %args = ref($rest[0]) eq "HASH" ? %{$rest[0]} : @rest;
     %opt = (
         style_prefix            => $g_style_prefix,
         empty_element_suffix    => $g_empty_element_suffix,
         tab_width               => $g_tab_width,
         indent_width            => $g_indent_width,
         url_prefix              => "", # Prefixed to non-absolute URLs
         img_prefix              => "", # Prefixed to non-absolute image URLs
         %args,
     );

     # Clear the globals. If we don't clear these, you get conflicts
     # from other articles when generating a page which contains more than
     # one article (e.g. an index page that shows the N most recent
     # articles):
     for my $table (\%g_urls, \%g_titles, \%g_anchors,
                    \%g_block_ids, \%g_html_blocks, \%g_code_blocks) {
         %$table = ();
     }
     $g_list_level = 0;

     # Standardize line endings:
     $text =~ s{\r\n}{\n}g;  # DOS to Unix
     $text =~ s{\r}{\n}g;    # Mac to Unix

     # Make sure $text ends with a couple of newlines:
     $text .= "\n\n";

     # Handle backticks-delimited code blocks
     $text = _HashBTCodeBlocks($text);

     # Convert all tabs to spaces.
     $text = _Detab($text);

     # Strip any lines consisting only of spaces.
     # This makes subsequent regexen easier to write, because we can
     # match consecutive blank lines with /\n+/ instead of something
     # contorted like / *\n+/ .
     $text =~ s/^ +$//mg;

     # Turn block-level HTML blocks into hash entries
     $text = _HashHTMLBlocks($text);

     # Strip link definitions, store in hashes.
     $text = _StripLinkDefinitions($text);

     $text = _RunBlockGamut($text, 1);

     # Unhashify code blocks
     $text =~ s/(\005\d+\006)/$g_code_blocks{$1}/g;

     $text = _UnescapeSpecialChars($text);

     $text .= "\n" unless $text eq "";

     utf8::encode($text);

     # When the options came in as a hashref and an h1 option is set,
     # hand its UTF-8 encoded value back to the caller through that hash.
     my $caller_opts = $rest[0];
     if (defined($opt{h1}) && $opt{h1} ne "" && ref($caller_opts) eq "HASH") {
         utf8::encode($opt{h1});
         $caller_opts->{h1} = $opt{h1};
     }
     return $text;
 }
 485
 486
 sub _HashBTCodeBlocks {
 #
 #   Process Markdown backticks (```) delimited code blocks: each block is
 #   rendered to HTML, stored in %g_html_blocks and replaced in the text
 #   by its block id.
 #
     my ($text) = @_;

     # Render the raw contents of one block and return the placeholder
     # (surrounded by blank lines) that takes its place.
     my $stash = sub {
         my ($code) = @_;
         $code =~ s/[ \t]+$//mg;         # trim trailing spaces on lines
         $code = _Detab($code, 8);       # physical tab stops are always 8
         $code =~ s/\A\n+//;             # trim leading newlines
         $code =~ s/\s+\z//;             # trim trailing whitespace
         $code = _EncodeCode($code);     # or run highlighter here
         my $html = "<div class=\"$opt{style_prefix}code-bt\"><pre style=\"display:none\"></pre><pre><code>"
             . $code . "\n</code></pre></div>";

         my $key = block_id($html);
         $g_html_blocks{$key} = $html;
         return "\n\n" . $key . "\n\n";
     };

     # $2 (not used yet) contains syntax highlighting to use if defined
     $text =~ s{
             (?:(?<=\n)|\A)
                 ``(`+)[ \t]*(?:([\w.+-]+)[ \t]*)?\n
              ( # $3 = the code block -- one or more lines, starting with ```
               (?:
                 .*\n+
               )+?
              )
             (?:(?:``\1[ \t]*(?:\n|\Z))|\Z) # and ending with ``` or end of document
         }{ $stash->($3) }egmx;

     return $text;
 }
 520
 521
 sub _StripLinkDefinitions {
 #
 # Removes every link definition from the text, recording its URL in
 # %g_urls and its title (if any) in %g_titles under the normalized,
 # lower-cased link id.  Returns the remaining text.
 #
     my ($text) = @_;
     my $less_than_indent = $opt{indent_width} - 1;

     # Link defs are in the form: ^[id]: url "optional title"
     # Each pass of the loop removes one definition.
     while ($text =~ s{
                         ^[ ]{0,$less_than_indent}\[(.+)\]: # id = $1
                           [ ]*
                           \n?               # maybe *one* newline
                           [ ]*
                         <?(\S+?)>?          # url = $2
                           [ ]*
                           \n?               # maybe one newline
                           [ ]*
                         (?:
                             (?<=\s)         # lookbehind for whitespace
                             ["(]
                             (.+?)           # title = $3
                             [")]
                             [ ]*
                         )?  # title is optional
                         (?:\n+|\Z)
                     }
                     {}mx) {
         my ($raw_id, $url, $raw_title) = ($1, $2, $3);

         my $id = _strip(lc $raw_id);    # Link IDs are case-insensitive
         my $title = _strip($raw_title);
         next if $id eq "";

         $g_urls{$id} = _EncodeAmpsAndAngles($url);
         next unless defined($title) && $title ne "";

         $g_titles{$id} = $title;
         $g_titles{$id} =~ s/\042/&quot;/g;
     }

     return $text;
 }
 564
 # Names of the block-level HTML tags recognized by _HashHTMLBlocks().
 # The "a" list is used for its first match and additionally contains
 # ins and del; the "b" list is used for its second, more liberal match.
 my $block_tags_a;
 my $block_tags_b;
 BEGIN {
     $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o;
     $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o;
 }
 570
 # Replace block-level HTML in the text with block ids, keeping the
 # original markup in %g_html_blocks.  Returns the modified text.
 sub _HashHTMLBlocks {
     my ($text) = @_;
     my $less_than_indent = $opt{indent_width} - 1;

     # Common replacement step for all four patterns below: remember the
     # matched block under its id and leave the id, surrounded by blank
     # lines, in its place.
     my $stash = sub {
         my ($block) = @_;
         my $key = block_id($block);
         $g_html_blocks{$key} = $block;
         return "\n\n" . $key . "\n\n";
     };

     # Hashify HTML blocks:
     # We only want to do this for block-level HTML tags, such as headers,
     # lists, and tables. That's because we still want to wrap <p>s around
     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
     # phrase emphasis, and spans. The list of tags we're looking for is
     # hard-coded:

     # First, look for nested blocks, e.g.:
     #   <div>
     #       <div>
     #       tags for inner block must be indented.
     #       </div>
     #   </div>
     #
     # The outermost tags must start at the left margin for this to match, and
     # the inner nested divs must be indented.
     # We need to do this before the next, more liberal match, because the next
     # match will start at the first `<div>` and stop at the first `</div>`.
     $text =~ s{
                 (                       # save in $1
                     ^                   # start of line (with /m)
                     <($block_tags_a)    # start tag = $2
                     \b                  # word break
                     (.*\n)*?            # any number of lines, minimally matching
                     </\2>               # the matching end tag
                     [ ]*                # trailing spaces
                     (?=\n+|\Z) # followed by a newline or end of document
                 )
             }{ $stash->($1) }egmx;

     #
     # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
     #
     $text =~ s{
                 (                       # save in $1
                     ^                   # start of line (with /m)
                     <($block_tags_b)    # start tag = $2
                     \b                  # word break
                     (.*\n)*?            # any number of lines, minimally matching
                     .*</\2>             # the matching end tag
                     [ ]*                # trailing spaces
                     (?=\n+|\Z) # followed by a newline or end of document
                 )
             }{ $stash->($1) }egmx;

     # Special case just for <hr />. It was easier to make a special case than
     # to make the other regex more complicated.
     $text =~ s{
                 (?:
                     (?<=\n\n)       # Starting after a blank line
                     |               # or
                     \A\n?           # the beginning of the doc
                 )
                 (                       # save in $1
                     [ ]{0,$less_than_indent}
                     <(hr)               # start tag = $2
                     \b                  # word break
                     ([^<>])*?           #
                     /?>                 # the matching end tag
                     [ ]*
                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
                 )
             }{ $stash->($1) }egx;

     # Special case for standalone HTML comments:
     $text =~ s{
                 (?:
                     (?<=\n\n)       # Starting after a blank line
                     |               # or
                     \A\n?           # the beginning of the doc
                 )
                 (                   # save in $1
                     [ ]{0,$less_than_indent}
                     (?s:
                         <!
                         (--.*?--\s*)+
                         >
                     )
                     [ ]*
                     (?=\n{2,}|\Z)   # followed by a blank line or end of document
                 )
             }{ $stash->($1) }egx;

     return $text;
 }
 677
 678
 sub _RunBlockGamut {
 #
 # Applies, in a fixed order, all the transformations that form
 # block-level tags like paragraphs, headers, and list items.
 # Takes the text and the anchors flag that is handed on to
 # _DoHeaders(); returns the transformed text.
 #
     my ($text, $anchors) = @_;

     $text = _DoHeaders($text, $anchors);

     # Do Horizontal Rules: a line holding three or more *, _ or -
     # marks, indented by at most three spaces and with at most two
     # spaces between marks.
     my $rule = "\n<hr$opt{empty_element_suffix}\n";
     $text =~ s{^ {0,3}\*(?: {0,2}\*){2,}[ ]*$}{$rule}gm;
     $text =~ s{^ {0,3}\_(?: {0,2}\_){2,}[ ]*$}{$rule}gm;
     $text =~ s{^ {0,3}\-(?: {0,2}\-){2,}[ ]*$}{$rule}gm;

     # Remaining passes, each taking and returning the text.
     #
     # We already ran _HashHTMLBlocks() before, in Markdown(), but that
     # was to escape raw HTML in the original Markdown source. This time,
     # we're escaping the markup we've just created, so that we don't wrap
     # <p> tags around block-level tags.
     for my $pass (\&_DoLists, \&_DoCodeBlocks, \&_DoBlockQuotes,
                   \&_HashHTMLBlocks, \&_FormParagraphs) {
         $text = $pass->($text);
     }

     return $text;
 }
 709
 710
 sub _RunSpanGamut {
 #
 # Applies, in a fixed order, all the transformations that occur
 # *within* block-level tags like paragraphs, headers, and list items.
 #
     my ($text) = @_;

     # The order of the passes matters:
     #  - images are processed before anchors, because ![foo][f] looks
     #    like an anchor;
     #  - links made out of things like `<http://example.com/>` come
     #    after _DoAnchors(), because < and > may be used as delimiters
     #    in inline links like [this](<url>).
     my @passes = (
         \&_DoCodeSpans,
         \&_EscapeSpecialChars,
         \&_DoImages,
         \&_DoAnchors,
         \&_DoAutoLinks,
         \&_EncodeAmpsAndAngles,
         \&_DoItalicsAndBoldAndStrike,
     );
     for my $pass (@passes) {
         $text = $pass->($text);
     }

     # Do hard breaks:
     $text =~ s/ {2,}\n/<br$opt{empty_element_suffix}\n/g;

     return $text;
 }
 741
 742
 # Rebuild the text from its _TokenizeHTML() tokens (used here as
 # [type, text] pairs), protecting characters that later passes would
 # otherwise treat as Markdown markup.
 sub _EscapeSpecialChars {
     my ($text) = @_;
     my $tokens = _TokenizeHTML($text);

     my @pieces;
     for my $token (@$tokens) {
         if ($token->[0] eq "tag") {
             # Within tags, encode *, _ and ~ so they don't conflict
             # with their use in Markdown for italics and strong.
             # We're replacing each such character with its
             # corresponding block id value; this is likely
             # overkill, but it should prevent us from colliding
             # with the escape values by accident.
             $token->[1] =~ s!([*_~])!$g_escape_table{$1}!g;
             push @pieces, $token->[1];
             next;
         }

         # Everything else only needs its backslash escapes encoded
         my $plain = $token->[1];
         $plain = _EncodeBackslashEscapes($plain);
         push @pieces, $plain;
     }

     return join('', @pieces);
 }
 769
 770
 # Rewrite a wiki-style link into inline [text](url) form.
 #
 # $link_text is the link text; $link_loc is the part after a "|" bar, or
 # undef when there was no bar.  Only targets that look like an http, https,
 # ftp or ftps URL are handled: with a bar the target is $link_loc, without
 # one the text itself serves as both text and target.  Returns the
 # rewritten link, or undef for every kind of link not handled yet.
 sub _ProcessWikiLink {
     my ($link_text, $link_loc) = @_;
     my $is_url = qr{^(?:http|ftp)s?://\S+$}i;

     if (defined($link_loc)) {
         # We don't handle any other kind of "bar" links yet
         return $link_loc =~ $is_url ? "[" . $link_text . "](" . $link_loc . ")" : undef;
     }

     # We don't handle any other wiki-style links yet
     return $link_text =~ $is_url ? "[" . $link_text . "](" . $link_text . ")" : undef;
 }
 788
 789
 790 sub _DoAnchors {
 791 #
 792 # Turn Markdown link shortcuts into XHTML <a> tags.
 793 #
 794     my $text = shift;
 795
 796     #
 797     # First, handle wiki-style links: [[wiki style link]]
 798     #
 799     $text =~ s{
 800         (                   # wrap whole match in $1
 801           \[\[
 802             ($g_nested_brackets) # link text and id = $2
 803           \]\]
 804         )
 805     }{
 806         my $result;
 807         my $whole_match = $1;
 808         my $link_text   = $2;
 809         my $link_loc    = undef;
 810
 811         if ($link_text =~ /^(.*)\|(.*)$/s) {
 812             $link_text = $1;
 813             $link_loc = $2;
 814         }
 815
 816         $result = _ProcessWikiLink($link_text, $link_loc);
 817         defined($result) or $result = $whole_match;
 818         $result;
 819     }xsge;
 820
 821     #
 822     # Next, handle reference-style links: [link text] [id]
 823     #
 824     $text =~ s{
 825         (                   # wrap whole match in $1
 826           \[
 827             ($g_nested_brackets) # link text = $2
 828           \]
 829
 830           [ ]?              # one optional space
 831           (?:\n[ ]*)?       # one optional newline followed by spaces
 832
 833           \[
 834             (.*?)           # id = $3
 835           \]
 836         )
 837     }{
 838         my $result;
 839         my $whole_match = $1;
 840         my $link_text   = $2;
 841         my $link_id     = _strip(lc $3);
 842
 843         if ($link_id eq "") {
 844             $link_id = _strip(lc $link_text);     # for shortcut links like [this][].
 845         }
 846
 847         if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
 848             my $url = $g_urls{$link_id};
 849             $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
 850             # We've got to encode these to avoid conflicting
 851             # with italics, bold and strike through.
 852             $url =~ s!([*_~])!$g_escape_table{$1}!g;
 853             $result = "<a href=\"$url\"";
 854             if ( defined $g_titles{$link_id} ) {
 855                 my $title = $g_titles{$link_id};
 856                 $title =~ s!([*_~])!$g_escape_table{$1}!g;
 857                 $result .=  " title=\"$title\"";
 858             }
 859             $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
 860             $result .= ">$link_text</a>";
 861         }
 862         else {
 863             $result = $whole_match;
 864         }
 865         $result;
 866     }xsge;
 867
 868     #
 869     # Subsequently, inline-style links: [link text](url "optional title")
 870     #
 871     $text =~ s{
 872         (               # wrap whole match in $1
 873           \[
 874             ($g_nested_brackets) # link text = $2
 875           \]
 876           \(            # literal paren
 877             [ ]*
 878             <?(.*?)>?   # href = $3
 879             [ ]*
 880             (           # $4
 881               (['\042]) # quote char = $5
 882               (.*?)     # Title = $6
 883               \5        # matching quote
 884             )?          # title is optional
 885           \)
 886         )
 887     }{
 888         my $result;
 889         my $whole_match = $1;
 890         my $link_text   = $2;
 891         my $url         = $3;
 892         my $title       = _strip($6);
 893
 894         $url = _PrefixURL($url);
 895         # We've got to encode these to avoid conflicting
 896         # with italics, bold and strike through.
 897         $url =~ s!([*_~])!$g_escape_table{$1}!g;
 898         $result = "<a href=\"$url\"";
 899
 900         if (defined $title) {
 901             $title =~ s/\042/&quot;/g;
 902             $title =~ s!([*_~])!$g_escape_table{$1}!g;
 903             $result .= " title=\"$title\"";
 904         }
 905
 906         $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
 907         $result .= ">$link_text</a>";
 908
 909         $result;
 910     }xsge;
 911
 912     #
 913     # Finally, handle reference-style implicit shortcut links: [link text]
 914     #
 915     $text =~ s{
 916         (                   # wrap whole match in $1
 917           \[
 918             ($g_nested_brackets) # link text = $2
 919           \]
 920         )
 921     }{
 922         my $result;
 923         my $whole_match = $1;
 924         my $link_text   = $2;
 925         my $link_id     = _strip(lc $2);
 926
 927         if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
 928             my $url = $g_urls{$link_id};
 929             $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
 930             # We've got to encode these to avoid conflicting
 931             # with italics, bold and strike through.
 932             $url =~ s!([*_~])!$g_escape_table{$1}!g;
 933             $result = "<a href=\"$url\"";
 934             if ( defined $g_titles{$link_id} ) {
 935                 my $title = $g_titles{$link_id};
 936                 $title =~ s!([*_~])!$g_escape_table{$1}!g;
 937                 $result .=  " title=\"$title\"";
 938             }
 939             $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
 940             $result .= ">$link_text</a>";
 941         }
 942         else {
 943             $result = $whole_match;
 944         }
 945         $result;
 946     }xsge;
 947
 948     return $text;
 949 }
 950
 951
 952 sub _DoImages {
 953 #
 954 # Turn Markdown image shortcuts into <img> tags.
 955 #
 956     my $text = shift;
 957
 958     #
 959     # First, handle reference-style labeled images: ![alt text][id]
 960     #
 961     $text =~ s{
 962         (               # wrap whole match in $1
 963           !\[
 964             (.*?)       # alt text = $2
 965           \]
 966
 967           [ ]?          # one optional space
 968           (?:\n[ ]*)?   # one optional newline followed by spaces
 969
 970           \[
 971             (.*?)       # id = $3
 972           \]
 973
 974         )
 975     }{
 976         my $result;
 977         my $whole_match = $1;
 978         my $alt_text    = _strip($2);
 979         my $link_id     = _strip(lc $3);
 980
 981         if ($link_id eq "") {
 982             $link_id = lc $alt_text; # for shortcut links like ![this][].
 983         }
 984
 985         $alt_text =~ s/"/&quot;/g;
 986         if (defined $g_urls{$link_id}) {
 987             my $url = _PrefixURL($g_urls{$link_id});
 988             # We've got to encode these to avoid conflicting
 989             # with italics, bold and strike through.
 990             $url =~ s!([*_~])!$g_escape_table{$1}!g;
 991             $result = "<img src=\"$url\" alt=\"$alt_text\"";
 992             if (defined $g_titles{$link_id}) {
 993                 my $title = $g_titles{$link_id};
 994                 $title =~ s!([*_~])!$g_escape_table{$1}!g;
 995                 $result .=  " title=\"$title\"";
 996             }
 997             $result .= $opt{empty_element_suffix};
 998         }
 999         else {
1000             # If there's no such link ID, leave intact:
1001             $result = $whole_match;
1002         }
1003
1004         $result;
1005     }xsge;
1006
1007     #
1008     # Next, handle inline images:  ![alt text](url "optional title")
1009     # Don't forget: encode * and _
1010
1011     $text =~ s{
1012         (               # wrap whole match in $1
1013           !\[
1014             (.*?)       # alt text = $2
1015           \]
1016           \(            # literal paren
1017             [ ]*
1018             <?(\S+?)>?  # src url = $3
1019             [ ]*
1020             (           # $4
1021               (['\042]) # quote char = $5
1022               (.*?)     # title = $6
1023               \5        # matching quote
1024               [ ]*
1025             )?          # title is optional
1026           \)
1027         )
1028     }{
1029         my $result;
1030         my $whole_match = $1;
1031         my $alt_text    = _strip($2);
1032         my $url         = $3;
1033         my $title       = '';
1034         if (defined($6)) {
1035             $title      = _strip($6);
1036         }
1037
1038         $url = _PrefixURL($url);
1039         $alt_text =~ s/"/&quot;/g;
1040         $title    =~ s/"/&quot;/g;
1041         # We've got to encode these to avoid conflicting
1042         # with italics, bold and strike through.
1043         $url =~ s!([*_~])!$g_escape_table{$1}!g;
1044         $result = "<img src=\"$url\" alt=\"$alt_text\"";
1045         if (defined $title) {
1046             $title =~ s!([*_~])!$g_escape_table{$1}!g;
1047             $result .= " title=\"$title\"";
1048         }
1049         $result .= $opt{empty_element_suffix};
1050
1051         $result;
1052     }xsge;
1053
1054     #
1055     # Finally, handle reference-style implicitly labeled links: ![alt text]
1056     #
1057     $text =~ s{
1058         (               # wrap whole match in $1
1059           !\[
1060             (.*?)       # alt text = $2
1061           \]
1062         )
1063     }{
1064         my $result;
1065         my $whole_match = $1;
1066         my $alt_text    = _strip($2);
1067         my $link_id     = lc $alt_text;
1068
1069         $alt_text =~ s/"/&quot;/g;
1070         if (defined $g_urls{$link_id}) {
1071             my $url = _PrefixURL($g_urls{$link_id});
1072             # We've got to encode these to avoid conflicting
1073             # with italics, bold and strike through.
1074             $url =~ s!([*_~])!$g_escape_table{$1}!g;
1075             $result = "<img src=\"$url\" alt=\"$alt_text\"";
1076             if (defined $g_titles{$link_id}) {
1077                 my $title = $g_titles{$link_id};
1078                 $title =~ s!([*_~])!$g_escape_table{$1}!g;
1079                 $result .=  " title=\"$title\"";
1080             }
1081             $result .= $opt{empty_element_suffix};
1082         }
1083         else {
1084             # If there's no such link ID, leave intact:
1085             $result = $whole_match;
1086         }
1087
1088         $result;
1089     }xsge;
1090
1091     return $text;
1092 }
1093
1094
sub _MakeAnchorId {
#
# Turn link text into the body of an anchor id, working with byte
# semantics.  Every run of bytes outside [-a-z0-9_] collapses to a single
# underscore; a result longer than 64 bytes is replaced by its MD5 hex
# digest.  Returns the id wrapped in underscores, or '' when nothing is
# left to build an id from.
#
    use bytes;
    my ($name) = @_;

    $name =~ tr/-a-z0-9_/_/cs;
    if ($name eq '') {
        return '';
    }
    if (length($name) > 64) {
        $name = md5_hex($name);
    }
    return join('', '_', $name, '_');
}
1103
1104
sub _GetNewAnchorId {
#
# Allocate an anchor id for the given header text.  The text is
# lower-cased and passed through _strip to form the lookup key.  When the
# key has no entry in %g_anchors yet and _MakeAnchorId yields a non-empty
# id, "#id" is recorded under that key and the id is returned; in every
# other case '' is returned and %g_anchors is left alone.
#
    my $key = _strip(lc($_[0]));

    if (!defined($g_anchors{$key})) {
        my $anchor = _MakeAnchorId($key);
        if ($anchor) {
            $g_anchors{$key} = "#$anchor";
            return $anchor;
        }
    }
    return '';
}
1113
1114
sub _DoHeaders {
#
# Convert Setext-style and atx-style headers in $text to <h1>..<h6>
# elements and return the converted text.
#
# Header levels 1 to 3 get an id attribute when _GetNewAnchorId hands one
# out.  When $anchors is true and $opt{h1} is not yet defined, the text of
# the first non-empty level 1 header (whitespace trimmed and collapsed) is
# stored in $opt{h1}.
#
    my ($text, $anchors) = @_;
    my $first_h1;

    # Remembers the first non-empty level 1 header text; a no-op unless
    # the caller asked for anchors and no h1 has been supplied already.
    my $note_h1 = sub {};
    if ($anchors && !defined($opt{h1})) {
        $note_h1 = sub {
            return if defined($first_h1);
            my ($candidate) = @_;
            $candidate =~ s/^\s+//;
            $candidate =~ s/\s+$//;
            $candidate =~ s/\s+/ /g;
            $first_h1 = $candidate if $candidate ne "";
        };
    }

    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    #     Header 3
    #     ~~~~~~~~
    #
    $text =~ s{ ^(?:=+[ ]*\n)?[ ]*(.+?)[ ]*\n=+[ ]*\n+ }{
        my $heading = $1;
        my $anchor = _GetNewAnchorId($heading);
        $note_h1->($heading);
        my $id_attr = $anchor ne "" ? " id=\"$anchor\"" : $anchor;
        "<h1$id_attr>" . _RunSpanGamut($heading) . "</h1>\n\n";
    }egmx;

    $text =~ s{ ^(?:-+[ ]*\n)?[ ]*(.+?)[ ]*\n-+[ ]*\n+ }{
        my $heading = $1;
        my $anchor = _GetNewAnchorId($heading);
        my $id_attr = $anchor ne "" ? " id=\"$anchor\"" : $anchor;
        "<h2$id_attr>" . _RunSpanGamut($heading) . "</h2>\n\n";
    }egmx;

    $text =~ s{ ^(?:~+[ ]*\n)?[ ]*(.+?)[ ]*\n~+[ ]*\n+ }{
        my $heading = $1;
        my $anchor = _GetNewAnchorId($heading);
        my $id_attr = $anchor ne "" ? " id=\"$anchor\"" : $anchor;
        "<h3$id_attr>" . _RunSpanGamut($heading) . "</h3>\n\n";
    }egmx;


    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text =~ s{
            ^(\#{1,6})  # $1 = string of #'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #'s (not counted)
            \n+
        }{
            my $heading = $2;
            my $level = length($1);
            # Only the first three levels are given anchors.
            my $anchor = '';
            $anchor = _GetNewAnchorId($heading) if $level <= 3;
            $note_h1->($heading) if $level == 1;
            my $id_attr = $anchor ne "" ? " id=\"$anchor\"" : $anchor;
            "<h$level$id_attr>" . _RunSpanGamut($heading) . "</h$level>\n\n";
        }egmx;

    if (defined($first_h1) && $first_h1 ne "") {
        $opt{h1} = $first_h1;
    }
    return $text;
}
1186
1187
# Pre-compiled list marker patterns shared by the list processing subs.
# They are assigned inside BEGIN so that they already hold their values
# when the rest of the file is compiled.
my ($marker_ul, $marker_ol, $marker_any, $roman_numeral, $greek_lower);
BEGIN {
    # Re-usable patterns to match list item bullets and number markers:

    # Roman numerals written with the letters I, V and X in either case;
    # the alternatives cover the values 1 (i) through 27 (xxvii).
    $roman_numeral = qr/(?:
        [IiVvXx]|[Ii]{2,3}|[Ii][VvXx]|[VvXx][Ii]{1,3}|[Xx][Vv][Ii]{0,3}|
        [Xx][Ii][VvXx]|[Xx]{2}[Ii]{0,3}|[Xx]{2}[Ii]?[Vv]|[Xx]{2}[Vv][Ii]{1,2})/ox;
    # One character in the range U+03B1 (alpha) .. U+03C9 (omega).
    $greek_lower = qr/(?:[\x{03b1}-\x{03c9}])/o;
    # Unordered list bullet: "*", "+" or "-".
    $marker_ul  = qr/[*+-]/o;
    # Ordered list marker: digits, one ASCII letter, a roman numeral or a
    # lower-case greek letter, followed by "." or ")".
    $marker_ol  = qr/(?:\d+|[A-Za-z]|$roman_numeral|$greek_lower)[.\)]/o;
    # Either kind of marker.
    $marker_any = qr/(?:$marker_ul|$marker_ol)/o;
}
1199
1200
sub _GetListMarkerType {
#
# Classify an ordered list marker and return the matching <ol> type value:
# "I" or "i" for roman numerals, "A" or "a" for letters (greek lower-case
# letters count as "a") and "1" for anything else.  Returns "" unless
# $list_type is "ol" (in any case) and $list_marker is true.
#
# $last_marker, when defined, is the marker of the preceding item; it
# settles whether a lone I, V or X (either case) continues an alphabetic
# sequence instead of being a roman numeral.
#
    my ($list_type, $list_marker, $last_marker) = @_;

    if (!$list_type || !$list_marker || lc($list_type) ne "ol") {
        return "";
    }

    # these are roman unless $last_marker type case matches and is 'a' or 'A'
    my $previous_type = '';
    if (defined($last_marker) && $list_marker =~ /^[IiVvXx][.\)]?$/) {
        $previous_type = _GetListMarkerType($list_type, $last_marker);
    }

    if ($list_marker =~ /^[IVX]/ && $previous_type ne 'A') {
        return "I";
    }
    if ($list_marker =~ /^[ivx]/ && $previous_type ne 'a') {
        return "i";
    }
    if ($list_marker =~ /^[A-Z]/) {
        return "A";
    }
    if ($list_marker =~ /^[a-z]/ || $list_marker =~ /^$greek_lower/o) {
        return "a";
    }
    return "1";
}
1215
1216
sub _GetListItemTypeClass {
#
# Return the two element list (marker type, list style name) for a list
# item marker.  The marker type comes from _GetListMarkerType.  The style
# name is "lower-greek" for a greek marker, one of "upper-roman",
# "lower-roman", "upper-alpha", "lower-alpha" or "decimal" for a marker
# ending in ")", and "" otherwise.
#
    my ($list_type, $list_marker, $last_marker) = @_;
    my $list_marker_type = _GetListMarkerType($list_type, $list_marker, $last_marker);

    my $style_name = "";
    if (length($list_marker) >= 2 && $list_marker_type =~ /^[IiAa1]$/) {
        if ($list_marker_type eq "a" && $list_marker =~ /^$greek_lower/o) {
            # Greek markers get their style whatever the suffix is.
            $style_name = "lower-greek";
        }
        elsif ($list_marker =~ /\)$/) {
            my %name_for = (
                "I" => "upper-roman",
                "i" => "lower-roman",
                "A" => "upper-alpha",
                "a" => "lower-alpha",
            );
            $style_name = exists($name_for{$list_marker_type})
                ? $name_for{$list_marker_type}
                : "decimal";
        }
    }
    return ($list_marker_type, $style_name);
}
1232
1233
# Lower-case roman numerals i .. xxvii mapped to their values 1 .. 27.
my %_roman_number_table;
BEGIN {
    # Listed in ascending order so each numeral's value is its position.
    my @numerals = qw(
        i     ii     iii     iv     v     vi     vii     viii     ix     x
        xi    xii    xiii    xiv    xv    xvi    xvii    xviii    xix    xx
        xxi   xxii   xxiii   xxiv   xxv   xxvi   xxvii
    );
    @_roman_number_table{@numerals} = (1 .. scalar(@numerals));
}
1266
1267
# Lower-case greek letters U+03B1 (alpha) .. U+03C9 (omega) mapped to their
# position in the alphabet, 1 .. 24.  A table is necessary because the
# final sigma (U+03C2) and sigma (U+03C3) share the same value, 18, so the
# value cannot be derived from the code point alone.  U+00B5 is "micro",
# not "mu", and is deliberately not part of the table.
my %_greek_number_table;
BEGIN {
    my $position = 0;
    foreach my $code_point (0x03b1 .. 0x03c9) {
        # sigma directly follows final sigma and re-uses its position
        ++$position unless $code_point == 0x03c3;
        $_greek_number_table{chr($code_point)} = $position;
    }
}
1300
1301
sub _GetMarkerIntegerNum {
#
# Convert a list marker value (the marker without its "." or ")" suffix)
# to a number according to the marker type:
#
#   "1"       the numeric value of the marker; 0 is allowed
#   "a", "A"  greek letters via %_greek_number_table, otherwise the
#             letter's distance from "a" plus one
#   "i", "I"  %_roman_number_table lookup, 1 when not found
#
# Any other type, and any result below 1 other than a decimal 0, gives 1.
#
    my ($list_marker_type, $marker_val) = @_;

    if ($list_marker_type eq "1") {
        my $decimal = 0 + $marker_val;
        return ($decimal == 0 || $decimal >= 1) ? $decimal : 1;
    }

    my $kind = lc($list_marker_type);
    my $number = 1;
    if ($kind eq "a") {
        if (defined($_greek_number_table{$marker_val})) {
            $number = $_greek_number_table{$marker_val};
        }
        else {
            $number = ord(lc($marker_val)) - ord("a") + 1;
        }
    }
    elsif ($kind eq "i") {
        my $numeral = lc($marker_val);
        $number = $_roman_number_table{$numeral}
            if defined($_roman_number_table{$numeral});
    }
    return $number >= 1 ? $number : 1;
}
1320
1321
sub _IncrList {
#
# Return the empty <span> elements that advance a list counter from $from
# up to $to, using as few spans as possible: steps of 10 first, then 5,
# then 2 and finally 1.  Each span carries the matching
# "$opt{style_prefix}ol-incr[-N]" class; $extra, when defined, is added
# to every span as an additional attribute.  Returns "" when $from is
# already at or beyond $to.
#
    my ($from, $to, $extra) = @_;
    my $attr = defined($extra) ? " $extra" : "";
    my $class = "$opt{style_prefix}ol-incr";
    my @spans;

    foreach my $step (10, 5, 2) {
        while ($from + $step <= $to) {
            push @spans, "<span$attr class=\"$class-$step\"></span>\n";
            $from += $step;
        }
    }
    while ($from < $to) {
        push @spans, "<span$attr class=\"$class\"></span>\n";
        ++$from;
    }
    return join("", @spans);
}
1344
1345
sub _DoLists {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# Takes the text to process and returns it with every recognized list
# replaced by its <ul> or <ol> markup.
#
    my $text = shift;
    my $indent = $opt{indent_width};
    my $less_than_indent = $indent - 1;
    my $less_than_double_indent = 2 * $indent - 1;

    # Re-usable pattern to match any entire ul or ol list:
    my $whole_list = qr{
        (                           # $1 (or $_[0]) = whole list
          (                         # $2 (or $_[1])
            (?:(?<=\n)|\A)
            [ ]{0,$less_than_indent}
            (${marker_any})         # $3 (or $_[2]) = first list item marker
            [ ]+
          )
          (?s:.+?)
          (                         # $4 (or $_[3])
              \z
            |
              \n{2,}
              (?=\S)
              (?!                   # Negative lookahead for another list item marker
                ${marker_any}[ ]
              )
          )
        )
    }mx;

    # Turns one matched list into markup.  It receives the four captures of
    # $whole_list but only needs the whole list and the first item marker.
    my $render_list = sub {
        my ($list, undef, $leading_marker) = @_;
        my $list_type = ($leading_marker =~ m/$marker_ul/) ? "ul" : "ol";
        my ($list_att, $list_class, $list_incr) = ("", "", "");

        # Turn double returns into triple returns, so that we can make a
        # paragraph for the last item in a list, if necessary:
        $list =~ s/\n\n/\n\n\n/g;
        my ($items, $first_marker, $fancy) = _ProcessListItems($list_type, $list);

        my $marker_type = _GetListMarkerType($list_type, $first_marker);
        if ($marker_type) {
            $first_marker =~ s/[.\)]$//;
            my $first_num = _GetMarkerIntegerNum($marker_type, $first_marker);
            $list_att = " type=\"$marker_type\"" unless $marker_type eq "1";
            if ($fancy) {
                $list_class = " class=\"$opt{style_prefix}ol\"";
                # Start at 1, 5 or 10 and let increment spans make up the
                # difference to the real first number.
                my $start = $first_num;
                if ($start > 10) {
                    $start = 10;
                }
                elsif ($start > 5 && $start < 10) {
                    $start = 5;
                }
                elsif ($start > 1 && $start < 5) {
                    $start = 1;
                }
                $list_att .= " start=\"$start\"" unless $start == 1;
                $list_incr = _IncrList($start, $first_num);
            }
            else {
                if ($marker_type eq "a" && $first_marker =~ /^$greek_lower/o) {
                    $list_class = " class=\"$opt{style_prefix}lc-greek\"";
                }
                $list_att .= " start=\"$first_num\"" unless $first_num == 1;
            }
        }
        return "<$list_type$list_att$list_class>\n$list_incr" . $items . "</$list_type>\n";
    };

    # We use a different prefix before nested lists than top-level lists.
    # See extended comment in _ProcessListItems().
    #
    # Note: (jg) There's a bit of duplication here. My original implementation
    # created a scalar regex pattern as the conditional result of the test on
    # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
    # substitution once, using the scalar as the pattern. This worked,
    # everywhere except when running under MT on my hosting account at Pair
    # Networks. There, this caused all rebuilds to be killed by the reaper (or
    # perhaps they crashed, but that seems incredibly unlikely given that the
    # same script on the same server ran fine *except* under MT. I've spent
    # more time trying to figure out why this is happening than I'd like to
    # admit. My only guess, backed up by the fact that this workaround works,
    # is that Perl optimizes the substitution when it can figure out that the
    # pattern will never change, and when this optimization isn't on, we run
    # afoul of the reaper. Thus, the slightly redundant code that uses two
    # static s/// patterns rather than one conditional pattern.
    #
    # Note: (kjm) With the addition of the two-of-the-same-kind-in-a-row-
    # starts-a-list-at-the-top-level rule the two patterns really are somewhat
    # different now, but the duplication has pretty much been eliminated via
    # use of a separate sub which has the side-effect of making the below
    # two cases much easier to grok all at once.

    if ($g_list_level) {
        $text =~ s{
                ^
                $whole_list
            }{
                $render_list->($1, $2, $3, $4);
            }egmx;
    }
    else {
        $text =~ s{
                (?: (?<=\n\n) |
                    \A\n? |
                    (?:(?<=\n) # two of the same kind of marker lines
                       (?=[ ]{0,$less_than_indent}$marker_ul[ ].*\n
                          [ ]{0,$less_than_indent}$marker_ul[ ])) |
                    (?:(?<=\n) # in a row will start a list
                       (?=[ ]{0,$less_than_indent}$marker_ol[ ].*\n
                          [ ]{0,$less_than_indent}$marker_ol[ ])) |
                    (?:(?<=\n) # or any marker and a sublist marker
                       (?=[ ]{0,$less_than_indent}$marker_any[ ].*\n
                          [ ]{$indent,$less_than_double_indent}$marker_any[ ]))
                )
                $whole_list
            }{
                $render_list->($1, $2, $3, $4);
            }egmx;
    }

    return $text;
}
1464
1465
sub _ProcessListItems {
#
#   Process the contents of a single ordered or unordered list, splitting it
#   into individual list items.
#
#   Takes the list type ("ul" or "ol") and the text of the whole list.
#   Returns a three element list: the <li> markup for all items, the marker
#   of the first item as it appeared in the text, and a true value when at
#   least one ordered item used a ")" style marker that needs styling.
#

    my $list_type = shift;
    my $list_str = shift;

    # The $g_list_level global keeps track of when we're inside a list.
    # Each time we enter a list, we increment it; when we leave a list,
    # we decrement. If it's zero, we're not in a list anymore.
    #
    # We do this because when we're not inside a list, we want to treat
    # something like this:
    #
    #   I recommend upgrading to version
    #   8. Oops, now this line is treated
    #   as a sub-list.
    #
    # As a single paragraph, despite the fact that the second line starts
    # with a digit-period-space sequence.
    #
    # Whereas when we're inside a list (or sub-list), that line will be
    # treated as the start of a sub-list. What a kludge, huh? This is
    # an aspect of Markdown's syntax that's hard to parse perfectly
    # without resorting to mind-reading. Perhaps the solution is to
    # change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".

    $g_list_level++;
    my $marker_kind = $list_type eq "ul" ? $marker_ul : $marker_ol;
    my $first_marker;
    my $first_marker_type;
    my $first_marker_num;
    my $last_marker;
    my $fancy;
    my $skipped;
    my $typechanged;
    my $next_num = 1;

    # trim trailing blank lines:
    $list_str =~ s/\n{2,}\z/\n/;

    my $result = "";
    my $oldpos = 0;
    pos($list_str) = 0;
    while ($list_str =~ m{\G            # start where we left off
        (\n+)?                          # leading line = $1
        (^[ ]*)                         # leading whitespace = $2
        ($marker_any) [ ] ([ ]*)        # list marker = $3 leading item space = $4
    }cgmx) {
        my $leading_line = $1;
        my $leading_space = $2;
        my $list_marker = $3;
        my $list_marker_len = length($list_marker);
        my $leading_item_space = $4;
        if ($-[0] > $oldpos) {
            $result .= substr($list_str, $oldpos, $-[0] - $oldpos); # Sort-of $`
            $oldpos = $-[0]; # point at start of this entire match
        }
        if (!defined($first_marker)) {
            $first_marker = $list_marker;
            $first_marker_type = _GetListMarkerType($list_type, $first_marker);
            if ($first_marker_type) {
                (my $marker_val = $first_marker) =~ s/[.\)]$//;
                $first_marker_num = _GetMarkerIntegerNum($first_marker_type, $marker_val);
                $next_num = $first_marker_num;
                $skipped = 1 if $next_num != 1;
            }
        } elsif ($list_marker !~ /$marker_kind/) {
            # Wrong marker kind, "fix up" the marker to a correct "lazy" marker
            # But keep the old length in $list_marker_len
            $list_marker = $last_marker;
        }

        # Now grab the rest of this item's data up to but excluding the next
        # list marker at the SAME indent level, but sublists must be INCLUDED

        my $item = "";
        while ($list_str =~ m{\G
            ((?:.+?)(?:\n{1,2}))        # list item text = $1
            (?= \n* (?: \z |            # end of string OR
                    (^[ ]*)             # leading whitespace = $2
                    ($marker_any)       # next list marker = $3
                    ([ ]+) ))           # one or more spaces after marker = $4
        }cgmxs) {

            # If $3 has a left edge that is at the left edge of the previous
            # marker OR $3 has a right edge that is at the right edge of the
            # previous marker then we stop; otherwise we go on

            $item .= substr($list_str, $-[0], $+[0] - $-[0]); # $&
            last if !defined($4) || length($2) == length($leading_space) ||
                length($2) + length($3) == length($leading_space) + $list_marker_len;
            # move along, you're not the marker droid we're looking for...
            $item .= substr($list_str, $+[0], $+[4] - $+[0]);
            pos($list_str) = $+[4]; # ...move along over the marker droid
        }
        # Remember where we parked
        $oldpos = pos($list_str);

        # Process the $list_marker $item

        my $liatt = '';
        my $checkbox = '';
        my $incr = '';

        if ($list_type eq "ul" && !$leading_item_space && $item =~ /^\[([ xX])\] +(.*)$/s) {
            my $checkmark = lc $1;
            $item = $2;
            my ($checkbox_class, $checkbox_val);
            if ($checkmark eq "x") {
                ($checkbox_class, $checkbox_val) = ("checkbox-on", "x");
            } else {
                ($checkbox_class, $checkbox_val) = ("checkbox-off", "&#160;");
            }
            $liatt = " class=\"$opt{style_prefix}$checkbox_class\"";
            $checkbox = "<span><span></span></span><span></span><span>[<tt>$checkbox_val</tt>]&#160;</span>";
        } else {
            my $list_marker_type;
            ($list_marker_type, $liatt) = _GetListItemTypeClass($list_type, $list_marker, $last_marker);
            if ($list_type eq "ol" && defined($first_marker)) {
                # Declared on its own line: a "my" combined with a statement
                # modifier has undefined behaviour (see perlsyn).
                my $styled;
                $styled = $fancy = 1 if $liatt && $list_marker =~ /\)$/;
                my ($sfx, $dash) = ("", "");
                ($sfx, $dash) = ("li", "-") if $styled;
                if ($liatt =~ /lower/) {
                    $sfx .= "${dash}lc";
                } elsif ($liatt =~ /upper/) {
                    $sfx .= "${dash}uc";
                }
                $sfx .= "-greek" if $liatt =~ /greek/;
                $liatt = " class=\"$opt{style_prefix}$sfx\"" if $sfx;
                $typechanged = 1 if $list_marker_type ne $first_marker_type;
                (my $marker_val = $list_marker) =~ s/[.\)]$//;
                my $marker_num = _GetMarkerIntegerNum($list_marker_type, $marker_val);
                # Item numbers never go backwards
                $marker_num = $next_num if $marker_num < $next_num;
                $skipped = 1 if $next_num < $marker_num;
                $incr = _IncrList($next_num, $marker_num, "incrlevel=$g_list_level");
                $liatt = " value=\"$marker_num\"$liatt" if $fancy || $skipped;
                $liatt = " type=\"$list_marker_type\"$liatt" if $styled || $typechanged;
                $next_num = $marker_num + 1;
            }
        }
        $last_marker = $list_marker;

        if ($leading_line or ($item =~ m/\n{2,}/)) {
            $item = _RunBlockGamut(_Outdent($item));
        }
        else {
            # Recursion for sub-lists:
            $item = _DoLists(_Outdent($item));
            chomp $item;
            $item = _RunSpanGamut($item);
        }

        # Append to $result
        $result .= "$incr<li$liatt>" . $checkbox . $item . "</li>\n";
    }

    # The style prefix is quoted with \Q...\E in the patterns below so that
    # a prefix containing regex metacharacters still matches literally.
    if ($fancy) {
        # remove "incrlevel=$g_list_level " parts
        $result =~ s{<span incrlevel=$g_list_level class="\Q$opt{style_prefix}\Eol-incr((?:-\d{1,2})?)">}
            {<span class="$opt{style_prefix}ol-incr$1">}g;
    } else {
        # remove the $g_list_level incr spans entirely
        $result =~ s{<span incrlevel=$g_list_level class="\Q$opt{style_prefix}\Eol-incr(?:-\d{1,2})?"></span>\n}{}g;
        # remove the class="$opt{style_prefix}lc-greek" if first_marker is greek
        $result =~ s{(<li[^>]*?) class="\Q$opt{style_prefix}\Elc-greek">}{$1>}g
            if defined($first_marker_type) && $first_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
    }

    # Anything left over (similar to $') goes into result, but this should always be empty
    $result .= _RunBlockGamut(substr($list_str, pos($list_str)));

    $g_list_level--;
    return ($result, $first_marker, $fancy);
}
1643
1644
sub _DoCodeBlocks {
#
#   Process Markdown `<pre><code>` blocks.
#
#   Each run of lines indented by at least $opt{indent_width} spaces is
#   outdented, encoded with _EncodeCode and wrapped in its markup.  The
#   markup is stored in %g_code_blocks under the key block_id returns for
#   it, and that key takes the place of the code block in the returned
#   text.
#

    my ($text) = @_;

    $text =~ s{
            (?:\n\n|\A\n?)
            (           # $1 = the code block -- one or more lines, starting with indent_width spaces
              (?:
                (?:[ ]{$opt{indent_width}})  # Lines must start with indent_width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,$opt{indent_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
        }{
            my $code = $1;

            # undo "paragraph for last list item" change
            $code =~ s/\n\n\n/\n\n/g;
            $code = _EncodeCode(_Outdent($code));
            # trim leading newlines and trailing whitespace
            $code =~ s/\A\n+//;
            $code =~ s/\s+\z//;

            my $html = join("",
                "<div class=\"$opt{style_prefix}code\"><pre style=\"display:none\"></pre><pre><code>",
                $code,
                "\n</code></pre></div>");
            my $key = block_id($html);
            $g_code_blocks{$key} = $html;
            "\n\n$key\n\n";
        }egmx;

    return $text;
}
1678
1679
sub _DoCodeSpans {
#
# Convert backtick quoted runs into <code></code> spans and return the
# converted text.
#
# * Any number of backticks may be used as the delimiter, which makes it
#   possible to include literal backticks in the code span. This input:
#
#     Just type ``foo `bar` baz`` at the prompt.
#
#   becomes:
#
#     <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#   There is no arbitrary limit to the number of backticks used as
#   delimiters. Three consecutive backticks in the code need four as
#   delimiters, etc.
#
# * Spaces next to the delimiters are dropped, which allows literal
#   backticks at the edges:
#
#     ... type `` `bar` `` ...
#
#   becomes:
#
#     ... type <code>`bar`</code> ...
#

    my ($text) = @_;

    $text =~ s{
            (`+)        # $1 = Opening run of `
            (.+?)       # $2 = The code block
            (?<!`)
            \1          # Matching closer
            (?!`)
        }{
            my $code = "$2";
            $code =~ s/^[ ]+//g; # leading whitespace
            $code =~ s/[ ]+$//g; # trailing whitespace
            "<code>" . _EncodeCode($code) . "</code>";
        }egsx;

    return $text;
}
1724
1725
sub _EncodeCode {
#
# Make the contents of a Markdown code run safe to emit: inside code
# the characters handled here are literals, so each one is replaced
# by something that can no longer be taken for HTML or Markdown syntax.
#
# Params:
#   the raw contents of a code span or code block
# Returns:
#   the encoded copy
#
    my ($code) = @_;

    # Every ampersand is encoded; HTML entities are not entities
    # within a Markdown code span.
    $code =~ s/&/&amp;/g;

    # Blosxom interpolates Perl variables in article bodies, so dollar
    # signs are hidden from it, but only when running under Blosxom.
    $code =~ s/\$/&#036;/g if $_haveBX;

    # Angle brackets
    $code =~ s/</&lt;/g;
    $code =~ s/>/&gt;/g;

    # Characters that are magic in Markdown are swapped for their
    # %g_escape_table entries.
    $code =~ s!([*_~{}\[\]\\])!$g_escape_table{$1}!g;

    return $code;
}
1751
1752
sub _DoItalicsAndBoldAndStrike {
#
# Convert emphasis markers into HTML elements:
#
#   **text** or __text__  =>  <strong>text</strong>
#   ~~text~~              =>  <strike>text</strike>
#   *text*   or _text_    =>  <em>text</em>
#
# The marked text must start and end with a non-space character.  The
# underscore forms additionally require that the opening marker is not
# preceded by, and the closing marker not followed by, a word character.
#
# Params:
#   $text - span-level text
# Returns:
#   the text with the markers replaced
#
    my ($text) = @_;

    # Applied strictly in this order: the doubled markers (<strong>)
    # must be consumed before the single ones are looked at.
    my @rules = (
        [ 'strong', qr{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }sx ],
        [ 'strong', qr{ (?<!\w) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\w) }sx ],
        [ 'strike', qr{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }sx ],
        [ 'em',     qr{ \* (?=\S) (.+?) (?<=\S) \* }sx ],
        [ 'em',     qr{ (?<!\w) _ (?=\S) (.+?) (?<=\S) _ (?!\w) }sx ],
    );

    foreach my $rule (@rules) {
        my ($tag, $re) = @$rule;
        $text =~ s{$re}{<$tag>$1</$tag>}g;
    }

    return $text;
}
1772
1773
sub _DoBlockQuotes {
#
# Wrap email-style quoted text ("> ...") in <blockquote> elements.
#
# Params:
#   $text - block-level text
# Returns:
#   the text with each run of quoted lines replaced by a <blockquote>.
#   One level of quoting is removed and the remainder is run through
#   _RunBlockGamut again, then indented by two spaces.
#
    my ($text) = @_;

    $text =~ s{
          (                     # group 1: the whole quoted run
            (
              ^[ ]*>[ ]?        # a line introduced by '>'
                .+\n            # ... and the rest of that line
              (.+\n)*           # non-blank lines following directly
              \n*               # any blank lines after them
            )+
          )
        }{
            my $quoted = $1;

            # Peel off one level of quoting, then empty the lines that
            # are left holding nothing but spaces.
            $quoted =~ s/^[ ]*>[ ]?//gm;
            $quoted =~ s/^[ ]+$//mg;

            # What remains is block-level Markdown in its own right.
            my $inner = _RunBlockGamut($quoted);
            $inner =~ s/^/  /mg;

            "<blockquote>\n" . $inner . "\n</blockquote>\n\n";
        }egmx;

    return $text;
}
1799
1800
sub _FormParagraphs {
#
# Split text into paragraphs and wrap them in <p> tags.
#
# Params:
#   $text - string to process with html <p> tags
# Returns:
#   the paragraphs joined by one blank line.  A paragraph that is a key
#   of %g_html_blocks is replaced by the stored block, one that is a key
#   of %g_code_blocks is passed through as is, and every other paragraph
#   is run through _RunSpanGamut and wrapped in <p>...</p>.
#
    my ($text) = @_;

    # Drop the newlines at either end of the text
    $text =~ s/\A\n+//;
    $text =~ s/\n+\z//;

    my @grafs = split(/\n{2,}/, $text);

    # First pass: wrap everything that is not a block placeholder.
    foreach my $graf (@grafs) {
        next if defined($g_html_blocks{$graf}) || defined($g_code_blocks{$graf});
        $graf = _RunSpanGamut($graf);
        $graf =~ s/^[ ]*/<p>/;      # leading spaces give way to the tag
        $graf .= "</p>";
    }

    # Second pass: swap the HTML block placeholders for the real blocks.
    return join "\n\n",
        map { defined($g_html_blocks{$_}) ? $g_html_blocks{$_} : $_ } @grafs;
}
1836
1837
# $g_possible_tag_name matches anything shaped like a tag name.
# %ok_tag_name maps each known (lower case) tag name to 1 when the tag
# may be passed through and to 0 when it is known but not allowed.
my $g_possible_tag_name;
my %ok_tag_name;
BEGIN {
    # At most ten letters: the longest name in the table below,
    # "blockquote", is ten characters long.
    $g_possible_tag_name = qr/(?i:[a-z]{1,10}|h[1-6])/o;

    my @allowed = qw(
        a abbr acronym address
        b basefont bdo big blockquote br
        caption center cite code col colgroup
        dd del dfn div dl dt
        em
        font
        h1 h2 h3 h4 h5 h6 hr
        i img ins
        kbd
        li
        ol
        p pre
        q
        s samp small span strike strong sub sup
        table tbody td tfoot th thead tr tt
        u ul
        var
    );

    # Known names that start out disabled (see _SetAllowedTag)
    my @disabled = qw(
        dir menu
    );

    %ok_tag_name = ();
    @ok_tag_name{@allowed}  = (1) x @allowed;
    @ok_tag_name{@disabled} = (0) x @disabled;
}
1866
1867
sub _SetAllowedTag {
#
# Allow or forbid one of the known tag names.
#
# Params:
#   $tag    - tag name, compared case-insensitively; names that are not
#             already keys of %ok_tag_name (and undef) are ignored
#   $forbid - true to forbid the tag, false or omitted to allow it
# Returns:
#   the new table value (1 = allowed, 0 = forbidden) when the tag is
#   known, otherwise a false value
#
        my ($tag, $forbid) = @_;
        return '' unless defined($tag);

        # The table keys are lower case and _DoTag looks names up via
        # lc(), so "DIR" has to address the same entry as "dir".
        $tag = lc($tag);
        return '' unless exists($ok_tag_name{$tag});

        return $ok_tag_name{$tag} = $forbid ? 0 : 1;
}
1873
1874
# Decide whether one "<...>" run may pass through as a tag.
#
# Runs starting with "<?", "<!" or "<$" are returned untouched (the "<$"
# case is legacy).  So are start tags, empty-element tags and end tags
# whose name has a true entry in %ok_tag_name.  In anything else every
# '<' is replaced by "&lt;".
sub _DoTag {
        my ($tag) = @_;

        return $tag if $tag =~ /^<[?\$!]/;

        my $name;
        if ($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)}) {
                $name = $1;     # start tag or empty-element tag
        }
        elsif ($tag =~ m{^</($g_possible_tag_name)\s*>}) {
                $name = $1;     # end tag
        }
        return $tag if defined($name) && $ok_tag_name{lc($name)};

        $tag =~ s/</&lt;/g;
        return $tag;
}
1888
1889
sub _EncodeAmpsAndAngles {
#
# Smart processing for ampersands and angle brackets that need to be
# encoded.
#
# Params:
#   $text - text to process
# Returns:
#   the text with bare ampersands turned into "&amp;" and with every '<'
#   that cannot start a tag turned into "&lt;"
#
    my ($text) = @_;

    for ($text) {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin:  http://bumppo.net/projects/amputator/
        # An ampersand that already starts an entity is left alone.
        s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;

        # Naked <'s: not followed by something that could begin a tag,
        # or without any '>' after them.
        s{<(?![a-z/?\$!])}{&lt;}gi;
        s{<(?=[^>]*$)}{&lt;}g;

        # Whatever still looks like a tag is left for _DoTag to judge.
        s{(<[^>]*>)}{_DoTag($1)}ige;
    }

    return $text;
}
1908
1909
sub _EncodeBackslashEscapes {
#
# Parameter: String.
# Returns:   The string with every backslash escape sequence replaced
#            by the %g_escape_table entry of the escaped character.
#
    my ($text) = @_;

    # Escaped backslashes must be processed first; otherwise the second
    # backslash of a pair could be taken as the start of another escape.
    $text =~ s!\\\\!$g_escape_table{'\\'}!go;

    # Then a backslash in front of any other escapable character
    $text =~ s{\\([`*_~{}\[\]()>#+\-.!`])}{$g_escape_table{$1}}g;

    return $text;
}
1922
1923
sub _DoAutoLinks {
#
# Turn automatic links into <a> elements.  Handled, in this order:
#
#   * <scheme:...> where the scheme is http, https, ftp or ftps; the
#     link text keeps the angle brackets (as &lt; and &gt;)
#   * <address@domain.foo>, optionally written with a "mailto:" prefix;
#     these are handed to _EncodeEmailAddress
#   * bare http/https/ftp/ftps URLs that do not directly follow a
#     quote, an angle bracket or an encoded '<'
#   * [RFC 1234] and [RFC1234] references
#
# Params:
#   the span-level text to scan
# Returns:
#   the text with the links inserted
#
    my ($text) = @_;

    $text =~ s{<((https?|ftps?):[^'\042>\s]+)>}{<a href="$1">&lt;$1&gt;</a>}gi;

    # Email addresses: <address@domain.foo>
    $text =~ s{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
    }{
        _EncodeEmailAddress(_UnescapeSpecialChars($1), "&#x3c;", "&#62;");
    }egix;

    # (kjm) I don't do "x" patterns
    $text =~ s{(?<![\042'<>])(?<!&[Ll][Tt];)(?<!&#60;)(?<!&#x3[Cc];)\b((?:https?|ftps?)://(?:[-a-zA-Z0-9./?\&\%=_~!*;:\@+\$,\x23](?:(?<![.,:;])|(?=[^\s])))+)}
     {<a href="$1">$1</a>}sog;
    $text =~ s{(?<![][])(?<!\] )\[RFC( ?)([0-9]{1,5})\](?![][])(?! \[)}
     {[<a href="http://tools.ietf.org/html/rfc$2">RFC$1$2</a>]}sog;

    return $text;
}
1951
1952
sub _EncodeEmailAddress {
#
# Input: an email address, e.g. "foo@example.com", optionally followed
#        by a prefix and a suffix to put around the visible address
#        (both default to the empty string).
#
# Output: the email address as a mailto link, with each character
#         of the address encoded as either a decimal or hex entity, in
#         the hopes of foiling most address harvesting spam bots. E.g.:
#
#   <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
#   x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
#   &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
#
# The random generator is seeded from the address itself, so a given
# address is always encoded the same way.  Note that this reseeds the
# process-wide generator used by rand().
#
# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
# mailing list: <http://tinyurl.com/yu7ue>
#

    my ($addr, $prefix, $suffix) = @_;
    $prefix = "" unless defined($prefix);
    $suffix = "" unless defined($suffix);

    # md5() croaks ("Wide character") on characters above 0xFF, so an
    # address containing any is hashed as its UTF-8 octets.  All other
    # addresses are hashed unchanged, which keeps their output exactly
    # as it always was.
    my $seed_source = $addr;
    utf8::encode($seed_source) if $seed_source =~ /[^\x00-\xFF]/;
    srand(unpack('N', md5($seed_source)));

    my @encode = (
        sub { '&#' .                 ord(shift)   . ';' },
        sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
        sub {                            shift          },
    );

    my $obfuscate = sub {
        my $char = shift;

        # leave ':' alone (to spot mailto: later)
        return $char if $char eq ':';

        # '@' *must* be encoded.  NOTE: "int rand 1" is always 0, so
        # the decimal form is always chosen; the call is kept as is so
        # that the random sequence, and with it the output, stays the
        # same as before.
        return $encode[int rand 1]->($char) if $char eq '@';

        # roughly 10% raw, 45% hex, 45% dec
        my $r = rand;
        return $r > .9  ? $encode[2]->($char)
             : $r < .45 ? $encode[1]->($char)
             :            $encode[0]->($char);
    };

    $addr = "mailto:" . $addr;
    $addr =~ s{(.)}{$obfuscate->($1)}ge;

    # strip the mailto: from the visible part
    (my $bareaddr = $addr) =~ s/^.+?://;

    return qq{<a href="$addr">$prefix$bareaddr$suffix</a>};
}
2006
2007
sub _UnescapeSpecialChars {
#
# Swap back in all the special characters we've hidden.
#
# Params:
#   $text - text that may contain %g_escape_table placeholder values
# Returns:
#   the text with every placeholder replaced by the character it
#   stands for
#
    my ($text) = @_;

    # keys() is used instead of each(): each() resumes wherever an
    # earlier, unfinished iteration over %g_escape_table stopped and
    # would then skip entries.  \Q...\E makes sure the placeholder is
    # matched literally and never interpreted as a pattern.
    foreach my $char (keys %g_escape_table) {
        my $hash = $g_escape_table{$char};
        $text =~ s/\Q$hash\E/$char/g;
    }
    return $text;
}
2019
2020
sub _TokenizeHTML {
#
# Parameter: String containing HTML markup.
# Returns:   Reference to an array of the tokens comprising the input
#            string. Each token is either a tag (possibly with nested
#            tags contained therein, such as <a href="<MTFoo>">), or a
#            run of text between tags. Each element of the array is a
#            two-element array; the first is either 'tag' or 'text';
#            the second is the actual value.
#
# Comments and processing instructions count as tags as well.
#
# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#   <http://www.bradchoate.com/past/mtregex.php>
#

    my ($str) = @_;
    my @tokens;
    my $cursor = 0;     # offset of the first character not yet tokenized

    # Tags may be nested inside one another, $depth levels in total.
    my $depth = 6;
    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
                   (?s: <\? .*? \?> ) |             # processing instruction
                   $nested_tags/iox;                # nested tags

    while ($str =~ m/($match)/g) {
        my ($tag_start, $tag_end) = ($-[1], $+[1]);

        # Whatever lies between the previous tag and this one is text.
        push @tokens, ['text', substr($str, $cursor, $tag_start - $cursor)]
            if $cursor < $tag_start;
        push @tokens, ['tag', $1];
        $cursor = $tag_end;
    }

    # Text after the last tag (or all of it when there was no tag)
    push @tokens, ['text', substr($str, $cursor)] if $cursor < length($str);

    return \@tokens;
}
2060
2061
sub _Outdent {
#
# Remove one level of line-leading indent_width of spaces.
# A line that starts with fewer spaces than that loses the spaces it has.
#
# Params:
#   the text to outdent
# Returns:
#   the outdented copy
#
    my ($text) = @_;
    my $width = $opt{indent_width};

    $text =~ s/^ {1,$width}//gm;
    return $text;
}
2071
2072
sub _Detab {
#
# Expand tabs to spaces.
#
# Params:
#   $text - text that may contain tabs
#   $ts   - distance between tab stops; $opt{tab_width} is used when
#           this is omitted (or false)
# Returns:
#   the text with every run of tabs replaced by the spaces needed to
#   reach the corresponding tab stop
#
    my ($text, $ts) = @_;
    $ts ||= $opt{tab_width};

    # From the Perl camel book "Fluent Perl" section (slightly modified).
    # The first group holds what precedes the tabs on the line since the
    # previous expansion; its length fixes how far away the next stop is.
    $text =~ s{(.*?)(\t+)}{
        my ($lead, $tabs) = ($1, $2);
        $lead . (' ' x (length($tabs) * $ts - length($lead) % $ts));
    }ge;

    return $text;
}
2083
2084
sub _PrefixURL {
#
# Add URL prefix if needed
#
# Params:
#   the URL as found in the document
# Returns:
#   the URL unchanged when no prefix applies: neither $opt{url_prefix}
#   nor $opt{img_prefix} is set, or the URL starts with "//" or with a
#   scheme such as "http:".  Otherwise the applicable prefix and the
#   URL joined by exactly one "/".  A non-empty $opt{img_prefix} is used
#   for URLs ending in an image suffix (png, gif, jpg, jpeg, svg, svgz),
#   $opt{url_prefix} for everything else.
#
    my $url = shift;

    return $url unless $opt{url_prefix} ne '' || $opt{img_prefix} ne '';
    return $url if $url =~ m,^//, || $url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/;

    my $ans = $opt{url_prefix};
    # "svgz?" matches both .svg and .svgz (the former "svg?z" could
    # never match a plain .svg)
    $ans = $opt{img_prefix}
        if $opt{img_prefix} ne '' && $url =~ /\.(?:png|gif|jpe?g|svgz?)$/i;
    return $url unless $ans ne '';

    # Exactly one slash between the prefix and the URL
    $ans .= '/' if substr($ans, -1, 1) ne '/';
    $ans .= substr($url, 0, 1) eq '/' ? substr($url, 1) : $url;
    return $ans;
}
2101
2102
# The fancy style sheet template.  The text is stored with surrounding
# white space removed and exactly one trailing newline.  The "%(base)"
# markers are placeholders; presumably they are replaced by the style
# prefix where the template is used (not visible in this part of the file).
BEGIN {
    $g_style_sheet = <<'STYLESHEET';

<style type="text/css">
/* <![CDATA[ */

/* Markdown.pl fancy style sheet
** Copyright (C) 2017 Kyle J. McKay.
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are met:
**
**   1. Redistributions of source code must retain the above copyright notice,
**      this list of conditions and the following disclaimer.
**
**   2. Redistributions in binary form must reproduce the above copyright
**      notice, this list of conditions and the following disclaimer in the
**      documentation and/or other materials provided with the distribution.
**
**   3. Neither the name of the copyright holder nor the names of its
**      contributors may be used to endorse or promote products derived from
**      this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
** POSSIBILITY OF SUCH DAMAGE.
*/

div.%(base)code-bt > pre, div.%(base)code > pre {
        margin: 0;
        padding: 0;
        overflow: auto;
}

div.%(base)code-bt > pre > code, div.%(base)code > pre > code {
        display: inline-block;
        margin: 0;
        padding: 0.5em 0;
        border-top: thin dotted;
        border-bottom: thin dotted;
}

ol.%(base)ol {
        counter-reset: %(base)item;
}
ol.%(base)ol[start="0"] {
        counter-reset: %(base)item -1;
}
ol.%(base)ol[start="5"] {
        counter-reset: %(base)item 4;
}
ol.%(base)ol[start="10"] {
        counter-reset: %(base)item 9;
}
ol.%(base)ol > span.%(base)ol-incr {
        counter-increment: %(base)item;
}
ol.%(base)ol > span.%(base)ol-incr-2 {
        counter-increment: %(base)item 2;
}
ol.%(base)ol > span.%(base)ol-incr-5 {
        counter-increment: %(base)item 5;
}
ol.%(base)ol > span.%(base)ol-incr-10 {
        counter-increment: %(base)item 10;
}
ol.%(base)lc-greek, li.%(base)lc-greek {
        list-style-type: lower-greek;
}
ol.%(base)ol > li {
        counter-increment: %(base)item;
}
ol.%(base)ol > li.%(base)li,
ol.%(base)ol > li.%(base)li-lc,
ol.%(base)ol > li.%(base)li-lc-greek,
ol.%(base)ol > li.%(base)li-uc {
        list-style-type: none;
        display: block;
}
ol.%(base)ol > li.%(base)li:before,
ol.%(base)ol > li.%(base)li-lc:before,
ol.%(base)ol > li.%(base)li-lc-greek:before,
ol.%(base)ol > li.%(base)li-uc:before {
        position: absolute;
        text-align: right;
        white-space: nowrap;
        margin-left: -9ex;
        width: 9ex;
}
ol.%(base)ol > li.%(base)li[type="1"]:before {
        content: counter(%(base)item, decimal) ")\A0 \A0 ";
}
ol.%(base)ol > li.%(base)li-lc[type="i"]:before,
ol.%(base)ol > li.%(base)li-lc[type="I"]:before {
        content: counter(%(base)item, lower-roman) ")\A0 \A0 ";
}
ol.%(base)ol > li.%(base)li-uc[type="I"]:before,
ol.%(base)ol > li.%(base)li-uc[type="i"]:before {
        content: counter(%(base)item, upper-roman) ")\A0 \A0 ";
}
ol.%(base)ol > li.%(base)li-lc[type="a"]:before,
ol.%(base)ol > li.%(base)li-lc[type="A"]:before {
        content: counter(%(base)item, lower-alpha) ")\A0 \A0 ";
}
ol.%(base)ol > li.%(base)li-lc-greek[type="a"]:before,
ol.%(base)ol > li.%(base)li-lc-greek[type="A"]:before {
        content: counter(%(base)item, lower-greek) ")\A0 \A0 ";
}
ol.%(base)ol > li.%(base)li-uc[type="A"]:before,
ol.%(base)ol > li.%(base)li-uc[type="a"]:before {
        content: counter(%(base)item, upper-alpha) ")\A0 \A0 ";
}

li.%(base)checkbox-on,
li.%(base)checkbox-off {
        list-style-type: none;
        display: block;
}
li.%(base)checkbox-on > span:first-child + span + span,
li.%(base)checkbox-off > span:first-child + span + span {
        position: absolute;
        clip: rect(0,0,0,0);
}
li.%(base)checkbox-on > span:first-child,
li.%(base)checkbox-off > span:first-child,
li.%(base)checkbox-on > span:first-child + span,
li.%(base)checkbox-off > span:first-child + span {
        display: block;
        position: absolute;
        margin-left: -3ex;
        width: 1em;
        height: 1em;
}
li.%(base)checkbox-on > span:first-child > span:first-child,
li.%(base)checkbox-off > span:first-child > span:first-child {
        display: block;
        position: absolute;
        left: 0.75pt; top: 0.75pt; right: 0.75pt; bottom: 0.75pt;
}
li.%(base)checkbox-on > span:first-child > span:first-child:before,
li.%(base)checkbox-off > span:first-child > span:first-child:before {
        display: inline-block;
        position: relative;
        right: 1pt;
        width: 100%;
        height: 100%;
        border: 1pt solid;
        content: "";
}
li.%(base)checkbox-on > span:first-child + span:before {
        position: relative;
        left: 2pt;
        bottom: 1pt;
        font-size: 125%;
        line-height: 80%;
        content: "\2713";
}

/* ]]> */
</style>

STYLESHEET

    # Strip the white space around the template, then finish it with
    # exactly one newline.
    $g_style_sheet =~ s/\A\s+//;
    $g_style_sheet =~ s/\s+\z//;
    $g_style_sheet .= "\n";
}

# A file loaded via require/use has to end with a true value.
1;
2280
2281 __DATA__
2282
2283 =head1 NAME
2284
2285 Markdown.pl - convert Markdown format text files to HTML
2286
2287 =head1 SYNOPSIS
2288
2289 B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
2290     [B<--imageroot>=I<prefix>] [B<--version>] [B<--shortversion>]
2291     [B<--tabwidth>=I<num>] [B<--stylesheet>] [B<--stub>] [--]
2292     [I<file>...]
2293
2294  Options:
2295    -h                                   show short usage help
2296    --help                               show long detailed help
2297    --html4tags                          use <br> instead of <br />
2298    --deprecated                         allow <dir> and <menu> tags
2299    --tabwidth=num                       expand tabs to num instead of 8
2300    -r prefix | --htmlroot=prefix        append relative non-img URLs
2301                                         to prefix
2302    -i prefix | --imageroot=prefix       append relative img URLs to
2303                                         prefix
2304    -V | --version                       show version, authors, license
2305                                         and copyright
2306    -s | --shortversion                  show just the version number
2307    --stylesheet                         output the fancy style sheet
2308    --no-stylesheet                      do not output fancy style sheet
2309    --stub                               wrap output in stub document
2310                                         implies --stylesheet
2311    --                                   end options and treat next
2312                                         argument as file
2313
2314 =head1 DESCRIPTION
2315
2316 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2317 easy-to-write structured text format into HTML. Markdown's text format
2318 is most similar to that of plain text email, and supports features such
2319 as headers, *emphasis*, code blocks, blockquotes, and links.
2320
2321 Markdown's syntax is designed not as a generic markup language, but
2322 specifically to serve as a front-end to (X)HTML. You can use span-level
2323 HTML tags anywhere in a Markdown document, and you can use block level
2324 HTML tags (like <div> and <table>) as well.
2325
2326 For more information about Markdown's syntax, see the F<basics.md>
2327 and F<syntax.md> files included with F<Markdown.pl>.
2328
2329 Input (auto-detected) may be either ISO-8859-1 or UTF-8.  Output is always
2330 converted to the UTF-8 character set.
2331
2332
2333 =head1 OPTIONS
2334
2335 Use "--" to end switch parsing. For example, to open a file named "-z", use:
2336
2337     Markdown.pl -- -z
2338
2339 =over
2340
2341
2342 =item B<--html4tags>
2343
2344 Use HTML 4 style for empty element tags, e.g.:
2345
2346     <br>
2347
2348 instead of Markdown's default XHTML style tags, e.g.:
2349
2350     <br />
2351
2352
2353 =item B<--deprecated>
2354
2355 Both "<dir>" and "<menu>" are normally taken as literal text and the leading
2356 "<" will be automatically escaped.
2357
2358 If this option is used, they are recognized as valid tags and passed through
2359 without being escaped.
2360
2361 When dealing with program argument descriptions, "<dir>" can be particularly
2362 problematic; therefore, use of this option is not recommended.
2363
2364 Other deprecated tags (such as "<font>" and "<center>" for example) continue
2365 to be recognized and passed through even without using this option.
2366
2367
2368 =item B<--tabwidth>=I<num>
2369
2370 Expand tabs to I<num> character wide tab stop positions instead of the default
2371 8.  Don't use this; physical tabs should always be expanded to 8-character
2372 positions.  This option does I<not> affect the number of spaces needed to
2373 start a new "indent level".  That will always be 4 no matter what value is
2374 used (or implied by default) with this option.  Also note that tabs inside
2375 backticks-delimited code blocks will always be expanded to 8-character tab
2376 stop positions no matter what value is used for this option.
2377
2378 The value must be S<2 <= I<num> <= 32>.
2379
2380
2381 =item B<-r> I<prefix>, B<--htmlroot>=I<prefix>
2382
2383 Any non-absolute URLs have I<prefix> prepended.
2384
2385
2386 =item B<-i> I<prefix>, B<--imageroot>=I<prefix>
2387
2388 Any non-absolute URLs have I<prefix> prepended (overriding the B<-r> prefix
2389 if any) but only if they end in an image suffix.
2390
2391
2392 =item B<-V>, B<--version>
2393
2394 Display Markdown's version number and copyright information.
2395
2396
2397 =item B<-s>, B<--shortversion>
2398
2399 Display the short-form version number.
2400
2401
2402 =item B<--stylesheet>
2403
2404 Include the fancy style sheet at the beginning of the output (or in the
2405 C<head> section with B<--stub>).  This style sheet makes fancy checkboxes
2406 and makes a right parenthesis C<)> show instead of a C<.> for ordered lists
2407 that use them.  Without it things will still look fine except that the
2408 fancy stuff won't be there.
2409
2410 Use this option with no other arguments and redirect standard input to
2411 /dev/null to get just the style sheet and nothing else.
2412
2413
2414 =item B<--no-stylesheet>
2415
2416 Overrides a previous B<--stylesheet> and disables implicit inclusion
2417 of the style sheet by the B<--stub> option.
2418
2419
2420 =item B<--stub>
2421
2422 Wrap the output in a full document stub (i.e. has C<html>, C<head> and C<body>
2423 tags).  The style sheet I<will> be included in the C<head> section unless the
2424 B<--no-stylesheet> option is also used.
2425
2426
2427 =item B<-h>, B<--help>
2428
2429 Display Markdown's help.  With B<--help> full help is shown, with B<-h> only
2430 the usage and options are shown.
2431
2432
2433 =back
2434
2435
2436 =head1 VERSION HISTORY
2437
2438 Z<> See the F<README> file for detailed release notes for this version.
2439
2440 =over
2441
2442 =item Z<> 1.1.4 - 24 Jun 2017
2443
2444 =item Z<> 1.1.3 - 13 Feb 2017
2445
2446 =item Z<> 1.1.2 - 19 Jan 2017
2447
2448 =item Z<> 1.1.1 - 12 Jan 2017
2449
2450 =item Z<> 1.1.0 - 11 Jan 2017
2451
2452 =item Z<> 1.0.4 - 05 Jun 2016
2453
2454 =item Z<> 1.0.3 - 06 Sep 2015
2455
2456 =item Z<> 1.0.2 - 03 Sep 2015
2457
2458 =item Z<> 1.0.1 - 14 Dec 2004
2459
2460 =item Z<> 1.0.0 - 28 Aug 2004
2461
2462 =back
2463
2464 =head1 AUTHORS
2465
2466 =over
2467
2468 =item John Gruber
2469
2470 =item L<http://daringfireball.net>
2471
2472 =item L<http://daringfireball.net/projects/markdown/>
2473
2474 =item E<160>
2475
2476 =back
2477
2478 =over
2479
2480 =item PHP port and other contributions by Michel Fortin
2481
2482 =item L<http://michelf.com>
2483
2484 =item E<160>
2485
2486 =back
2487
2488 =over
2489
2490 =item Additional enhancements and tweaks by Kyle J. McKay
2491
2492 =item mackyle<at>gmail.com
2493
2494 =back
2495
2496 =head1 COPYRIGHT AND LICENSE
2497
2498 =over
2499
2500 =item Copyright (C) 2003-2004 John Gruber
2501
2502 =item Copyright (C) 2015-2017 Kyle J. McKay
2503
2504 =item All rights reserved.
2505
2506 =back
2507
2508 Redistribution and use in source and binary forms, with or without
2509 modification, are permitted provided that the following conditions are
2510 met:
2511
2512 =over
2513
2514 =item *
2515
2516 Redistributions of source code must retain the above copyright
2517 notice, this list of conditions and the following disclaimer.
2518
2519 =item *
2520
2521 Redistributions in binary form must reproduce the above copyright
2522 notice, this list of conditions and the following disclaimer in the
2523 documentation and/or other materials provided with the distribution.
2524
2525 =item *
2526
2527 Neither the name "Markdown" nor the names of its contributors may
2528 be used to endorse or promote products derived from this software
2529 without specific prior written permission.
2530
2531 =back
2532
2533 This software is provided by the copyright holders and contributors "as
2534 is" and any express or implied warranties, including, but not limited
2535 to, the implied warranties of merchantability and fitness for a
2536 particular purpose are disclaimed. In no event shall the copyright owner
2537 or contributors be liable for any direct, indirect, incidental, special,
2538 exemplary, or consequential damages (including, but not limited to,
2539 procurement of substitute goods or services; loss of use, data, or
2540 profits; or business interruption) however caused and on any theory of
2541 liability, whether in contract, strict liability, or tort (including
2542 negligence or otherwise) arising in any way out of the use of this
2543 software, even if advised of the possibility of such damage.
2544
2545 =cut