Markdown.pl

   1 #!/usr/bin/env perl
   2
   3 #
   4 # Markdown -- A text-to-HTML conversion tool for web writers
   5 #
   6 # Copyright (C) 2004 John Gruber
   7 # Copyright (C) 2015,2016,2017,2018,2019 Kyle J. McKay
   8 # All rights reserved.
   9 # License is Modified BSD (aka 3-clause BSD) License\n";
  10 # See LICENSE file (or <https://opensource.org/licenses/BSD-3-Clause>)
  11 #
  12
  13 package Markdown;
  14
  15 require 5.008;
  16 use strict;
  17 use warnings;
  18
  19 use Encode;
  20
  21 use vars qw($COPYRIGHT $VERSION @ISA @EXPORT_OK);
  22
  23 BEGIN {*COPYRIGHT =
  24 \"Copyright (C) 2004 John Gruber
  25 Copyright (C) 2015,2016,2017,2018,2019 Kyle J. McKay
  26 All rights reserved.
  27 ";
  28 *VERSION = \"1.1.8-PRE"
  29 }
  30
  31 require Exporter;
  32 use Digest::MD5 qw(md5 md5_hex);
  33 use File::Basename qw(basename);
  34 use Scalar::Util qw(refaddr looks_like_number);
  35 use Pod::Usage;
  36 @ISA = qw(Exporter);
  37 @EXPORT_OK = qw(Markdown);
  38 $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'};
  39
  40 close(DATA) if fileno(DATA);
  41 exit(&_main(@ARGV)||0) unless caller;
  42
  43 my $encoder;
  44 BEGIN {
  45         $encoder = Encode::find_encoding('Windows-1252') ||
  46                    Encode::find_encoding('ISO-8859-1') or
  47                    die "failed to load ISO-8859-1 encoder\n";
  48 }
  49
  50 #
  51 # Global default settings:
  52 #
  53 my ($g_style_prefix, $g_empty_element_suffix, $g_indent_width, $g_tab_width);
  54 BEGIN {
  55     $g_style_prefix = "_markdown-";     # Prefix for markdown css class styles
  56     $g_empty_element_suffix = " />";    # Change to ">" for HTML output
  57     $g_indent_width = 4;                # Number of spaces considered new level
  58     $g_tab_width = 4;                   # Legacy even though it's wrong
  59 }
  60
  61
  62 #
  63 # Globals:
  64 #
  65
  66 # Style sheet template
  67 my $g_style_sheet;
  68
  69 # Permanent block id table
  70 my %g_perm_block_ids;
  71
  72 # Global hashes, used by various utility routines
  73 my %g_urls;
  74 my %g_titles;
  75 my %g_anchors;
  76 my %g_anchors_id;
  77 my %g_block_ids;
  78 my %g_code_block_ids;
  79 my %g_html_blocks;
  80 my %g_code_blocks;
  81 my %opt;
  82
  83 # Return a "block id" to use to identify the block that does not contain
  84 # any characters that could be misinterpreted by the rest of the code
  85 # Originally this used md5_hex but that's unnecessarily slow
  86 # Instead just use the refaddr of the scalar ref of the entry for that
  87 # key in either the global or, if the optional second argument is true,
  88 # permanent table.  To avoid the result being confused with anything
  89 # else, it's prefixed with a control character and suffixed with another
  90 # both of which are not allowed by the XML standard or Unicode.
  91 sub block_id {
  92     $_[1] or return "\5".refaddr(\$g_block_ids{$_[0]})."\6";
  93     $_[1] == 1 and return "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3";
  94     $_[1] == 2 and return "\25".refaddr(\$g_code_block_ids{$_[0]})."\26";
  95     die "programmer error: bad block_id type $_[1]";
  96 }
  97
  98 # Regex to match balanced [brackets]. See Friedl's
  99 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
 100 my $g_nested_brackets;
 101 BEGIN {
 102     $g_nested_brackets = qr{
 103     (?>                                 # Atomic matching
 104         [^\[\]]+                        # Anything other than brackets
 105      |
 106         \[
 107             (??{ $g_nested_brackets })  # Recursive set of nested brackets
 108         \]
 109     )*
 110     }ox
 111 }
 112
 113
 114 # Table of hash values for escaped characters:
 115 my %g_escape_table;
 116 BEGIN {
 117     $g_escape_table{""} = "\2\3";
 118     foreach my $char (split //, "\\\`*_~{}[]()>#+-.!|:") {
 119         $g_escape_table{$char} = block_id($char,1);
 120     }
 121 }
 122
 123 # Used to track when we're inside an ordered or unordered list
 124 # (see _ProcessListItems() for details):
 125 my $g_list_level;
 126 BEGIN {
 127     $g_list_level = 0;
 128 }
 129
 130
 131 #### Blosxom plug-in interface ##########################################
 132 my $_haveBX;
 133 BEGIN {
 134     no warnings 'once';
 135     $_haveBX = defined($blosxom::version);
 136 }
 137
 138 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
 139 # which posts Markdown should process, using a "meta-markup: markdown"
 140 # header. If it's set to 0 (the default), Markdown will process all
 141 # entries.
 142 my $g_blosxom_use_meta;
 143 BEGIN {
 144     $g_blosxom_use_meta = 0;
 145 }
 146
 147 sub start { 1; }
 148 sub story {
 149     my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
 150
 151     if ((! $g_blosxom_use_meta) or
 152         (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
 153          ) {
 154             $$body_ref = Markdown($$body_ref);
 155     }
 156     1;
 157 }
 158
 159
 160 #### Movable Type plug-in interface #####################################
 161 my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT
 162 my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0.
 163
 164 unless ($_haveMT) {
 165     require MT;
 166     import  MT;
 167     require MT::Template::Context;
 168     import  MT::Template::Context;
 169
 170     unless ($_haveMT3) {
 171         require MT::Plugin;
 172         import  MT::Plugin;
 173         my $plugin = new MT::Plugin({
 174             name => "Markdown",
 175             description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
 176             doc_link => 'http://daringfireball.net/projects/markdown/'
 177         });
 178         MT->add_plugin( $plugin );
 179     }
 180
 181     MT::Template::Context->add_container_tag(MarkdownOptions => sub {
 182         my $ctx  = shift;
 183         my $args = shift;
 184         my $builder = $ctx->stash('builder');
 185         my $tokens = $ctx->stash('tokens');
 186
 187         if (defined ($args->{'output'}) ) {
 188             $ctx->stash('markdown_output', lc $args->{'output'});
 189         }
 190
 191         defined (my $str = $builder->build($ctx, $tokens) )
 192             or return $ctx->error($builder->errstr);
 193         $str; # return value
 194     });
 195
 196     MT->add_text_filter('markdown' => {
 197         label     => 'Markdown',
 198         docs      => 'http://daringfireball.net/projects/markdown/',
 199         on_format => sub {
 200             my $text = shift;
 201             my $ctx  = shift;
 202             my $raw  = 0;
 203             if (defined $ctx) {
 204             my $output = $ctx->stash('markdown_output');
 205                 if (defined $output && $output =~ m/^html/i) {
 206                     $g_empty_element_suffix = ">";
 207                     $ctx->stash('markdown_output', '');
 208                 }
 209                 elsif (defined $output && $output eq 'raw') {
 210                     $raw = 1;
 211                     $ctx->stash('markdown_output', '');
 212                 }
 213                 else {
 214                     $raw = 0;
 215                     $g_empty_element_suffix = " />";
 216                 }
 217             }
 218             $text = $raw ? $text : Markdown($text);
 219             $text;
 220         },
 221     });
 222
 223     # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
 224     my $smartypants;
 225
 226     {
 227         no warnings "once";
 228         $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
 229     }
 230
 231     if ($smartypants) {
 232         MT->add_text_filter('markdown_with_smartypants' => {
 233             label     => 'Markdown With SmartyPants',
 234             docs      => 'http://daringfireball.net/projects/markdown/',
 235             on_format => sub {
 236                 my $text = shift;
 237                 my $ctx  = shift;
 238                 if (defined $ctx) {
 239                     my $output = $ctx->stash('markdown_output');
 240                     if (defined $output && $output eq 'html') {
 241                         $g_empty_element_suffix = ">";
 242                     }
 243                     else {
 244                         $g_empty_element_suffix = " />";
 245                     }
 246                 }
 247                 $text = Markdown($text);
 248                 $text = $smartypants->($text, '1');
 249             },
 250         });
 251     }
 252 }
 253
 254 sub _strip {
 255         my $str = shift;
 256         defined($str) or return undef;
 257         $str =~ s/^\s+//;
 258         $str =~ s/\s+$//;
 259         $str =~ s/\s+/ /g;
 260         $str;
 261 }
 262
 263 #### BBEdit/command-line text filter interface ##########################
 264 sub _main {
 265     local *ARGV = \@_;
 266
 267
 268     #### Check for command-line switches: #################
 269     my %options = ();
 270     my %cli_opts;
 271     use Getopt::Long;
 272     Getopt::Long::Configure(qw(bundling require_order pass_through));
 273     GetOptions(\%cli_opts,
 274         'help','h',
 275         'version|V',
 276         'shortversion|short-version|s',
 277         'html4tags',
 278         'deprecated',
 279         'htmlroot|r=s',
 280         'imageroot|i=s',
 281         'tabwidth|tab-width=s',
 282         'stylesheet|style-sheet',
 283         'no-stylesheet|no-style-sheet',
 284         'stub',
 285     );
 286     if ($cli_opts{'help'}) {
 287         pod2usage(-verbose => 2, -exitval => 0);
 288     }
 289     if ($cli_opts{'h'}) {
 290         pod2usage(-verbose => 0, -exitval => 0);
 291     }
 292     if ($cli_opts{'version'}) { # Version info
 293         print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
 294         print "License is Modified BSD (aka 3-clause BSD) License\n";
 295         print "<https://opensource.org/licenses/BSD-3-Clause>\n";
 296         exit 0;
 297     }
 298     if ($cli_opts{'shortversion'}) { # Just the version number string.
 299         print $VERSION;
 300         exit 0;
 301     }
 302     my $stub = 0;
 303     if ($cli_opts{'stub'}) {
 304         $stub = 1;
 305     }
 306     if ($cli_opts{'html4tags'}) {        # Use HTML tag style instead of XHTML
 307         $options{empty_element_suffix} = ">";
 308         $stub = -$stub;
 309     }
 310     if ($cli_opts{'deprecated'}) {       # Allow <dir> and <menu> tags to pass through
 311         _SetAllowedTag("dir");
 312         _SetAllowedTag("menu");
 313     }
 314     if ($cli_opts{'tabwidth'}) {
 315         my $tw = $cli_opts{'tabwidth'};
 316         die "invalid tab width (must be integer)\n" unless looks_like_number $tw;
 317         die "invalid tab width (must be >= 2 and <= 32)\n" unless $tw >= 2 && $tw <= 32;
 318         $options{tab_width} = int(0+$tw);
 319     }
 320     if ($cli_opts{'htmlroot'}) {         # Use URL prefix
 321         $options{url_prefix} = $cli_opts{'htmlroot'};
 322     }
 323     if ($cli_opts{'imageroot'}) {        # Use image URL prefix
 324         $options{img_prefix} = $cli_opts{'imageroot'};
 325     }
 326     if ($cli_opts{'stylesheet'}) {  # Display the style sheet
 327         $options{show_styles} = 1;
 328     }
 329     if ($cli_opts{'no-stylesheet'}) {  # Do not display the style sheet
 330         $options{show_styles} = 0;
 331     }
 332     $options{show_styles} = 1 if $stub && !defined($options{show_styles});
 333     $options{tab_width} = 8 unless defined($options{tab_width});
 334
 335     my $hdr = sub {
 336         if ($stub > 0) {
 337             print <<'HTML5';
 338 <!DOCTYPE html>
 339 <html xmlns="http://www.w3.org/1999/xhtml">
 340 <head>
 341 <meta charset="utf-8" />
 342 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 343 HTML5
 344         } elsif ($stub < 0) {
 345             print <<'HTML4';
 346 <html>
 347 <head>
 348 <meta charset="utf-8">
 349 <meta http-equiv="content-type" content="text/html; charset=utf-8">
 350 HTML4
 351         }
 352         if ($stub && ($options{title} || $options{h1})) {
 353             my $title = $options{title};
 354             defined($title) && $title ne "" or $title = $options{h1};
 355             if (defined($title) && $title ne "") {
 356                 $title =~ s/&/&amp;/g;
 357                 $title =~ s/</&lt;/g;
 358                 print "<title>$title</title>\n";
 359             }
 360         }
 361         if ($options{show_styles}) {
 362             my $stylesheet = $g_style_sheet;
 363             $stylesheet =~ s/%\(base\)/$g_style_prefix/g;
 364             print $stylesheet;
 365         }
 366         if ($stub) {
 367             print "</head>\n<body style=\"text-align:center\">\n",
 368                 "<div style=\"display:inline-block;text-align:left;max-width:42pc\">\n";
 369         }
 370     };
 371
 372     #### Process incoming text: ###########################
 373     my $didhdr;
 374     for (;;) {
 375         local $_;
 376         {
 377             local $/; # Slurp the whole file
 378             $_ = <>;
 379         }
 380         defined($_) or last;
 381         my $result = Markdown($_, \%options);
 382         if ($result ne "") {
 383             if (!$didhdr) {
 384                 &$hdr();
 385                 $didhdr = 1;
 386             }
 387             print $result;
 388         }
 389     }
 390     &$hdr() unless $didhdr;
 391     print "</div>\n</body>\n</html>\n" if $stub;
 392
 393     exit 0;
 394 }
 395
 396
 397 sub Markdown {
 398 #
 399 # Primary function. The order in which other subs are called here is
 400 # essential. Link and image substitutions need to happen before
 401 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
 402 # and <img> tags get encoded.
 403 #
 404     my $_text = shift;
 405     defined $_text or $_text='';
 406
 407     my $text;
 408     if (Encode::is_utf8($_text) || utf8::decode($_text)) {
 409         $text = $_text;
 410     } else {
 411         $text = $encoder->decode($_text, Encode::FB_DEFAULT);
 412     }
 413     $_text = undef;
 414
 415     # Any remaining arguments after the first are options; either a single
 416     # hashref or a list of name, value paurs.
 417     %opt = (
 418         # set initial defaults
 419         style_prefix            => $g_style_prefix,
 420         empty_element_suffix    => $g_empty_element_suffix,
 421         tab_width               => $g_tab_width,
 422         indent_width            => $g_indent_width,
 423         url_prefix              => "", # Prefixed to non-absolute URLs
 424         img_prefix              => "", # Prefixed to non-absolute image URLs
 425     );
 426     my %args = ();
 427     if (ref($_[0]) eq "HASH") {
 428         %args = %{$_[0]};
 429     } else {
 430         %args = @_;
 431     }
 432     while (my ($k,$v) = each %args) {
 433         $opt{$k} = $v;
 434     }
 435
 436     # Clear the globals. If we don't clear these, you get conflicts
 437     # from other articles when generating a page which contains more than
 438     # one article (e.g. an index page that shows the N most recent
 439     # articles):
 440     %g_urls = ();
 441     %g_titles = ();
 442     %g_anchors = ();
 443     %g_block_ids = ();
 444     %g_code_block_ids = ();
 445     %g_html_blocks = ();
 446     %g_code_blocks = ();
 447     $g_list_level = 0;
 448
 449     # Standardize line endings:
 450     $text =~ s{\r\n}{\n}g;  # DOS to Unix
 451     $text =~ s{\r}{\n}g;    # Mac to Unix
 452
 453     # Make sure $text ends with a couple of newlines:
 454     $text .= "\n\n";
 455
 456     # Handle backticks-delimited code blocks
 457     $text = _HashBTCodeBlocks($text);
 458
 459     # Convert all tabs to spaces.
 460     $text = _Detab($text);
 461
 462     # Strip any lines consisting only of spaces.
 463     # This makes subsequent regexen easier to write, because we can
 464     # match consecutive blank lines with /\n+/ instead of something
 465     # contorted like / *\n+/ .
 466     $text =~ s/^ +$//mg;
 467
 468     # Turn block-level HTML blocks into hash entries
 469     $text = _HashHTMLBlocks($text);
 470
 471     # Strip link definitions, store in hashes.
 472     $text = _StripLinkDefinitions($text);
 473
 474     $text = _RunBlockGamut($text, 1);
 475
 476     # Remove indentation markers
 477     $text =~ s/\027+//gs;
 478
 479     # Unhashify code blocks
 480     $text =~ s/(\025\d+\026)/$g_code_blocks{$1}/g;
 481
 482     $text = _UnescapeSpecialChars($text);
 483
 484     $text .= "\n" unless $text eq "";
 485
 486     utf8::encode($text);
 487     if (defined($opt{h1}) && $opt{h1} ne "" && ref($_[0]) eq "HASH") {
 488         utf8::encode($opt{h1});
 489         ${$_[0]}{h1} = $opt{h1}
 490     }
 491     return $text;
 492 }
 493
 494
 495 sub _HashBTCodeBlocks {
 496 #
 497 #   Process Markdown backticks (```) delimited code blocks
 498 #
 499     my $text = shift;
 500     my $less_than_indent = $opt{indent_width} - 1;
 501
 502     $text =~ s{
 503             (?:(?<=\n)|\A)
 504                 ([ ]{0,$less_than_indent})``(`+)[ \t]*(?:([\w.+-]+)[ \t]*)?\n
 505              ( # $4 = the code block -- one or more lines, starting with ```
 506               (?:
 507                 .*\n+
 508               )+?
 509              )
 510             # and ending with ``` or end of document
 511             (?:(?:[ ]{0,$less_than_indent}``\2[ \t]*(?:\n|\Z))|\Z)
 512         }{
 513             # $2 contains syntax highlighting to use if defined
 514             my $leadsp = length($1);
 515             my $codeblock = $4;
 516             $codeblock =~ s/[ \t]+$//mg; # trim trailing spaces on lines
 517             $codeblock = _Detab($codeblock, 8); # physical tab stops are always 8
 518             $codeblock =~ s/\A\n+//; # trim leading newlines
 519             $codeblock =~ s/\s+\z//; # trim trailing whitespace
 520             $codeblock =~ s/^ {1,$leadsp}//mg if $leadsp; # trim leading space(s)
 521             $codeblock = _EncodeCode($codeblock); # or run highlighter here
 522             $codeblock = "<div class=\"$opt{style_prefix}code-bt\"><pre style=\"display:none\"></pre><pre><code>"
 523                 . $codeblock . "\n</code></pre></div>";
 524
 525             my $key = block_id($codeblock);
 526             $g_html_blocks{$key} = $codeblock;
 527             "\n\n" . $key . "\n\n";
 528         }egmx;
 529
 530     return $text;
 531 }
 532
 533
 534 sub _StripLinkDefinitions {
 535 #
 536 # Strips link definitions from text, stores the URLs and titles in
 537 # hash references.
 538 #
 539     my $text = shift;
 540     my $less_than_indent = $opt{indent_width} - 1;
 541
 542     # Link defs are in the form: ^[id]: url "optional title"
 543     while ($text =~ s{
 544                         ^[ ]{0,$less_than_indent}\[(.+)\]: # id = $1
 545                           [ ]*
 546                           \n?               # maybe *one* newline
 547                           [ ]*
 548                         <?((?:\S(?:\\\n\s*[^\s"(])?)+?)>? # url = $2
 549                           [ ]*
 550                           \n?               # maybe one newline
 551                           [ ]*
 552                         (?:
 553                             (?<=\s)         # lookbehind for whitespace
 554                             (?:(['"])|(\()) # title quote char
 555                             (.+?)           # title = $5
 556                             (?(4)\)|\3)     # match same quote
 557                             [ ]*
 558                         )?  # title is optional
 559                         (?:\n+|\Z)
 560                     }
 561                     {}mx) {
 562         my $id = _strip(lc $1); # Link IDs are case-insensitive
 563         my $url = $2;
 564         my $title = _strip($5);
 565         $url =~ s/\\\n\s*//gs;
 566         if ($id ne "") {
 567                 $g_urls{$id} = _EncodeAmpsAndAngles($url);
 568                 if (defined($title) && $title ne "") {
 569                     $g_titles{$id} = $title;
 570                     $g_titles{$id} =~ s/\042/&quot;/g;
 571                 }
 572         }
 573     }
 574
 575     return $text;
 576 }
 577
 578 my ($block_tags_a, $block_tags_b);
 579 BEGIN {
 580     $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o;
 581     $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o;
 582 }
 583
 584 sub _HashHTMLBlocks {
 585     my $text = shift;
 586     my $less_than_indent = $opt{indent_width} - 1;
 587     my $idt = "\027" x $g_list_level;
 588
 589     # Hashify HTML blocks:
 590     # We only want to do this for block-level HTML tags, such as headers,
 591     # lists, and tables. That's because we still want to wrap <p>s around
 592     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 593     # phrase emphasis, and spans. The list of tags we're looking for is
 594     # hard-coded:
 595
 596     # First, look for nested blocks, e.g.:
 597     #   <div>
 598     #       <div>
 599     #       tags for inner block must be indented.
 600     #       </div>
 601     #   </div>
 602     #
 603     # The outermost tags must start at the left margin for this to match, and
 604     # the inner nested divs must be indented.
 605     # We need to do this before the next, more liberal match, because the next
 606     # match will start at the first `<div>` and stop at the first `</div>`.
 607     $text =~ s{
 608                 (                       # save in $1
 609                     ^                   # start of line (with /m)
 610                     ((?:\Q$idt\E)?)     # optional lead in = $2
 611                     <($block_tags_a)    # start tag = $3
 612                     \b                  # word break
 613                     (?:.*\n)*?          # any number of lines, minimally matching
 614                     \2</\3>             # the matching end tag
 615                     [ ]*                # trailing spaces
 616                     (?=\n+|\Z) # followed by a newline or end of document
 617                 )
 618             }{
 619                 my $key = block_id($1);
 620                 $g_html_blocks{$key} = $1;
 621                 "\n\n" . $key . "\n\n";
 622             }egmx;
 623
 624
 625     #
 626     # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 627     #
 628     $text =~ s{
 629                 (                       # save in $1
 630                     ^                   # start of line (with /m)
 631                     (?:\Q$idt\E)?       # optional lead in
 632                     <($block_tags_b)    # start tag = $2
 633                     \b                  # word break
 634                     (?:.*\n)*?          # any number of lines, minimally matching
 635                     .*</\2>             # the matching end tag
 636                     [ ]*                # trailing spaces
 637                     (?=\n+|\Z) # followed by a newline or end of document
 638                 )
 639             }{
 640                 my $key = block_id($1);
 641                 $g_html_blocks{$key} = $1;
 642                 "\n\n" . $key . "\n\n";
 643             }egmx;
 644     # Special case just for <hr />. It was easier to make a special case than
 645     # to make the other regex more complicated.
 646     $text =~ s{
 647                 (?:
 648                     (?<=\n\n)       # Starting after a blank line
 649                     |               # or
 650                     \A\n?           # the beginning of the doc
 651                 )
 652                 (                       # save in $1
 653                     [ ]{0,$less_than_indent}
 654                     <(?:hr)             # start tag
 655                     \b                  # word break
 656                     (?:[^<>])*?         #
 657                     /?>                 # the matching end tag
 658                     [ ]*
 659                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
 660                 )
 661             }{
 662                 my $key = block_id($1);
 663                 $g_html_blocks{$key} = $1;
 664                 "\n\n" . $key . "\n\n";
 665             }egx;
 666
 667     # Special case for standalone HTML comments:
 668     $text =~ s{
 669                 (?:
 670                     (?<=\n\n)       # Starting after a blank line
 671                     |               # or
 672                     \A\n?           # the beginning of the doc
 673                 )
 674                 (                   # save in $1
 675                     [ ]{0,$less_than_indent}
 676                     (?s:
 677                         <!--
 678                         (?:[^-]|(?:-(?!-)))*
 679                         -->
 680                     )
 681                     [ ]*
 682                     (?=\n{1,}|\Z)   # followed by end of line or end of document
 683                 )
 684             }{
 685                 my $key = block_id($1);
 686                 $g_html_blocks{$key} = $1;
 687                 "\n\n" . $key . "\n\n";
 688             }egx;
 689
 690
 691     return $text;
 692 }
 693
 694
 695 sub _RunBlockGamut {
 696 #
 697 # These are all the transformations that form block-level
 698 # tags like paragraphs, headers, and list items.
 699 #
 700     my ($text, $anchors) = @_;
 701
 702     $text = _DoHeaders($text, $anchors);
 703
 704     # Do Horizontal Rules:
 705     $text =~ s{^ {0,3}\*(?: {0,2}\*){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
 706     $text =~ s{^ {0,3}\_(?: {0,2}\_){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
 707     $text =~ s{^ {0,3}\-(?: {0,2}\-){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
 708
 709     $text = _DoLists($text);
 710
 711     $text = _DoCodeBlocks($text);
 712
 713     $text = _DoBlockQuotes($text);
 714
 715     $text = _DoTables($text);
 716
 717     # We already ran _HashHTMLBlocks() before, in Markdown(), but that
 718     # was to escape raw HTML in the original Markdown source. This time,
 719     # we're escaping the markup we've just created, so that we don't wrap
 720     # <p> tags around block-level tags.
 721     $text = _HashHTMLBlocks($text);
 722
 723     $text = _FormParagraphs($text);
 724
 725     return $text;
 726 }
 727
 728
 729 sub _RunSpanGamut {
 730 #
 731 # These are all the transformations that occur *within* block-level
 732 # tags like paragraphs, headers, and list items.
 733 #
 734     my $text = shift;
 735
 736     $text = _DoCodeSpans($text);
 737
 738     $text = _EscapeSpecialChars($text);
 739
 740     # Process anchor and image tags. Images must come first,
 741     # because ![foo][f] looks like an anchor.
 742     $text = _DoImages($text);
 743     $text = _DoAnchors($text);
 744
 745     # Make links out of things like `<http://example.com/>`
 746     # Must come after _DoAnchors(), because you can use < and >
 747     # delimiters in inline links like [this](<url>).
 748     $text = _DoAutoLinks($text);
 749
 750     $text = _EncodeAmpsAndAngles($text);
 751
 752     $text = _DoItalicsAndBoldAndStrike($text);
 753
 754     # Do hard breaks:
 755     $text =~ s/ {2,}\n/<br$opt{empty_element_suffix}\n/g;
 756
 757     return $text;
 758 }
 759
 760
 761 sub _EscapeSpecialChars {
 762     my $text = shift;
 763     my $tokens ||= _TokenizeHTML($text);
 764
 765     $text = ''; # rebuild $text from the tokens
 766 #   my $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
 767 #   my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
 768
 769     foreach my $cur_token (@$tokens) {
 770         if ($cur_token->[0] eq "tag") {
 771             # Within tags, encode *, _ and ~ so they don't conflict
 772             # with their use in Markdown for italics and strong.
 773             # We're replacing each such character with its
 774             # corresponding block id value; this is likely
 775             # overkill, but it should prevent us from colliding
 776             # with the escape values by accident.
 777             $cur_token->[1] =~ s!([*_~])!$g_escape_table{$1}!g;
 778             $text .= $cur_token->[1];
 779         } else {
 780             my $t = $cur_token->[1];
 781             $t = _EncodeBackslashEscapes($t);
 782             $text .= $t;
 783         }
 784     }
 785     return $text;
 786 }
 787
 788
 789 sub _ProcessWikiLink {
 790     my ($link_text, $link_loc) = @_;
 791     if (defined($link_loc) && $link_loc =~ m{^(?:http|ftp)s?://\S+$}i) {
 792         # Just rewrite it to [...](...) form
 793         return "[".$link_text."](".$link_loc.")";
 794     }
 795     if (defined($link_loc)) {
 796         # We don't handle any other kind of "bar" links yet
 797         return undef;
 798     }
 799     if ($link_text =~ m{^(?:http|ftp)s?://\S+$}i) {
 800         # Just rewrite it to [...](...) form
 801         return "[".$link_text."](".$link_text.")";
 802     }
 803     # We don't handle any other wiki-style links yet
 804     return undef;
 805 }
 806
 807
 808 # Return a suitably encoded <a...> tag string
 809 # On input NONE of $url, $text or $title should be xmlencoded
 810 # but $url should already be url-encoded if needed, but NOT g_escape_table'd
 811 sub _MakeATag {
 812         my ($url, $text, $title) = @_;
 813         defined($url) or $url="";
 814         defined($text) or $text="";
 815         defined($title) or $title="";
 816
 817         my $result = "<a href=\"" . _EncodeAttText($url) . "\"";
 818         $title = _strip($title);
 819         $result .= " title=\"" . _EncodeAttText($title) . "\"" if $title ne "";
 820         return $result . ">" . $text . "</a>";
 821 }
 822
 823
 824 sub _DoAnchors {
 825 #
 826 # Turn Markdown link shortcuts into XHTML <a> tags.
 827 #
 828     my $text = shift;
 829
 830     #
 831     # First, handle wiki-style links: [[wiki style link]]
 832     #
 833     $text =~ s{
 834         (                   # wrap whole match in $1
 835           \[\[
 836             ($g_nested_brackets) # link text and id = $2
 837           \]\]
 838         )
 839     }{
 840         my $result;
 841         my $whole_match = $1;
 842         my $link_text   = $2;
 843         my $link_loc    = undef;
 844
 845         if ($link_text =~ /^(.*)\|(.*)$/s) {
 846             $link_text = $1;
 847             $link_loc = $2;
 848         }
 849
 850         $result = _ProcessWikiLink($link_text, $link_loc);
 851         defined($result) or $result = $whole_match;
 852         $result;
 853     }xsge;
 854
 855     #
 856     # Next, handle reference-style links: [link text] [id]
 857     #
 858     $text =~ s{
 859         (                   # wrap whole match in $1
 860           \[
 861             ($g_nested_brackets) # link text = $2
 862           \]
 863
 864           [ ]?              # one optional space
 865           (?:\n[ ]*)?       # one optional newline followed by spaces
 866
 867           \[
 868             (.*?)           # id = $3
 869           \]
 870         )
 871     }{
 872         my $result;
 873         my $whole_match = $1;
 874         my $link_text   = $2;
 875         my $link_id     = $3;
 876
 877         $link_id ne "" or $link_id = $link_text; # for shortcut links like [this][].
 878         $link_id = _strip(lc $link_id);
 879
 880         if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
 881             my $url = $g_urls{$link_id};
 882             $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
 883             $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
 884             $result = _MakeATag($url, $link_text, $g_titles{$link_id});
 885         }
 886         else {
 887             $result = $whole_match;
 888         }
 889         $result;
 890     }xsge;
 891
 892     #
 893     # Subsequently, inline-style links: [link text](url "optional title")
 894     #
 895     $text =~ s{
 896         (               # wrap whole match in $1
 897           \[
 898             ($g_nested_brackets) # link text = $2
 899           \]
 900           \(            # literal paren
 901             [ ]*
 902             <?(.*?)>?   # href = $3
 903             [ ]*
 904             (           # $4
 905               (['\042]) # quote char = $5
 906               (.*?)     # Title = $6
 907               \5        # matching quote
 908             )?          # title is optional
 909           \)
 910         )
 911     }{
 912         #my $result;
 913         my $whole_match = $1;
 914         my $link_text   = $2;
 915         my $url         = $3;
 916         my $title       = $6;
 917
 918         if ($url =~ /^#\S/) {
 919             my $idbase = _strip(lc(substr($url, 1)));
 920             my $id = _MakeAnchorId($idbase);
 921             if (defined($g_anchors_id{$id})) {
 922                 $url = $g_anchors_id{$id};
 923             } else {
 924                 $idbase =~ s/-/_/gs;
 925                 $id = _MakeAnchorId($idbase);
 926                 if (defined($g_anchors_id{$id})) {
 927                     $url = $g_anchors_id{$id};
 928                 }
 929             }
 930         }
 931         $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
 932         _MakeATag(_PrefixURL($url), $link_text, $title);
 933     }xsge;
 934
 935     #
 936     # Finally, handle reference-style implicit shortcut links: [link text]
 937     #
 938     $text =~ s{
 939         (                   # wrap whole match in $1
 940           \[
 941             ($g_nested_brackets) # link text = $2
 942           \]
 943         )
 944     }{
 945         my $result;
 946         my $whole_match = $1;
 947         my $link_text   = $2;
 948         my $link_id     = _strip(lc $2);
 949
 950         if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
 951             my $url = $g_urls{$link_id};
 952             $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
 953             $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
 954             $result = _MakeATag($url, $link_text, $g_titles{$link_id});
 955         }
 956         else {
 957             $result = $whole_match;
 958         }
 959         $result;
 960     }xsge;
 961
 962     return $text;
 963 }
 964
 965
 966 # Return a suitably encoded <img...> tag string
 967 # On input NONE of $url, $alt or $title should be xmlencoded
 968 # but $url should already be url-encoded if needed, but NOT g_escape_table'd
 969 sub _MakeIMGTag {
 970     my ($url, $alt, $title) = @_;
 971     defined($url) or $url="";
 972     defined($alt) or $alt="";
 973     defined($title) or $title="";
 974     return "" unless $url ne "";
 975
 976     my $result = "<img src=\"" . _EncodeAttText($url) . "\"";
 977     my ($w, $h) = (0, 0);
 978     ($alt, $title) = (_strip($alt), _strip($title));
 979     if ($title =~ /^(.*)\(([1-9][0-9]*)[xX]([1-9][0-9]*)\)$/os) {
 980         ($title, $w, $h) = (_strip($1), $2, $3);
 981     } elsif ($title =~ /^(.*)\(\?[xX]([1-9][0-9]*)\)$/os) {
 982         ($title, $h) = (_strip($1), $2);
 983     } elsif ($title =~ /^(.*)\(([1-9][0-9]*)[xX]\?\)$/os) {
 984         ($title, $w) = (_strip($1), $2);
 985     }
 986     $result .= " alt=\"" . _EncodeAttText($alt) . "\"" if $alt ne "";
 987     $result .= " width=\"$w\"" if $w != 0;
 988     $result .= " height=\"$h\"" if $h != 0;
 989     $result .= " title=\"" . _EncodeAttText($title) . "\"" if $title ne "";
 990     $result .= $opt{empty_element_suffix};
 991     return $result;
 992 }
 993
 994
 995 sub _DoImages {
 996 #
 997 # Turn Markdown image shortcuts into <img> tags.
 998 #
 999     my $text = shift;
1000
1001     #
1002     # First, handle reference-style labeled images: ![alt text][id]
1003     #
1004     $text =~ s{
1005         (               # wrap whole match in $1
1006           !\[
1007             (.*?)       # alt text = $2
1008           \]
1009
1010           [ ]?          # one optional space
1011           (?:\n[ ]*)?   # one optional newline followed by spaces
1012
1013           \[
1014             (.*?)       # id = $3
1015           \]
1016
1017         )
1018     }{
1019         my $result;
1020         my $whole_match = $1;
1021         my $alt_text    = $2;
1022         my $link_id     = $3;
1023
1024         $link_id ne "" or $link_id = $alt_text; # for shortcut links like ![this][].
1025         $link_id = _strip(lc $link_id);
1026
1027         if (defined $g_urls{$link_id}) {
1028             $result = _MakeIMGTag(
1029                 _PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
1030         }
1031         else {
1032             # If there's no such link ID, leave intact:
1033             $result = $whole_match;
1034         }
1035
1036         $result;
1037     }xsge;
1038
1039     #
1040     # Next, handle inline images:  ![alt text](url "optional title")
1041     # Don't forget: encode * and _
1042
1043     $text =~ s{
1044         (               # wrap whole match in $1
1045           !\[
1046             (.*?)       # alt text = $2
1047           \]
1048           \(            # literal paren
1049             [ ]*
1050             <?(\S+?)>?  # src url = $3
1051             [ ]*
1052             (           # $4
1053               (['\042]) # quote char = $5
1054               (.*?)     # title = $6
1055               \5        # matching quote
1056               [ ]*
1057             )?          # title is optional
1058           \)
1059         )
1060     }{
1061         #my $whole_match = $1;
1062         my $alt_text    = $2;
1063         my $url         = $3;
1064         my $title       = '';
1065         if (defined($6)) {
1066             $title      = $6;
1067         }
1068
1069         _MakeIMGTag(_PrefixURL($url), $alt_text, $title);
1070     }xsge;
1071
1072     #
1073     # Finally, handle reference-style implicitly labeled links: ![alt text]
1074     #
1075     $text =~ s{
1076         (               # wrap whole match in $1
1077           !\[
1078             (.*?)       # alt text = $2
1079           \]
1080         )
1081     }{
1082         my $result;
1083         my $whole_match = $1;
1084         my $alt_text    = $2;
1085         my $link_id     = lc(_strip($alt_text));
1086
1087         if (defined $g_urls{$link_id}) {
1088             $result = _MakeIMGTag(
1089                 _PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
1090         }
1091         else {
1092             # If there's no such link ID, leave intact:
1093             $result = $whole_match;
1094         }
1095
1096         $result;
1097     }xsge;
1098
1099     return $text;
1100 }
1101
1102 sub _EncodeAttText {
1103     my $text = shift;
1104     defined($text) or return undef;
1105     $text = _HTMLEncode(_strip($text));
1106     # We've got to encode these to avoid conflicting
1107     # with italics, bold and strike through.
1108     $text =~ s!([*_~:])!$g_escape_table{$1}!g;
1109     return $text;
1110 }
1111
1112
1113 sub _MakeAnchorId {
1114     use bytes;
1115     my $link = shift;
1116     $link = lc($link);
1117     $link =~ tr/-a-z0-9_/_/cs;
1118     return '' unless $link ne '';
1119     $link = "_".$link."_";
1120     $link =~ s/__+/_/gs;
1121     $link = "_".md5_hex($link)."_" if length($link) > 66;
1122     return $link;
1123 }
1124
1125
1126 sub _GetNewAnchorId {
1127     my $link = _strip(lc(shift));
1128     return '' if defined($g_anchors{$link});
1129     my $id = _MakeAnchorId($link);
1130     return '' unless $id;
1131     $g_anchors{$link} = '#'.$id;
1132     $g_anchors_id{$id} = $g_anchors{$link};
1133     if ($id =~ /-/) {
1134         my $id2 = $id;
1135         $id2 =~ s/-/_/gs;
1136         $id2 =~ s/__+/_/gs;
1137         defined($g_anchors_id{$id2}) or $g_anchors_id{$id2} = $g_anchors{$link};
1138     }
1139     $id;
1140 }
1141
1142
1143 sub _DoHeaders {
1144     my ($text, $anchors) = @_;
1145     my $h1;
1146     my $geth1 = $anchors && !defined($opt{h1}) ? sub {
1147         return unless !defined($h1);
1148         my $h = shift;
1149         $h =~ s/^\s+//;
1150         $h =~ s/\s+$//;
1151         $h =~ s/\s+/ /g;
1152         $h1 = $h if $h ne "";
1153     } : sub {};
1154
1155     # Setext-style headers:
1156     #     Header 1
1157     #     ========
1158     #
1159     #     Header 2
1160     #     --------
1161     #
1162     #     Header 3
1163     #     ~~~~~~~~
1164     #
1165     $text =~ s{ ^(?:=+[ ]*\n)?[ ]*(.+?)[ ]*\n=+[ ]*\n+ }{
1166         my $h = $1;
1167         my $id = _GetNewAnchorId($h);
1168         &$geth1($h);
1169         $id = " id=\"$id\"" if $id ne "";
1170         "<h1$id>" . _RunSpanGamut($h) . "</h1>\n\n";
1171     }egmx;
1172
1173     $text =~ s{ ^(?:-+[ ]*\n)?[ ]*(.+?)[ ]*\n-+[ ]*\n+ }{
1174         my $h = $1;
1175         my $id = _GetNewAnchorId($h);
1176         $id = " id=\"$id\"" if $id ne "";
1177         "<h2$id>" . _RunSpanGamut($h) . "</h2>\n\n";
1178     }egmx;
1179
1180     $text =~ s{ ^(?:~+[ ]*\n)?[ ]*(.+?)[ ]*\n~+[ ]*\n+ }{
1181         my $h = $1;
1182         my $id = _GetNewAnchorId($h);
1183         $id = " id=\"$id\"" if $id ne "";
1184         "<h3$id>" . _RunSpanGamut($h) . "</h3>\n\n";
1185     }egmx;
1186
1187
1188     # atx-style headers:
1189     #   # Header 1
1190     #   ## Header 2
1191     #   ## Header 2 with closing hashes ##
1192     #   ...
1193     #   ###### Header 6
1194     #
1195     $text =~ s{
1196             ^(\#{1,6})  # $1 = string of #'s
1197             [ ]*
1198             (.+?)       # $2 = Header text
1199             [ ]*
1200             \#*         # optional closing #'s (not counted)
1201             \n+
1202         }{
1203             my $h = $2;
1204             my $h_level = length($1);
1205             my $id = $h_level <= 3 ? _GetNewAnchorId($h) : '';
1206             &$geth1($h) if $h_level == 1;
1207             $id = " id=\"$id\"" if $id ne "";
1208             "<h$h_level$id>" . _RunSpanGamut($h) . "</h$h_level>\n\n";
1209         }egmx;
1210
1211     $opt{h1} = $h1 if defined($h1) && $h1 ne "";
1212     return $text;
1213 }
1214
1215
1216 my ($marker_ul, $marker_ol, $marker_any, $roman_numeral, $greek_lower);
1217 BEGIN {
1218     # Re-usable patterns to match list item bullets and number markers:
1219     $roman_numeral = qr/(?:
1220         [IiVvXx]|[Ii]{2,3}|[Ii][VvXx]|[VvXx][Ii]{1,3}|[Xx][Vv][Ii]{0,3}|
1221         [Xx][Ii][VvXx]|[Xx]{2}[Ii]{0,3}|[Xx]{2}[Ii]?[Vv]|[Xx]{2}[Vv][Ii]{1,2})/ox;
1222     $greek_lower = qr/(?:[\x{03b1}-\x{03c9}])/o;
1223     $marker_ul  = qr/[*+-]/o;
1224     $marker_ol  = qr/(?:\d+|[A-Za-z]|$roman_numeral|$greek_lower)[.\)]/o;
1225     $marker_any = qr/(?:$marker_ul|$marker_ol)/o;
1226 }
1227
1228
1229 sub _GetListMarkerType {
1230     my ($list_type, $list_marker, $last_marker) = @_;
1231     return "" unless $list_type && $list_marker && lc($list_type) eq "ol";
1232     my $last_marker_type = '';
1233     $last_marker_type = _GetListMarkerType($list_type, $last_marker)
1234         if defined($last_marker) &&
1235             # these are roman unless $last_marker type case matches and is 'a' or 'A'
1236             $list_marker =~ /^[IiVvXx][.\)]?$/;
1237     return "I" if $list_marker =~ /^[IVX]/ && $last_marker_type ne 'A';
1238     return "i" if $list_marker =~ /^[ivx]/ && $last_marker_type ne 'a';
1239     return "A" if $list_marker =~ /^[A-Z]/;
1240     return "a" if $list_marker =~ /^[a-z]/ || $list_marker =~ /^$greek_lower/o;
1241     return "1";
1242 }
1243
1244
1245 sub _GetListItemTypeClass {
1246     my ($list_type, $list_marker, $last_marker) = @_;
1247     my $list_marker_type = _GetListMarkerType($list_type, $list_marker, $last_marker);
1248     my $ans = &{sub{
1249         return "" unless length($list_marker) >= 2 && $list_marker_type =~ /^[IiAa1]$/;
1250         return "lower-greek" if $list_marker_type eq "a" && $list_marker =~ /^$greek_lower/o;
1251         return "" unless $list_marker =~ /\)$/;
1252         return "upper-roman" if $list_marker_type eq "I";
1253         return "lower-roman" if $list_marker_type eq "i";
1254         return "upper-alpha" if $list_marker_type eq "A";
1255         return "lower-alpha" if $list_marker_type eq "a";
1256         return "decimal";
1257     }};
1258     return ($list_marker_type, $ans);
1259 }
1260
1261
1262 my %_roman_number_table;
1263 BEGIN {
1264     %_roman_number_table = (
1265         i       =>  1,
1266         ii      =>  2,
1267         iii     =>  3,
1268         iv      =>  4,
1269         v       =>  5,
1270         vi      =>  6,
1271         vii     =>  7,
1272         viii    =>  8,
1273         ix      =>  9,
1274         x       => 10,
1275         xi      => 11,
1276         xii     => 12,
1277         xiii    => 13,
1278         xiv     => 14,
1279         xv      => 15,
1280         xvi     => 16,
1281         xvii    => 17,
1282         xviii   => 18,
1283         xix     => 19,
1284         xx      => 20,
1285         xxi     => 21,
1286         xxii    => 22,
1287         xxiii   => 23,
1288         xxiv    => 24,
1289         xxv     => 25,
1290         xxvi    => 26,
1291         xxvii   => 27
1292     );
1293 }
1294
1295
1296 # Necessary because ς and σ are the same value grrr
1297 my %_greek_number_table;
1298 BEGIN {
1299     %_greek_number_table = (
1300         "\x{03b1}" =>  1, # α
1301         "\x{03b2}" =>  2, # β
1302         "\x{03b3}" =>  3, # γ
1303         "\x{03b4}" =>  4, # δ
1304         "\x{03b5}" =>  5, # ε
1305         "\x{03b6}" =>  6, # ζ
1306         "\x{03b7}" =>  7, # η
1307         "\x{03b8}" =>  8, # θ
1308         "\x{03b9}" =>  9, # ι
1309         "\x{03ba}" => 10, # κ
1310         "\x{03bb}" => 11, # λ
1311         #"\x{00b5}"=> 12, # µ is "micro" not "mu"
1312         "\x{03bc}" => 12, # μ
1313         "\x{03bd}" => 13, # ν
1314         "\x{03be}" => 14, # ξ
1315         "\x{03bf}" => 15, # ο
1316         "\x{03c0}" => 16, # π
1317         "\x{03c1}" => 17, # ρ
1318         "\x{03c2}" => 18, # ς
1319         "\x{03c3}" => 18, # σ
1320         "\x{03c4}" => 19, # τ
1321         "\x{03c5}" => 20, # υ
1322         "\x{03c6}" => 21, # φ
1323         "\x{03c7}" => 22, # χ
1324         "\x{03c8}" => 23, # ψ
1325         "\x{03c9}" => 24  # ω
1326     );
1327 }
1328
1329
1330 sub _GetMarkerIntegerNum {
1331     my ($list_marker_type, $marker_val) = @_;
1332     my $ans = &{sub{
1333         return 0 + $marker_val if $list_marker_type eq "1";
1334         $list_marker_type = lc($list_marker_type);
1335         return $_greek_number_table{$marker_val}
1336             if $list_marker_type eq "a" &&
1337             defined($_greek_number_table{$marker_val});
1338         $marker_val = lc($marker_val);
1339         return ord($marker_val) - ord("a") + 1 if $list_marker_type eq "a";
1340         return 1 unless $list_marker_type eq "i";
1341         defined($_roman_number_table{$marker_val}) and
1342             return $_roman_number_table{$marker_val};
1343         return 1;
1344     }};
1345     return $ans if $ans == 0 && $list_marker_type eq "1";
1346     return $ans >= 1 ? $ans : 1;
1347 }
1348
1349
1350 sub _IncrList {
1351     my ($from, $to, $extra) = @_;
1352     $extra = defined($extra) ? " $extra" : "";
1353     my $result = "";
1354     while ($from + 10 <= $to) {
1355         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-10\"></span>\n";
1356         $from += 10;
1357     }
1358     while ($from + 5 <= $to) {
1359         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-5\"></span>\n";
1360         $from += 5;
1361     }
1362     while ($from + 2 <= $to) {
1363         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-2\"></span>\n";
1364         $from += 2;
1365     }
1366     while ($from < $to) {
1367         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr\"></span>\n";
1368         ++$from;
1369     }
1370     return $result;
1371 }
1372
1373
1374 sub _DoLists {
1375 #
1376 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1377 #
1378     my $text = shift;
1379     my $indent = $opt{indent_width};
1380     my $less_than_indent = $indent - 1;
1381     my $less_than_double_indent = 2 * $indent - 1;
1382
1383     # Re-usable pattern to match any entire ul or ol list:
1384     my $whole_list = qr{
1385         (                           # $1 (or $_[0]) = whole list
1386           (                         # $2 (or $_[1])
1387             (?:(?<=\n)|\A)
1388             [ ]{0,$less_than_indent}
1389             (${marker_any})         # $3 (or $_[2]) = first list item marker
1390             [ ]+
1391           )
1392           (?s:.+?)
1393           (                         # $4 (or $_[3])
1394               \z
1395             |
1396               \n{2,}
1397               (?=\S)
1398               (?!                   # Negative lookahead for another list item marker
1399                 ${marker_any}[ ]
1400               )
1401           )
1402         )
1403     }mx;
1404
1405     my $list_item_sub = sub {
1406         my $list = $_[0];
1407         my $list_type = ($_[2] =~ m/$marker_ul/) ? "ul" : "ol";
1408         my $list_att = "";
1409         my $list_class = "";
1410         my $list_incr = "";
1411         # Turn double returns into triple returns, so that we can make a
1412         # paragraph for the last item in a list, if necessary:
1413         $list =~ s/\n\n/\n\n\n/g;
1414         my ($result, $first_marker, $fancy) = _ProcessListItems($list_type, $list);
1415         my $list_marker_type = _GetListMarkerType($list_type, $first_marker);
1416         if ($list_marker_type) {
1417                 $first_marker =~ s/[.\)]$//;
1418                 my $first_marker_num = _GetMarkerIntegerNum($list_marker_type, $first_marker);
1419                 $list_att = $list_marker_type eq "1" ? "" : " type=\"$list_marker_type\"";
1420                 if ($fancy) {
1421                     $list_class = " class=\"$opt{style_prefix}ol\"";
1422                     my $start = $first_marker_num;
1423                     $start = 10 if $start > 10;
1424                     $start = 5 if $start > 5 && $start < 10;
1425                     $start = 1 if $start > 1 && $start < 5;
1426                     $list_att .= " start=\"$start\"" unless $start == 1;
1427                     $list_incr = _IncrList($start, $first_marker_num);
1428                 } else {
1429                     $list_class = " class=\"$opt{style_prefix}lc-greek\""
1430                         if $list_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
1431                     $list_att .= " start=\"$first_marker_num\"" unless $first_marker_num == 1;
1432                 }
1433         }
1434         my $idt = "\027" x $g_list_level;
1435         $result = "$idt<$list_type$list_att$list_class>\n$list_incr" . $result . "$idt</$list_type>\n\n";
1436         $result;
1437     };
1438
1439     # We use a different prefix before nested lists than top-level lists.
1440     # See extended comment in _ProcessListItems().
1441     #
1442     # Note: (jg) There's a bit of duplication here. My original implementation
1443     # created a scalar regex pattern as the conditional result of the test on
1444     # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1445     # substitution once, using the scalar as the pattern. This worked,
1446     # everywhere except when running under MT on my hosting account at Pair
1447     # Networks. There, this caused all rebuilds to be killed by the reaper (or
1448     # perhaps they crashed, but that seems incredibly unlikely given that the
1449     # same script on the same server ran fine *except* under MT. I've spent
1450     # more time trying to figure out why this is happening than I'd like to
1451     # admit. My only guess, backed up by the fact that this workaround works,
1452     # is that Perl optimizes the substition when it can figure out that the
1453     # pattern will never change, and when this optimization isn't on, we run
1454     # afoul of the reaper. Thus, the slightly redundant code to that uses two
1455     # static s/// patterns rather than one conditional pattern.
1456     #
1457     # Note: (kjm) With the addition of the two-of-the-same-kind-in-a-row-
1458     # starts-a-list-at-the-top-level rule the two patterns really are somewhat
1459     # different now, but the duplication has pretty much been eliminated via
1460     # use of a separate sub which has the side-effect of making the below
1461     # two cases much easier to grok all at once.
1462
1463     if ($g_list_level) {
1464         $text =~ s{
1465                 ^
1466                 $whole_list
1467             }{
1468                 &$list_item_sub($1, $2, $3, $4);
1469             }egmx;
1470     }
1471     else {
1472         $text =~ s{
1473                 (?: (?<=\n\n) |
1474                     \A\n? |
1475                     (?:(?<=\n) # two of the same kind of marker lines
1476                        (?=[ ]{0,$less_than_indent}$marker_ul[ ].*\n
1477                           [ ]{0,$less_than_indent}$marker_ul[ ])) |
1478                     (?:(?<=\n) # in a row will start a list
1479                        (?=[ ]{0,$less_than_indent}$marker_ol[ ].*\n
1480                           [ ]{0,$less_than_indent}$marker_ol[ ])) |
1481                     (?:(?<=\n) # or any marker and a sublist marker
1482                        (?=[ ]{0,$less_than_indent}$marker_any[ ].*\n
1483                           [ ]{$indent,$less_than_double_indent}$marker_any[ ]))
1484                 )
1485                 $whole_list
1486             }{
1487                 &$list_item_sub($1, $2, $3, $4);
1488             }egmx;
1489     }
1490
1491     return $text;
1492 }
1493
1494
1495 sub _ProcessListItems {
1496 #
1497 #   Process the contents of a single ordered or unordered list, splitting it
1498 #   into individual list items.
1499 #
1500
1501     my $list_type = shift;
1502     my $list_str = shift;
1503
1504     # The $g_list_level global keeps track of when we're inside a list.
1505     # Each time we enter a list, we increment it; when we leave a list,
1506     # we decrement. If it's zero, we're not in a list anymore.
1507     #
1508     # We do this because when we're not inside a list, we want to treat
1509     # something like this:
1510     #
1511     #   I recommend upgrading to version
1512     #   8. Oops, now this line is treated
1513     #   as a sub-list.
1514     #
1515     # As a single paragraph, despite the fact that the second line starts
1516     # with a digit-period-space sequence.
1517     #
1518     # Whereas when we're inside a list (or sub-list), that line will be
1519     # treated as the start of a sub-list. What a kludge, huh? This is
1520     # an aspect of Markdown's syntax that's hard to parse perfectly
1521     # without resorting to mind-reading. Perhaps the solution is to
1522     # change the syntax rules such that sub-lists must start with a
1523     # starting cardinal number; e.g. "1." or "a.".
1524
1525     $g_list_level++;
1526     my $idt = "\027" x $g_list_level;
1527     my $marker_kind = $list_type eq "ul" ? $marker_ul : $marker_ol;
1528     my $first_marker;
1529     my $first_marker_type;
1530     my $first_marker_num;
1531     my $last_marker;
1532     my $fancy;
1533     my $skipped;
1534     my $typechanged;
1535     my $next_num = 1;
1536
1537     # trim trailing blank lines:
1538     $list_str =~ s/\n{2,}\z/\n/;
1539
1540     my $result = "";
1541     my $oldpos = 0;
1542     pos($list_str) = 0;
1543     while ($list_str =~ m{\G            # start where we left off
1544         (\n+)?                          # leading line = $1
1545         (^[ ]*)                         # leading whitespace = $2
1546         ($marker_any) [ ] ([ ]*)        # list marker = $3 leading item space = $4
1547     }cgmx) {
1548         my $leading_line = $1;
1549         my $leading_space = $2;
1550         my $list_marker = $3;
1551         my $list_marker_len = length($list_marker);
1552         my $leading_item_space = $4;
1553         if ($-[0] > $oldpos) {
1554             $result .= substr($list_str, $oldpos, $-[0] - $oldpos); # Sort-of $`
1555             $oldpos = $-[0]; # point at start of this entire match
1556         }
1557         if (!defined($first_marker)) {
1558             $first_marker = $list_marker;
1559             $first_marker_type = _GetListMarkerType($list_type, $first_marker);
1560             if ($first_marker_type) {
1561                 (my $marker_val = $first_marker) =~ s/[.\)]$//;
1562                 $first_marker_num = _GetMarkerIntegerNum($first_marker_type, $marker_val);
1563                 $next_num = $first_marker_num;
1564                 $skipped = 1 if $next_num != 1;
1565             }
1566         } elsif ($list_marker !~ /$marker_kind/) {
1567             # Wrong marker kind, "fix up" the marker to a correct "lazy" marker
1568             # But keep the old length in $list_marker_len
1569             $list_marker = $last_marker;
1570         }
1571
1572         # Now grab the rest of this item's data upto but excluding the next
1573         # list marker at the SAME indent level, but sublists must be INCLUDED
1574
1575         my $item = "";
1576         while ($list_str =~ m{\G
1577             ((?:.+?)(?:\n{1,2}))        # list item text = $1
1578             (?= \n* (?: \z |            # end of string OR
1579                     (^[ ]*)             # leading whitespace = $2
1580                     ($marker_any)       # next list marker = $3
1581                     ([ ]+) ))           # one or more spaces after marker = $4
1582         }cgmxs) {
1583
1584             # If $3 has a left edge that is at the left edge of the previous
1585             # marker OR $3 has a right edge that is at the right edge of the
1586             # previous marker then we stop; otherwise we go on
1587
1588             $item .= substr($list_str, $-[0], $+[0] - $-[0]); # $&
1589             last if !defined($4) || length($2) == length($leading_space) ||
1590                 length($2) + length($3) == length($leading_space) + $list_marker_len;
1591             # move along, you're not the marker droid we're looking for...
1592             $item .= substr($list_str, $+[0], $+[4] - $+[0]);
1593             pos($list_str) = $+[4]; # ...move along over the marker droid
1594         }
1595         # Remember where we parked
1596         $oldpos = pos($list_str);
1597
1598         # Process the $list_marker $item
1599
1600         my $liatt = '';
1601         my $checkbox = '';
1602         my $incr = '';
1603
1604         if ($list_type eq "ul" && !$leading_item_space && $item =~ /^\[([ xX])\] +(.*)$/s) {
1605             my $checkmark = lc $1;
1606             $item = $2;
1607             my ($checkbox_class, $checkbox_val);
1608             if ($checkmark eq "x") {
1609                 ($checkbox_class, $checkbox_val) = ("checkbox-on", "x");
1610             } else {
1611                 ($checkbox_class, $checkbox_val) = ("checkbox-off", "&#160;");
1612             }
1613             $liatt = " class=\"$opt{style_prefix}$checkbox_class\"";
1614             $checkbox = "<span><span></span></span><span></span><span>[<tt>$checkbox_val</tt>]&#160;</span>";
1615         } else {
1616             my $list_marker_type;
1617             ($list_marker_type, $liatt) = _GetListItemTypeClass($list_type, $list_marker, $last_marker);
1618             if ($list_type eq "ol" && defined($first_marker)) {
1619                 my $styled = $fancy = 1 if $liatt && $list_marker =~ /\)$/;
1620                 my ($sfx, $dash) = ("", "");
1621                 ($sfx, $dash) = ("li", "-") if $styled;
1622                 if ($liatt =~ /lower/) {
1623                     $sfx .= "${dash}lc";
1624                 } elsif ($liatt =~ /upper/) {
1625                     $sfx .= "${dash}uc";
1626                 }
1627                 $sfx .= "-greek" if $liatt =~ /greek/;
1628                 $liatt = " class=\"$opt{style_prefix}$sfx\"" if $sfx;
1629                 $typechanged = 1 if $list_marker_type ne $first_marker_type;
1630                 (my $marker_val = $list_marker) =~ s/[.\)]$//;
1631                 my $marker_num = _GetMarkerIntegerNum($list_marker_type, $marker_val);
1632                 $marker_num = $next_num if $marker_num < $next_num;
1633                 $skipped = 1 if $next_num < $marker_num;
1634                 $incr = _IncrList($next_num, $marker_num, "incrlevel=$g_list_level");
1635                 $liatt = " value=\"$marker_num\"$liatt" if $fancy || $skipped;
1636                 $liatt = " type=\"$list_marker_type\"$liatt" if $styled || $typechanged;
1637                 $next_num = $marker_num + 1;
1638             }
1639         }
1640         $last_marker = $list_marker;
1641
1642         if ($leading_line or ($item =~ m/\n{2,}/)) {
1643             $item = _RunBlockGamut(_Outdent($item));
1644         }
1645         else {
1646             # Recursion for sub-lists:
1647             $item = _DoLists(_Outdent($item));
1648             chomp $item;
1649             $item = _RunSpanGamut($item);
1650         }
1651
1652         # Append to $result
1653         $result .= "$incr$idt<li$liatt>" . $checkbox . $item . "$idt</li>\n";
1654     }
1655     if ($fancy) {
1656         # remove "incrlevel=$g_list_level " parts
1657         $result =~ s{<span incrlevel=$g_list_level class="$opt{style_prefix}ol-incr((?:-\d{1,2})?)">}
1658             {$idt<span class="$opt{style_prefix}ol-incr$1">}g;
1659     } else {
1660         # remove the $g_list_level incr spans entirely
1661         $result =~ s{<span incrlevel=$g_list_level class="$opt{style_prefix}ol-incr(?:-\d{1,2})?"></span>\n}{}g;
1662         # remove the class="$opt{style_prefix}lc-greek" if first_marker is greek
1663         $result =~ s{(<li[^>]*?) class="$opt{style_prefix}lc-greek">}{$1>}g
1664             if defined($first_marker_type) && $first_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
1665     }
1666
1667     # Anything left over (similar to $') goes into result, but this should always be empty
1668     $result .= _RunBlockGamut(substr($list_str, pos($list_str)));
1669
1670     $g_list_level--;
1671     return ($result, $first_marker, $fancy);
1672 }
1673
1674
1675 sub _DoCodeBlocks {
1676 #
1677 #   Process Markdown `<pre><code>` blocks.
1678 #
1679
1680     my $text = shift;
1681
1682     $text =~ s{
1683             (?:\n\n|\A\n?)
1684             (           # $1 = the code block -- one or more lines, starting with indent_width spaces
1685               (?:
1686                 (?:[ ]{$opt{indent_width}})  # Lines must start with indent_width of spaces
1687                 .*\n+
1688               )+
1689             )
1690             ((?=^[ ]{0,$opt{indent_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1691         }{
1692             my $codeblock = $1;
1693
1694             $codeblock =~ s/\n\n\n/\n\n/g; # undo "paragraph for last list item" change
1695             $codeblock = _EncodeCode(_Outdent($codeblock));
1696             $codeblock =~ s/\A\n+//; # trim leading newlines
1697             $codeblock =~ s/\s+\z//; # trim trailing whitespace
1698
1699             my $result = "<div class=\"$opt{style_prefix}code\"><pre style=\"display:none\"></pre><pre><code>"
1700                 . $codeblock . "\n</code></pre></div>";
1701             my $key = block_id($result, 2);
1702             $g_code_blocks{$key} = $result;
1703             "\n\n" . $key . "\n\n";
1704         }egmx;
1705
1706     return $text;
1707 }
1708
1709
1710 sub _DoCodeSpans {
1711 #
1712 # * Backtick quotes are used for <code></code> spans.
1713 #
1714 # * You can use multiple backticks as the delimiters if you want to
1715 #   include literal backticks in the code span. So, this input:
1716 #
1717 #     Just type ``foo `bar` baz`` at the prompt.
1718 #
1719 #   Will translate to:
1720 #
1721 #     <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1722 #
1723 #   There's no arbitrary limit to the number of backticks you
1724 #   can use as delimters. If you need three consecutive backticks
1725 #   in your code, use four for delimiters, etc.
1726 #
1727 # * You can use spaces to get literal backticks at the edges:
1728 #
1729 #     ... type `` `bar` `` ...
1730 #
1731 #   Turns to:
1732 #
1733 #     ... type <code>`bar`</code> ...
1734 #
1735
1736     my $text = shift;
1737
1738     $text =~ s@
1739             (`+)        # $1 = Opening run of `
1740             (.+?)       # $2 = The code block
1741             (?<!`)
1742             \1          # Matching closer
1743             (?!`)
1744         @
1745             my $c = "$2";
1746             $c =~ s/^[ ]+//g; # leading whitespace
1747             $c =~ s/[ ]+$//g; # trailing whitespace
1748             $c = _EncodeCode($c);
1749             "<code>$c</code>";
1750         @egsx;
1751
1752     return $text;
1753 }
1754
1755
1756 sub _EncodeCode {
1757 #
1758 # Encode/escape certain characters inside Markdown code runs.
1759 # The point is that in code, these characters are literals,
1760 # and lose their special Markdown meanings.
1761 #
1762     local $_ = shift;
1763
1764     # Encode all ampersands; HTML entities are not
1765     # entities within a Markdown code span.
1766     s/&/&amp;/g;
1767
1768     # Encode $'s, but only if we're running under Blosxom.
1769     # (Blosxom interpolates Perl variables in article bodies.)
1770     s/\$/&#036;/g if $_haveBX;
1771
1772     # Do the angle bracket song and dance:
1773     s! <  !&lt;!gx;
1774     s! >  !&gt;!gx;
1775
1776     # Now, escape characters that are magic in Markdown:
1777     s!([*_~{}\[\]\\])!$g_escape_table{$1}!g;
1778
1779     return $_;
1780 }
1781
1782
1783 sub _DoItalicsAndBoldAndStrike {
1784     my $text = shift;
1785
1786     # <strong> must go first:
1787     $text =~ s{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }
1788         {<strong>$1</strong>}gsx;
1789     $text =~ s{ (?<!\w) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\w) }
1790         {<strong>$1</strong>}gsx;
1791
1792     $text =~ s{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }
1793         {<strike>$1</strike>}gsx;
1794
1795     $text =~ s{ \* (?=\S) (.+?) (?<=\S) \* }
1796         {<em>$1</em>}gsx;
1797     $text =~ s{ (?<!\w) _ (?=\S) (.+?) (?<=\S) _ (?!\w) }
1798         {<em>$1</em>}gsx;
1799
1800     return $text;
1801 }
1802
1803
1804 sub _DoBlockQuotes {
1805     my $text = shift;
1806
1807     $text =~ s{
1808           (                     # Wrap whole match in $1
1809             (
1810               ^[ ]*>[ ]?        # '>' at the start of a line
1811                 .*\n            # rest of the first line
1812               (.+\n)*           # subsequent consecutive lines
1813               \n*               # blanks
1814             )+
1815           )
1816         }{
1817             my $bq = $1;
1818             $bq =~ s/^[ ]*>[ ]?//gm; # trim one level of quoting
1819             $bq =~ s/^[ ]+$//mg;         # trim whitespace-only lines
1820             $bq = _RunBlockGamut($bq);   # recurse
1821
1822             $bq =~ s/^/  /mg;
1823             "<blockquote>\n$bq\n</blockquote>\n\n";
1824         }egmx;
1825
1826
1827     return $text;
1828 }
1829
1830
1831 my ($LEAD, $TRAIL, $LEADBAR, $LEADSP, $COLPL, $SEP);
1832 BEGIN {
1833     $LEAD = qr/(?>[ ]*(?:\|[ ]*)?)/o;
1834     $TRAIL = qr/\|[ ]*/o;
1835     $LEADBAR = qr/(?>[ ]*\|[ ]*)/o;
1836     $LEADSP = qr/(?>[ ]*)/o;
1837     $COLPL = qr/(?:[^\n|\\]|\\[^\n])+/o;
1838     $SEP = qr/[ ]*:?-+:?[ ]*/o;
1839 }
1840
1841 sub _DoTables {
1842     my $text = shift;
1843
1844     $text =~ s{
1845         (                               # Wrap whole thing to avoid $&
1846          (?: (?<=\n\n) | \A\n? )        # Preceded by blank line or beginning of string
1847          ^(                             # Header line
1848             $LEADBAR \| [^\n]* |
1849             $LEADBAR $COLPL [^\n]* |
1850             $LEADSP $COLPL \| [^\n]*
1851           )\n
1852           (                             # Separator line
1853             $LEADBAR $SEP (?: \| $SEP )* (?: \| [ ]*)? |
1854             $SEP (?: \| $SEP )+ (?: \| [ ]*)? |
1855             $SEP \| [ ]*
1856           )\n
1857           ((?:                          # Rows (0+)
1858             $LEADBAR \| [^\n]* \n |
1859             $LEADBAR $COLPL [^\n]* \n |
1860             $LEADSP $COLPL \| [^\n]* \n
1861           )*)
1862         )
1863     } {
1864         my ($w, $h, $s, $rows) = ($1, $2, $3, $4);
1865         my @heads = _SplitTableRow($h);
1866         my @seps = _SplitTableRow($s);
1867         if (@heads == @seps) {
1868             my @align = map {
1869                 if (/^:-+:$/) {" align=\"center\""}
1870                 elsif (/^:/) {" align=\"left\""}
1871                 elsif (/:$/) {" align=\"right\""}
1872                 else {""}
1873             } @seps;
1874             my $headers = _MakeTableRow("th", \@align, @heads);
1875             my $tab ="\n<table border=\"1\" cellspacing=\"0\" cellpadding=\"2\" class=\"$opt{style_prefix}table\">\n" .
1876                 "  <tr class=\"$opt{style_prefix}row-hdr\">" . _MakeTableRow("th", \@align, @heads) . "</tr>\n";
1877             my $cnt = 0;
1878             my @classes = ("class=\"$opt{style_prefix}row-even\"", "class=\"$opt{style_prefix}row-odd\"");
1879             $tab .= "  <tr " . $classes[++$cnt % 2] . ">" . _MakeTableRow("td", \@align, _SplitTableRow($_)) . "</tr>\n"
1880                     foreach split(/\n/, $rows);
1881             $tab .= "</table>\n\n";
1882         } else {
1883             $w;
1884         }
1885     }egmx;
1886
1887     return $text;
1888 }
1889
1890
1891 sub _SplitTableRow {
1892     my $row = shift;
1893     $row =~ s/^$LEAD//;
1894     $row =~ s/$TRAIL$//;
1895     $row =~ s!\\\\!$g_escape_table{'\\'}!go; # Must process escaped backslashes first.
1896     $row =~ s!\\\|!$g_escape_table{'|'}!go; # Then do \|
1897     my @elems = map {
1898       s!$g_escape_table{'|'}!|!go;
1899       s!$g_escape_table{'\\'}!\\!go;
1900       s/^[ ]+//;
1901       s/[ ]+$//;
1902       $_;
1903     } split(/[ ]*\|[ ]*/, $row, -1);
1904     @elems or push(@elems, "");
1905     return @elems;
1906 }
1907
1908
1909 sub _MakeTableRow {
1910     my $etype = shift;
1911     my $align = shift;
1912     my $row = "";
1913     for (my $i = 0; $i < @$align; ++$i) {
1914         my $data = $_[$i];
1915         defined($data) or $data = "";
1916         $row .= "<" . $etype . $$align[$i] . ">" .
1917             _RunSpanGamut($data) . "</" . $etype . ">";
1918     }
1919     return $row;
1920 }
1921
1922
1923 sub _FormParagraphs {
1924 #
1925 # Params:
1926 #   $text - string to process with html <p> tags
1927 #
1928     my $text = shift;
1929
1930     # Strip leading and trailing lines:
1931     $text =~ s/\A\n+//;
1932     $text =~ s/\n+\z//;
1933
1934     my @grafs = split(/\n{2,}/, $text);
1935
1936     #
1937     # Wrap <p> tags.
1938     #
1939     foreach (@grafs) {
1940         unless (defined($g_html_blocks{$_}) || defined($g_code_blocks{$_})) {
1941             $_ = _RunSpanGamut($_);
1942             s/^([ ]*)/<p>/;
1943             $_ .= "</p>";
1944         }
1945     }
1946
1947     #
1948     # Unhashify HTML blocks
1949     #
1950     foreach (@grafs) {
1951         if (defined( $g_html_blocks{$_} )) {
1952             $_ = $g_html_blocks{$_};
1953         }
1954     }
1955
1956     return join "\n\n", @grafs;
1957 }
1958
1959
1960 my $g_possible_tag_name;
1961 my %ok_tag_name;
1962 BEGIN {
1963     # note: length("blockquote") == 10
1964     $g_possible_tag_name = qr/(?i:[a-z]{1,10}|h[1-6])/o;
1965     %ok_tag_name = map({$_ => 1} qw(
1966         a abbr acronym address
1967         b basefont bdo big blockquote br
1968         caption center cite code col colgroup
1969         dd del dfn div dl dt
1970         em
1971         font
1972         h1 h2 h3 h4 h5 h6 hr
1973         i img ins
1974         kbd
1975         li
1976         ol
1977         p pre
1978         q
1979         s samp small span strike strong sub sup
1980         table tbody td tfoot th thead tr tt
1981         u ul
1982         var
1983     ));
1984     $ok_tag_name{$_} = 0 foreach (qw(
1985         dir menu
1986     ));
1987 }
1988
1989
1990 sub _SetAllowedTag {
1991         my ($tag, $forbid) = @_;
1992         $ok_tag_name{$tag} = $forbid ? 0 : 1
1993                 if defined($tag) && exists($ok_tag_name{$tag});
1994 }
1995
1996
1997 # Encode leading '<' of any non-tags
1998 # However, "<?", "<!" and "<$" are passed through (legacy on that "<$" thing)
1999 sub _DoTag {
2000         my $tag = shift;
2001         return $tag if $tag =~ /^<[?\$!]/;
2002         if (($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)} || $tag =~ m{^</($g_possible_tag_name)\s*>}) &&
2003             $ok_tag_name{lc($1)}) {
2004
2005             return _ProcessURLTag("href", $tag) if $tag =~ /^<a\s/i;
2006             return _ProcessURLTag("src", $tag) if $tag =~ /^<img\s/i;
2007             return $tag;
2008         }
2009         $tag =~ s/</&lt;/g;
2010         return $tag;
2011 }
2012
2013
2014 sub _ProcessURLTag {
2015     my $att = shift;
2016     my $tag = shift;
2017
2018     $att = lc($att) . "=";
2019     if ($tag =~ /^(<[^\s>]+\s+)/g) {
2020             my $out = $1;
2021             while ($tag =~ /\G([^\s\042\047>]+=)([\042\047])((?:(?!\2)(?!>).)*)(\2\s*)/gc) {
2022                     my ($p, $q, $v, $s) = ($1, $2, $3, $4);
2023                     if (lc($p) eq $att && $v ne "") {
2024                             $v = _HTMLEncode(_PrefixURL($v));
2025                     }
2026                     $out .= $p . $q . $v . $s;
2027             }
2028             $out .= substr($tag, pos($tag));
2029             return $out;
2030     }
2031
2032     return $tag;
2033 }
2034
2035
2036 sub _HTMLEncode {
2037     my $text = shift;
2038
2039     # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
2040     #   http://bumppo.net/projects/amputator/
2041     $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
2042
2043     # Remaining entities now
2044     $text =~ s/\042/&quot;/g;
2045     $text =~ s/\047/&apos;/g;
2046     $text =~ s/</&lt;/g;
2047     $text =~ s/>/&gt;/g;
2048
2049     return $text;
2050 }
2051
2052
2053 sub _EncodeAmps {
2054     my $text = shift;
2055
2056     # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
2057     #   http://bumppo.net/projects/amputator/
2058     $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
2059
2060     return $text;
2061 }
2062
2063
2064 sub _EncodeAmpsAndAngles {
2065 # Smart processing for ampersands and angle brackets that need to be encoded.
2066
2067     my $text = shift;
2068
2069     # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
2070     #   http://bumppo.net/projects/amputator/
2071     $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
2072
2073     # Encode naked <'s
2074     $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
2075     $text =~ s{<(?=[^>]*$)}{&lt;}g;
2076
2077     # Encode <'s that cannot possibly be a start or end tag
2078     $text =~ s{(<[^>]*>)}{_DoTag($1)}ige;
2079
2080     return $text;
2081 }
2082
2083
2084 sub _EncodeBackslashEscapes {
2085 #
2086 # Parameter: String.
2087 # Returns:   String after processing the following backslash escape sequences.
2088 #
2089     local $_ = shift;
2090
2091     s!\\\\!$g_escape_table{'\\'}!go; # Must process escaped backslashes first.
2092     s{\\([`*_~{}\[\]()>#+\-.!`])}{$g_escape_table{$1}}g;
2093
2094     return $_;
2095 }
2096
2097
2098 sub _DoAutoLinks {
2099     local $_ = shift;
2100
2101     s{<((https?|ftps?):[^'\042>\s]+)>}{<a href="$1">&lt;$1&gt;</a>}gi;
2102
2103     # Email addresses: <address@domain.foo>
2104     s{
2105         <
2106         (?:mailto:)?
2107         (
2108             [-.\w]+
2109             \@
2110             [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
2111         )
2112         >
2113     }{
2114         _EncodeEmailAddress(_UnescapeSpecialChars($1), "&#x3c;", "&#62;");
2115     }egix;
2116
2117     # (kjm) I don't do "x" patterns
2118     s{(?<![\042'<>])(?<!&[Ll][Tt];)(?<!&#60;)(?<!&#x3[Cc];)\b((?:https?|ftps?)://(?:[-a-zA-Z0-9./?\&\%=_~!*;:\@+\$,\x23](?:(?<![.,:;])|(?=[^\s])))+)}
2119      {<a href="$1">$1</a>}sog;
2120     s{(?<![][])(?<!\] )\[RFC( ?)([0-9]{1,5})\](?![][])(?! \[)}
2121      {[<a href="http://tools.ietf.org/html/rfc$2">RFC$1$2</a>]}sog;
2122
2123     return $_;
2124 }
2125
2126
2127 sub _EncodeEmailAddress {
2128 #
2129 # Input: an email address, e.g. "foo@example.com"
2130 #
2131 # Output: the email address as a mailto link, with each character
2132 #         of the address encoded as either a decimal or hex entity, in
2133 #         the hopes of foiling most address harvesting spam bots. E.g.:
2134 #
2135 #   <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
2136 #   x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
2137 #   &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
2138 #
2139 # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
2140 # mailing list: <http://tinyurl.com/yu7ue>
2141 #
2142
2143     my ($addr, $prefix, $suffix) = @_;
2144     $prefix = "" unless defined($prefix);
2145     $suffix = "" unless defined($suffix);
2146
2147     srand(unpack('N',md5($addr)));
2148     my @encode = (
2149         sub { '&#' .                 ord(shift)   . ';' },
2150         sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
2151         sub {                            shift          },
2152     );
2153
2154     $addr = "mailto:" . $addr;
2155
2156     $addr =~ s{(.)}{
2157         my $char = $1;
2158         if ( $char eq '@' ) {
2159             # this *must* be encoded. I insist.
2160             $char = $encode[int rand 1]->($char);
2161         } elsif ( $char ne ':' ) {
2162             # leave ':' alone (to spot mailto: later)
2163             my $r = rand;
2164             # roughly 10% raw, 45% hex, 45% dec
2165             $char = (
2166                 $r > .9   ?  $encode[2]->($char)  :
2167                 $r < .45  ?  $encode[1]->($char)  :
2168                              $encode[0]->($char)
2169             );
2170         }
2171         $char;
2172     }gex;
2173
2174     # strip the mailto: from the visible part
2175     (my $bareaddr = $addr) =~ s/^.+?://;
2176     $addr = qq{<a href="$addr">$prefix$bareaddr$suffix</a>};
2177
2178     return $addr;
2179 }
2180
2181
2182 sub _UnescapeSpecialChars {
2183 #
2184 # Swap back in all the special characters we've hidden.
2185 #
2186     my $text = shift;
2187
2188     while( my($char, $hash) = each(%g_escape_table) ) {
2189         $text =~ s/$hash/$char/g;
2190     }
2191     return $text;
2192 }
2193
2194
2195 sub _TokenizeHTML {
2196 #
2197 # Parameter: String containing HTML markup.
2198 # Returns:   Reference to an array of the tokens comprising the input
2199 #            string. Each token is either a tag (possibly with nested,
2200 #            tags contained therein, such as <a href="<MTFoo>">, or a
2201 #            run of text between tags. Each element of the array is a
2202 #            two-element array; the first is either 'tag' or 'text';
2203 #            the second is the actual value.
2204 #
2205 #
2206 # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
2207 #   <http://www.bradchoate.com/past/mtregex.php>
2208 #
2209
2210     my $str = shift;
2211     my $pos = 0;
2212     my $len = length $str;
2213     my @tokens;
2214
2215     my $depth = 6;
2216     my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
2217     my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
2218                    (?s: <\? .*? \?> ) |             # processing instruction
2219                    $nested_tags/iox;                # nested tags
2220
2221     while ($str =~ m/($match)/g) {
2222         my $whole_tag = $1;
2223         my $sec_start = pos $str;
2224         my $tag_start = $sec_start - length $whole_tag;
2225         if ($pos < $tag_start) {
2226             push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
2227         }
2228         push @tokens, ['tag', $whole_tag];
2229         $pos = pos $str;
2230     }
2231     push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
2232     \@tokens;
2233 }
2234
2235
2236 sub _Outdent {
2237 #
2238 # Remove one level of line-leading indent_width of spaces
2239 #
2240     my $text = shift;
2241
2242     $text =~ s/^ {1,$opt{indent_width}}//gm;
2243     return $text;
2244 }
2245
2246
2247 sub _Detab {
2248 #
2249 # Expand tabs to spaces using $opt{tab_width} if no second argument
2250 #
2251     my $text = shift;
2252     my $ts = shift || $opt{tab_width};
2253     # From the Perl camel book "Fluent Perl" section (slightly modified)
2254     $text =~ s/(.*?)(\t+)/$1 . ' ' x (length($2) * $ts - length($1) % $ts)/ge;
2255     return $text;
2256 }
2257
2258
2259 sub _PrefixURL {
2260 #
2261 # Add URL prefix if needed
2262 #
2263     my $url = shift;
2264     $url =~ s/^\s+//;
2265     $url =~ s/\s+$//;
2266     $url = "#" unless $url ne "";
2267
2268     return $url unless $opt{url_prefix} ne '' || $opt{img_prefix} ne '';
2269     return $url if $url =~ m"^\002\003" || $url =~ m"^#" ||
2270             $url =~ m,^//, || $url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/;
2271     my $ans = $opt{url_prefix};
2272     $ans = $opt{img_prefix}
2273         if $opt{img_prefix} ne '' && $url =~ m"^[^#?]*\.(?:png|gif|jpe?g|svgz?)(?:[#?]|$)"i;
2274     return $url unless $ans ne '';
2275     $ans .= '/' if substr($ans, -1, 1) ne '/';
2276     $ans .= substr($url, 0, 1) eq '/' ? substr($url, 1) : $url;
2277     return "\2\3".$ans;
2278 }
2279
2280
2281 BEGIN {
2282     $g_style_sheet = <<'STYLESHEET';
2283
2284 <style type="text/css">
2285 /* <![CDATA[ */
2286
2287 /* Markdown.pl fancy style sheet
2288 ** Copyright (C) 2017,2018,2019 Kyle J. McKay.
2289 ** All rights reserved.
2290 **
2291 ** Redistribution and use in source and binary forms, with or without
2292 ** modification, are permitted provided that the following conditions are met:
2293 **
2294 **   1. Redistributions of source code must retain the above copyright notice,
2295 **      this list of conditions and the following disclaimer.
2296 **
2297 **   2. Redistributions in binary form must reproduce the above copyright
2298 **      notice, this list of conditions and the following disclaimer in the
2299 **      documentation and/or other materials provided with the distribution.
2300 **
2301 **   3. Neither the name of the copyright holder nor the names of its
2302 **      contributors may be used to endorse or promote products derived from
2303 **      this software without specific prior written permission.
2304 **
2305 ** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2306 ** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2307 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2308 ** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
2309 ** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2310 ** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2311 ** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2312 ** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2313 ** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2314 ** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2315 ** POSSIBILITY OF SUCH DAMAGE.
2316 */
2317
2318 div.%(base)code-bt > pre, div.%(base)code > pre {
2319         margin: 0;
2320         padding: 0;
2321         overflow: auto;
2322 }
2323
2324 div.%(base)code-bt > pre > code, div.%(base)code > pre > code {
2325         display: inline-block;
2326         margin: 0;
2327         padding: 0.5em 0;
2328         border-top: thin dotted;
2329         border-bottom: thin dotted;
2330 }
2331
2332 table.%(base)table {
2333         margin-bottom: 0.5em;
2334 }
2335 table.%(base)table, table.%(base)table th, table.%(base)table td {
2336         border-collapse: collapse;
2337         border-spacing: 0;
2338         border: thin solid;
2339 }
2340
2341 ol.%(base)ol {
2342         counter-reset: %(base)item;
2343 }
2344 ol.%(base)ol[start="0"] {
2345         counter-reset: %(base)item -1;
2346 }
2347 ol.%(base)ol[start="5"] {
2348         counter-reset: %(base)item 4;
2349 }
2350 ol.%(base)ol[start="10"] {
2351         counter-reset: %(base)item 9;
2352 }
2353 ol.%(base)ol > span.%(base)ol-incr {
2354         counter-increment: %(base)item;
2355 }
2356 ol.%(base)ol > span.%(base)ol-incr-2 {
2357         counter-increment: %(base)item 2;
2358 }
2359 ol.%(base)ol > span.%(base)ol-incr-5 {
2360         counter-increment: %(base)item 5;
2361 }
2362 ol.%(base)ol > span.%(base)ol-incr-10 {
2363         counter-increment: %(base)item 10;
2364 }
2365 ol.%(base)lc-greek, li.%(base)lc-greek {
2366         list-style-type: lower-greek;
2367 }
2368 ol.%(base)ol > li {
2369         counter-increment: %(base)item;
2370 }
2371 ol.%(base)ol > li.%(base)li,
2372 ol.%(base)ol > li.%(base)li-lc,
2373 ol.%(base)ol > li.%(base)li-lc-greek,
2374 ol.%(base)ol > li.%(base)li-uc {
2375         list-style-type: none;
2376         display: block;
2377 }
2378 ol.%(base)ol > li.%(base)li:before,
2379 ol.%(base)ol > li.%(base)li-lc:before,
2380 ol.%(base)ol > li.%(base)li-lc-greek:before,
2381 ol.%(base)ol > li.%(base)li-uc:before {
2382         position: absolute;
2383         text-align: right;
2384         white-space: nowrap;
2385         margin-left: -9ex;
2386         width: 9ex;
2387 }
2388 ol.%(base)ol > li.%(base)li[type="1"]:before {
2389         content: counter(%(base)item, decimal) ")\A0 \A0 ";
2390 }
2391 ol.%(base)ol > li.%(base)li-lc[type="i"]:before,
2392 ol.%(base)ol > li.%(base)li-lc[type="I"]:before {
2393         content: counter(%(base)item, lower-roman) ")\A0 \A0 ";
2394 }
2395 ol.%(base)ol > li.%(base)li-uc[type="I"]:before,
2396 ol.%(base)ol > li.%(base)li-uc[type="i"]:before {
2397         content: counter(%(base)item, upper-roman) ")\A0 \A0 ";
2398 }
2399 ol.%(base)ol > li.%(base)li-lc[type="a"]:before,
2400 ol.%(base)ol > li.%(base)li-lc[type="A"]:before {
2401         content: counter(%(base)item, lower-alpha) ")\A0 \A0 ";
2402 }
2403 ol.%(base)ol > li.%(base)li-lc-greek[type="a"]:before,
2404 ol.%(base)ol > li.%(base)li-lc-greek[type="A"]:before {
2405         content: counter(%(base)item, lower-greek) ")\A0 \A0 ";
2406 }
2407 ol.%(base)ol > li.%(base)li-uc[type="A"]:before,
2408 ol.%(base)ol > li.%(base)li-uc[type="a"]:before {
2409         content: counter(%(base)item, upper-alpha) ")\A0 \A0 ";
2410 }
2411
2412 li.%(base)checkbox-on,
2413 li.%(base)checkbox-off {
2414         list-style-type: none;
2415         display: block;
2416 }
2417 li.%(base)checkbox-on > span:first-child + span + span,
2418 li.%(base)checkbox-off > span:first-child + span + span {
2419         position: absolute;
2420         clip: rect(0,0,0,0);
2421 }
2422 li.%(base)checkbox-on > span:first-child,
2423 li.%(base)checkbox-off > span:first-child,
2424 li.%(base)checkbox-on > span:first-child + span,
2425 li.%(base)checkbox-off > span:first-child + span {
2426         display: block;
2427         position: absolute;
2428         margin-left: -3ex;
2429         width: 1em;
2430         height: 1em;
2431 }
2432 li.%(base)checkbox-on > span:first-child > span:first-child,
2433 li.%(base)checkbox-off > span:first-child > span:first-child {
2434         display: block;
2435         position: absolute;
2436         left: 0.75pt; top: 0.75pt; right: 0.75pt; bottom: 0.75pt;
2437 }
2438 li.%(base)checkbox-on > span:first-child > span:first-child:before,
2439 li.%(base)checkbox-off > span:first-child > span:first-child:before {
2440         display: inline-block;
2441         position: relative;
2442         right: 1pt;
2443         width: 100%;
2444         height: 100%;
2445         border: 1pt solid;
2446         content: "";
2447 }
2448 li.%(base)checkbox-on > span:first-child + span:before {
2449         position: relative;
2450         left: 2pt;
2451         bottom: 1pt;
2452         font-size: 125%;
2453         line-height: 80%;
2454         content: "\2713";
2455 }
2456
2457 /* ]]> */
2458 </style>
2459
2460 STYLESHEET
2461     $g_style_sheet =~ s/^\s+//g;
2462     $g_style_sheet =~ s/\s+$//g;
2463     $g_style_sheet .= "\n";
2464 }
2465
2466 1;
2467
2468 __DATA__
2469
2470 =head1 NAME
2471
2472 Markdown.pl - convert Markdown format text files to HTML
2473
2474 =head1 SYNOPSIS
2475
2476 B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
2477     [B<--imageroot>=I<prefix>] [B<--version>] [B<--shortversion>]
2478     [B<--tabwidth>=I<num>] [B<--stylesheet>] [B<--stub>] [--]
2479     [I<file>...]
2480
2481  Options:
2482    -h                                   show short usage help
2483    --help                               show long detailed help
2484    --html4tags                          use <br> instead of <br />
2485    --deprecated                         allow <dir> and <menu> tags
2486    --tabwidth=num                       expand tabs to num instead of 8
2487    -r prefix | --htmlroot=prefix        append relative non-img URLs
2488                                         to prefix
2489    -i prefix | --imageroot=prefix       append relative img URLs to
2490                                         prefix
2491    -V | --version                       show version, authors, license
2492                                         and copyright
2493    -s | --shortversion                  show just the version number
2494    --stylesheet                         output the fancy style sheet
2495    --no-stylesheet                      do not output fancy style sheet
2496    --stub                               wrap output in stub document
2497                                         implies --stylesheet
2498    --                                   end options and treat next
2499                                         argument as file
2500
2501 =head1 DESCRIPTION
2502
2503 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2504 easy-to-write structured text format into HTML. Markdown's text format
2505 is most similar to that of plain text email, and supports features such
2506 as headers, *emphasis*, code blocks, blockquotes, and links.
2507
2508 Markdown's syntax is designed not as a generic markup language, but
2509 specifically to serve as a front-end to (X)HTML. You can  use span-level
2510 HTML tags anywhere in a Markdown document, and you can use block level
2511 HTML tags (like <div> and <table> as well).
2512
2513 For more information about Markdown's syntax, see the F<basics.md>
2514 and F<syntax.md> files included with F<Markdown.pl>.
2515
2516 Input (auto-detected) may be either ISO-8859-1 or UTF-8.  Output is always
2517 converted to the UTF-8 character set.
2518
2519
2520 =head1 OPTIONS
2521
2522 Use "--" to end switch parsing. For example, to open a file named "-z", use:
2523
2524     Markdown.pl -- -z
2525
2526 =over
2527
2528
2529 =item B<--html4tags>
2530
2531 Use HTML 4 style for empty element tags, e.g.:
2532
2533     <br>
2534
2535 instead of Markdown's default XHTML style tags, e.g.:
2536
2537     <br />
2538
2539
2540 =item B<--deprecated>
2541
2542 Both "<dir>" and "<menu>" are normally taken as literal text and the leading
2543 "<" will be automatically escaped.
2544
2545 If this option is used, they are recognized as valid tags and passed through
2546 without being escaped.
2547
2548 When dealing with program argument descriptions "<dir>" can be particularly
2549 problematic therefore use of this option is not recommended.
2550
2551 Other deprecated tags (such as "<font>" and "<center>" for example) continue
2552 to be recognized and passed through even without using this option.
2553
2554
2555 =item B<--tabwidth>=I<num>
2556
2557 Expand tabs to I<num> character wide tab stop positions instead of the default
2558 8.  Don't use this; physical tabs should always be expanded to 8-character
2559 positions.  This option does I<not> affect the number of spaces needed to
2560 start a new "indent level".  That will always be 4 no matter what value is
2561 used (or implied by default) with this option.  Also note that tabs inside
2562 backticks-delimited code blocks will always be expanded to 8-character tab
2563 stop positions no matter what value is used for this option.
2564
2565 The value must be S<2 <= I<num> <= 32>.
2566
2567
2568 =item B<-r> I<prefix>, B<--htmlroot>=I<prefix>
2569
2570 Any non-absolute URLs have I<prefix> prepended.
2571
2572
2573 =item B<-i> I<prefix>, B<--imageroot>=I<prefix>
2574
2575 Any non-absolute URLs have I<prefix> prepended (overriding the B<-r> prefix
2576 if any) but only if they end in an image suffix.
2577
2578
2579 =item B<-V>, B<--version>
2580
2581 Display Markdown's version number and copyright information.
2582
2583
2584 =item B<-s>, B<--shortversion>
2585
2586 Display the short-form version number.
2587
2588
2589 =item B<--stylesheet>
2590
2591 Include the fancy style sheet at the beginning of the output (or in the
2592 C<head> section with B<--stub>).  This style sheet makes fancy checkboxes
2593 and makes a right parenthesis C<)> show instead of a C<.> for ordered lists
2594 that use them.  Without it things will still look fine except that the
2595 fancy stuff won't be there.
2596
2597 Use this option with no other arguments and redirect standard input to
2598 /dev/null to get just the style sheet and nothing else.
2599
2600
2601 =item B<--no-stylesheet>
2602
2603 Overrides a previous B<--stylesheet> and disables implicit inclusion
2604 of the style sheet by the B<--stub> option.
2605
2606
2607 =item B<--stub>
2608
2609 Wrap the output in a full document stub (i.e. has C<html>, C<head> and C<body>
2610 tags).  The style sheet I<will> be included in the C<head> section unless the
2611 B<--no-stylesheet> option is also used.
2612
2613
2614 =item B<-h>, B<--help>
2615
2616 Display Markdown's help.  With B<--help> full help is shown, with B<-h> only
2617 the usage and options are shown.
2618
2619
2620 =back
2621
2622
2623 =head1 VERSION HISTORY
2624
2625 Z<> See the F<README> file for detailed release notes for this version.
2626
2627 =over
2628
2629 =item Z<> 1.1.7 - 14 Feb 2018
2630
2631 =item Z<> 1.1.6 - 03 Jan 2018
2632
2633 =item Z<> 1.1.5 - 07 Dec 2017
2634
2635 =item Z<> 1.1.4 - 24 Jun 2017
2636
2637 =item Z<> 1.1.3 - 13 Feb 2017
2638
2639 =item Z<> 1.1.2 - 19 Jan 2017
2640
2641 =item Z<> 1.1.1 - 12 Jan 2017
2642
2643 =item Z<> 1.1.0 - 11 Jan 2017
2644
2645 =item Z<> 1.0.4 - 05 Jun 2016
2646
2647 =item Z<> 1.0.3 - 06 Sep 2015
2648
2649 =item Z<> 1.0.2 - 03 Sep 2015
2650
2651 =item Z<> 1.0.1 - 14 Dec 2004
2652
2653 =item Z<> 1.0.0 - 28 Aug 2004
2654
2655 =back
2656
2657 =head1 AUTHORS
2658
2659 =over
2660
2661 =item John Gruber
2662
2663 =item L<http://daringfireball.net>
2664
2665 =item L<http://daringfireball.net/projects/markdown/>
2666
2667 =item E<160>
2668
2669 =back
2670
2671 =over
2672
2673 =item PHP port and other contributions by Michel Fortin
2674
2675 =item L<http://michelf.com>
2676
2677 =item E<160>
2678
2679 =back
2680
2681 =over
2682
2683 =item Additional enhancements and tweaks by Kyle J. McKay
2684
2685 =item mackyle<at>gmail.com
2686
2687 =back
2688
2689 =head1 COPYRIGHT AND LICENSE
2690
2691 =over
2692
2693 =item Copyright (C) 2003-2004 John Gruber
2694
2695 =item Copyright (C) 2015-2019 Kyle J. McKay
2696
2697 =item All rights reserved.
2698
2699 =back
2700
2701 Redistribution and use in source and binary forms, with or without
2702 modification, are permitted provided that the following conditions are
2703 met:
2704
2705 =over
2706
2707 =item *
2708
2709 Redistributions of source code must retain the above copyright
2710 notice, this list of conditions and the following disclaimer.
2711
2712 =item *
2713
2714 Redistributions in binary form must reproduce the above copyright
2715 notice, this list of conditions and the following disclaimer in the
2716 documentation and/or other materials provided with the distribution.
2717
2718 =item *
2719
2720 Neither the name "Markdown" nor the names of its contributors may
2721 be used to endorse or promote products derived from this software
2722 without specific prior written permission.
2723
2724 =back
2725
2726 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
2727 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
2728 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
2729 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
2730 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
2731 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
2732 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
2733 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
2734 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
2735 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
2736 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737
2738 =cut