Markdown.pl

   1 #!/usr/bin/env perl
   2
   3 #
   4 # Markdown -- A text-to-HTML conversion tool for web writers
   5 #
   6 # Copyright (C) 2004 John Gruber
   7 # Copyright (C) 2015,2016,2017,2018,2019 Kyle J. McKay
   8 # All rights reserved.
   9 # License is Modified BSD (aka 3-clause BSD) License\n";
  10 # See LICENSE file (or <https://opensource.org/licenses/BSD-3-Clause>)
  11 #
  12
  13 package Markdown;
  14
  15 require 5.008;
  16 use strict;
  17 use warnings;
  18
  19 use Encode;
  20
  21 use vars qw($COPYRIGHT $VERSION @ISA @EXPORT_OK);
  22
  23 BEGIN {*COPYRIGHT =
  24 \"Copyright (C) 2004 John Gruber
  25 Copyright (C) 2015,2016,2017,2018,2019 Kyle J. McKay
  26 All rights reserved.
  27 ";
  28 *VERSION = \"1.1.10-PRE"
  29 }
  30
  31 require Exporter;
  32 use Digest::MD5 qw(md5 md5_hex);
  33 use File::Basename qw(basename);
  34 use Scalar::Util qw(refaddr looks_like_number);
  35 my ($hasxml, $hasxml_err); BEGIN { ($hasxml, $hasxml_err) = (0, "") }
  36 my ($hasxmlp, $hasxmlp_err); BEGIN { ($hasxmlp, $hasxmlp_err) = (0, "") }
  37 @ISA = qw(Exporter);
  38 @EXPORT_OK = qw(Markdown);
  39 $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'};
  40
  41 close(DATA) if fileno(DATA);
  42 exit(&_main(@ARGV)||0) unless caller;
  43
  44 sub fauxdie($) {
  45     my $msg = join(" ", @_);
  46     $msg =~ s/\s+$//os;
  47     printf STDERR "%s: fatal: %s\n", basename($0), $msg;
  48     exit 1;
  49 }
  50
  51 my $encoder;
  52 BEGIN {
  53         $encoder = Encode::find_encoding('Windows-1252') ||
  54                    Encode::find_encoding('ISO-8859-1') or
  55                    die "failed to load ISO-8859-1 encoder\n";
  56 }
  57
  58 #
  59 # Global default settings:
  60 #
  61 my ($g_style_prefix, $g_empty_element_suffix, $g_indent_width, $g_tab_width);
  62 BEGIN {
  63     $g_style_prefix = "_markdown-";     # Prefix for markdown css class styles
  64     $g_empty_element_suffix = " />";    # Change to ">" for HTML output
  65     $g_indent_width = 4;                # Number of spaces considered new level
  66     $g_tab_width = 4;                   # Legacy even though it's wrong
  67 }
  68
  69
  70 #
  71 # Globals:
  72 #
  73
  74 # Style sheet template
  75 my $g_style_sheet;
  76
  77 # Permanent block id table
  78 my %g_perm_block_ids;
  79
  80 # Global hashes, used by various utility routines
  81 my %g_urls;
  82 my %g_titles;
  83 my %g_anchors;
  84 my %g_anchors_id;
  85 my %g_block_ids;
  86 my %g_code_block_ids;
  87 my %g_html_blocks;
  88 my %g_code_blocks;
  89 my %opt;
  90
  91 # Return a "block id" to use to identify the block that does not contain
  92 # any characters that could be misinterpreted by the rest of the code
  93 # Originally this used md5_hex but that's unnecessarily slow
  94 # Instead just use the refaddr of the scalar ref of the entry for that
  95 # key in either the global or, if the optional second argument is true,
  96 # permanent table.  To avoid the result being confused with anything
  97 # else, it's prefixed with a control character and suffixed with another
  98 # both of which are not allowed by the XML standard or Unicode.
  99 sub block_id {
 100     $_[1] or return "\5".refaddr(\$g_block_ids{$_[0]})."\6";
 101     $_[1] == 1 and return "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3";
 102     $_[1] == 2 and return "\25".refaddr(\$g_code_block_ids{$_[0]})."\26";
 103     die "programmer error: bad block_id type $_[1]";
 104 }
 105
 106 # Regex to match balanced [brackets]. See Friedl's
 107 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
 108 my $g_nested_brackets;
 109 BEGIN {
 110     $g_nested_brackets = qr{
 111     (?>                                 # Atomic matching
 112         [^\[\]]+                        # Anything other than brackets
 113      |
 114         \[
 115             (??{ $g_nested_brackets })  # Recursive set of nested brackets
 116         \]
 117     )*
 118     }ox
 119 }
 120
 121 # Regex to match balanced (parentheses)
 122 my $g_nested_parens;
 123 BEGIN {
 124     $g_nested_parens = qr{
 125     (?>                                 # Atomic matching
 126         [^\(\)]+                        # Anything other than parentheses
 127      |
 128         \(
 129             (??{ $g_nested_parens })    # Recursive set of nested parentheses
 130         \)
 131     )*
 132     }ox
 133 }
 134
 135 # Table of hash values for escaped characters:
 136 my %g_escape_table;
 137 BEGIN {
 138     $g_escape_table{""} = "\2\3";
 139     foreach my $char (split //, "\\\`*_~{}[]()>#+-.!|:<") {
 140         $g_escape_table{$char} = block_id($char,1);
 141     }
 142 }
 143
 144 # Used to track when we're inside an ordered or unordered list
 145 # (see _ProcessListItems() for details):
 146 my $g_list_level;
 147 BEGIN {
 148     $g_list_level = 0;
 149 }
 150
 151
 152 #### Blosxom plug-in interface ##########################################
 153 my $_haveBX;
 154 BEGIN {
 155     no warnings 'once';
 156     $_haveBX = defined($blosxom::version);
 157 }
 158
 159 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
 160 # which posts Markdown should process, using a "meta-markup: markdown"
 161 # header. If it's set to 0 (the default), Markdown will process all
 162 # entries.
 163 my $g_blosxom_use_meta;
 164 BEGIN {
 165     $g_blosxom_use_meta = 0;
 166 }
 167
 168 sub start { 1; }
 169 sub story {
 170     my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
 171
 172     if ((! $g_blosxom_use_meta) or
 173         (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
 174          ) {
 175             $$body_ref = Markdown($$body_ref);
 176     }
 177     1;
 178 }
 179
 180
 181 #### Movable Type plug-in interface #####################################
 182 my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT
 183 my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0.
 184
 185 if ($_haveMT) {
 186     require MT;
 187     import  MT;
 188     require MT::Template::Context;
 189     import  MT::Template::Context;
 190
 191     if ($_haveMT3) {
 192         require MT::Plugin;
 193         import  MT::Plugin;
 194         my $plugin = new MT::Plugin({
 195             name => "Markdown",
 196             description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
 197             doc_link => 'https://daringfireball.net/projects/markdown/'
 198         });
 199         MT->add_plugin( $plugin );
 200     }
 201
 202     MT::Template::Context->add_container_tag(MarkdownOptions => sub {
 203         my $ctx  = shift;
 204         my $args = shift;
 205         my $builder = $ctx->stash('builder');
 206         my $tokens = $ctx->stash('tokens');
 207
 208         if (defined ($args->{'output'}) ) {
 209             $ctx->stash('markdown_output', lc $args->{'output'});
 210         }
 211
 212         defined (my $str = $builder->build($ctx, $tokens) )
 213             or return $ctx->error($builder->errstr);
 214         $str; # return value
 215     });
 216
 217     MT->add_text_filter('markdown' => {
 218         label     => 'Markdown',
 219         docs      => 'https://daringfireball.net/projects/markdown/',
 220         on_format => sub {
 221             my $text = shift;
 222             my $ctx  = shift;
 223             my $raw  = 0;
 224             if (defined $ctx) {
 225             my $output = $ctx->stash('markdown_output');
 226                 if (defined $output && $output =~ m/^html/i) {
 227                     $g_empty_element_suffix = ">";
 228                     $ctx->stash('markdown_output', '');
 229                 }
 230                 elsif (defined $output && $output eq 'raw') {
 231                     $raw = 1;
 232                     $ctx->stash('markdown_output', '');
 233                 }
 234                 else {
 235                     $raw = 0;
 236                     $g_empty_element_suffix = " />";
 237                 }
 238             }
 239             $text = $raw ? $text : Markdown($text);
 240             $text;
 241         },
 242     });
 243
 244     # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
 245     my $smartypants;
 246
 247     {
 248         no warnings "once";
 249         $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
 250     }
 251
 252     if ($smartypants) {
 253         MT->add_text_filter('markdown_with_smartypants' => {
 254             label     => 'Markdown With SmartyPants',
 255             docs      => 'https://daringfireball.net/projects/markdown/',
 256             on_format => sub {
 257                 my $text = shift;
 258                 my $ctx  = shift;
 259                 if (defined $ctx) {
 260                     my $output = $ctx->stash('markdown_output');
 261                     if (defined $output && $output eq 'html') {
 262                         $g_empty_element_suffix = ">";
 263                     }
 264                     else {
 265                         $g_empty_element_suffix = " />";
 266                     }
 267                 }
 268                 $text = Markdown($text);
 269                 $text = $smartypants->($text, '1');
 270             },
 271         });
 272     }
 273 }
 274
 275 sub _strip {
 276         my $str = shift;
 277         defined($str) or return undef;
 278         $str =~ s/^\s+//;
 279         $str =~ s/\s+$//;
 280         $str =~ s/\s+/ /g;
 281         $str;
 282 }
 283
 284 #### BBEdit/command-line text filter interface ##########################
 285 sub _main {
 286     local *ARGV = \@_;
 287
 288
 289     #### Check for command-line switches: #################
 290     my %options = ();
 291     my %cli_opts;
 292     my $raw = 0;
 293     use Getopt::Long;
 294     Getopt::Long::Configure(qw(bundling require_order pass_through));
 295     GetOptions(\%cli_opts,
 296         'help','h',
 297         'version|V',
 298         'shortversion|short-version|s',
 299         'html4tags',
 300         'deprecated',
 301         'sanitize',
 302         'no-sanitize',
 303         'validate-xml',
 304         'validate-xml-internal',
 305         'no-validate-xml',
 306         'absroot|a=s',
 307         'base|b=s',
 308         'htmlroot|r=s',
 309         'imageroot|i=s',
 310         'wiki|w:s',
 311         'tabwidth|tab-width=s',
 312         'raw',
 313         'stylesheet|style-sheet',
 314         'no-stylesheet|no-style-sheet',
 315         'stub',
 316     );
 317     if ($cli_opts{'help'}) {
 318         require Pod::Usage;
 319         Pod::Usage::pod2usage(-verbose => 2, -exitval => 0);
 320     }
 321     if ($cli_opts{'h'}) {
 322         require Pod::Usage;
 323         Pod::Usage::pod2usage(-verbose => 0, -exitval => 0);
 324     }
 325     if ($cli_opts{'version'}) { # Version info
 326         print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
 327         print "License is Modified BSD (aka 3-clause BSD) License\n";
 328         print "<https://opensource.org/licenses/BSD-3-Clause>\n";
 329         exit 0;
 330     }
 331     if ($cli_opts{'shortversion'}) { # Just the version number string.
 332         print $VERSION;
 333         exit 0;
 334     }
 335     my $stub = 0;
 336     if ($cli_opts{'stub'}) {
 337         $stub = 1;
 338     }
 339     if ($cli_opts{'html4tags'}) {        # Use HTML tag style instead of XHTML
 340         $options{empty_element_suffix} = ">";
 341         $stub = -$stub;
 342     }
 343     if ($cli_opts{'deprecated'}) {       # Allow <dir> and <menu> tags to pass through
 344         _SetAllowedTag("dir");
 345         _SetAllowedTag("menu");
 346     }
 347     $options{sanitize} = 1; # sanitize by default
 348     if ($cli_opts{'no-sanitize'}) {  # Do not sanitize
 349         $options{sanitize} = 0;
 350     }
 351     if ($cli_opts{'sanitize'}) {  # --sanitize always wins
 352         $options{sanitize} = 1;
 353     }
 354     $options{xmlcheck} = $options{sanitize} ? 2 : 0;
 355     if ($cli_opts{'no-validate-xml'}) {  # Do not validate XML
 356         $options{xmlcheck} = 0;
 357     }
 358     if ($cli_opts{'validate-xml'}) {  # Validate XML output
 359         $options{xmlcheck} = 1;
 360     }
 361     if ($cli_opts{'validate-xml-internal'}) {  # Validate XML output internally
 362         $options{xmlcheck} = 2;
 363     }
 364     die "--html4tags and --validate-xml are incompatible\n"
 365         if $cli_opts{'html4tags'} && $options{xmlcheck} == 1;
 366     die "--no-sanitize and --validate-xml-internal are incompatible\n"
 367         if !$options{'sanitize'} && $options{xmlcheck} == 2;
 368     if ($options{xmlcheck} == 1) {
 369         eval { require XML::Simple; 1 } and $hasxml = 1 or $hasxml_err = $@;
 370         eval { require XML::Parser; 1 } and $hasxmlp = 1 or $hasxmlp_err = $@ unless $hasxml;
 371         die "$hasxml_err$hasxmlp_err" unless $hasxml || $hasxmlp;
 372     }
 373     if ($cli_opts{'tabwidth'}) {
 374         my $tw = $cli_opts{'tabwidth'};
 375         die "invalid tab width (must be integer)\n" unless looks_like_number $tw;
 376         die "invalid tab width (must be >= 2 and <= 32)\n" unless $tw >= 2 && $tw <= 32;
 377         $options{tab_width} = int(0+$tw);
 378     }
 379     $options{abs_prefix} = "";          # no abs prefix by default
 380     if ($cli_opts{'absroot'}) {         # Use abs prefix for absolute path URLs
 381         my $abs = $cli_opts{'absroot'};
 382         $abs =~ s{/+$}{};
 383         $options{abs_prefix} = $abs;
 384     }
 385     $options{base_prefix} = "";         # no base prefix by default
 386     if ($cli_opts{'base'}) {            # Use base prefix for fragment URLs
 387         $options{base_prefix} = $cli_opts{'base'};
 388     }
 389     if ($cli_opts{'htmlroot'}) {         # Use URL prefix
 390         $options{url_prefix} = $cli_opts{'htmlroot'};
 391     }
 392     if ($cli_opts{'imageroot'}) {        # Use image URL prefix
 393         $options{img_prefix} = $cli_opts{'imageroot'};
 394     }
 395     if (exists $cli_opts{'wiki'}) {      # Enable wiki links
 396         my $wpat = $cli_opts{'wiki'};
 397         defined($wpat) or $wpat = "";
 398         my $wopt = "s(:md)";
 399         if ($wpat =~ /^(.*?)%\{((?:[0-9A-Za-z]|[Ss]\([^)]*\))*)\}(.*)$/) {
 400             $options{wikipat} = $1 . "%{}" . $3;
 401             $wopt = $2;
 402         } else {
 403             $options{wikipat} = $wpat . "%{}.html";
 404         }
 405         my $sval = 1;
 406         while ($wopt =~ /^(.*?)s\(([^)]*)\)(.*)$/i) {
 407             my $sarg = $2;
 408             $wopt = $1 . "s" . $3;
 409             $sarg =~ s/^\s+//; $sarg =~ s/\s+$//;
 410             $sval = {} unless ref($sval) eq "HASH";
 411             s/^\.//, $sval->{lc($_)}=1 foreach split(/(?:\s*,\s*)|(?:(?<!,)\s+(?!,))/, $sarg);
 412             $sval = 1 unless scalar(keys(%$sval));
 413         }
 414         $options{wikiopt} = { map({$_ => 1} split(//,lc($wopt))) };
 415         if (ref($sval) eq "HASH" && $sval->{':md'}) {
 416             delete $sval->{':md'};
 417             $sval->{$_} = 1 foreach qw(md rmd mkd mkdn mdwn mdown markdown litcoffee);
 418         }
 419         $options{wikiopt}->{'s'} = $sval if $options{wikiopt}->{'s'};
 420     }
 421     if ($cli_opts{'raw'}) {
 422         $raw = 1;
 423     }
 424     if ($cli_opts{'stylesheet'}) {  # Display the style sheet
 425         $options{show_styles} = 1;
 426     }
 427     if ($cli_opts{'no-stylesheet'}) {  # Do not display the style sheet
 428         $options{show_styles} = 0;
 429     }
 430     $options{show_styles} = 1 if $stub && !defined($options{show_styles});
 431     $options{tab_width} = 8 unless defined($options{tab_width});
 432
 433     my $hdrf = sub {
 434         my $out = "";
 435         if ($stub > 0) {
 436             $out .= <<'HTML5';
 437 <!DOCTYPE html>
 438 <html xmlns="http://www.w3.org/1999/xhtml">
 439 <head>
 440 <meta charset="utf-8" />
 441 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 442 HTML5
 443         } elsif ($stub < 0) {
 444             $out .= <<'HTML4';
 445 <html>
 446 <head>
 447 <meta charset="utf-8">
 448 <meta http-equiv="content-type" content="text/html; charset=utf-8">
 449 HTML4
 450         }
 451         if ($stub && ($options{title} || $options{h1})) {
 452             my $title = $options{title};
 453             defined($title) && $title ne "" or $title = $options{h1};
 454             if (defined($title) && $title ne "") {
 455                 $title =~ s/&/&amp;/g;
 456                 $title =~ s/</&lt;/g;
 457                 $out .= "<title>$title</title>\n";
 458             }
 459         }
 460         if ($options{show_styles}) {
 461             my $stylesheet = $g_style_sheet;
 462             $stylesheet =~ s/%\(base\)/$g_style_prefix/g;
 463             $out .= $stylesheet;
 464         }
 465         if ($stub) {
 466             $out .= "</head>\n<body style=\"text-align:center\">\n" .
 467                 "<div style=\"display:inline-block;text-align:left;max-width:42pc\">\n";
 468         }
 469         $out;
 470     };
 471
 472     #### Process incoming text: ###########################
 473     my ($didhdr, $hdr, $result, $ftr) = (0, "", "", "");
 474     @ARGV or push(@ARGV, "-");
 475     foreach (@ARGV) {
 476         my ($fh, $contents, $oneresult);
 477         $_ eq "-" or open $fh, '<', $_ or fauxdie "could not open \"$_\": $!\n";
 478         {
 479             local $/; # Slurp the whole file
 480             $_ eq "-" and $contents = <STDIN>;
 481             $_ ne "-" and $contents = <$fh>;
 482         }
 483         defined($contents) or fauxdie "could not read \"$_\": $!\n";
 484         $_ eq "-" or close($fh);
 485         $oneresult = $raw ? ProcessRaw($contents, \%options) : Markdown($contents, \%options);
 486         $oneresult =~ s/\s+$//os;
 487         if ($oneresult ne "") {
 488             if (!$didhdr && !$raw) {
 489                 $hdr = &$hdrf();
 490                 $didhdr = 1;
 491             }
 492             $result .= $oneresult . "\n";
 493         }
 494     }
 495     $hdr = &$hdrf() unless $didhdr || $raw;
 496     $ftr = "</div>\n</body>\n</html>\n" if $stub && !$raw;
 497     if ($options{xmlcheck} == 1) {
 498         my ($good, $errs);
 499         if ($stub && !$raw) {
 500             ($good, $errs) = _xmlcheck($hdr.$result.$ftr);
 501         } else {
 502             ($good, $errs) = _xmlcheck("<div>".$result."</div>");
 503         }
 504         $good or die $errs;
 505     }
 506     print $hdr, $result, $ftr;
 507
 508     exit 0;
 509 }
 510
 511
 512 sub _xmlcheck {
 513         my $text = shift;
 514         my ($good, $errs);
 515         ($hasxml ? eval { XML::Simple::XMLin($text, KeepRoot => 1) && 1 } :
 516          eval {
 517                 my $p = XML::Parser->new(Style => 'Tree', ErrorContext => 1);
 518                 $p->parse($text) && 1;
 519         }) and $good = 1 or $errs = _trimerr($@);
 520         ($good, $errs);
 521 }
 522
 523
 524 sub _trimerr {
 525         my $err = shift;
 526         1 while $err =~ s{\s+at\s+\.?/[^,\s\n]+\sline\s+[0-9]+\.?(\n|$)}{$1}is;
 527         $err =~ s/\s+$//os;
 528         $err . "\n";
 529 }
 530
 531
 532 sub _PrepareInput {
 533     my $input = shift;
 534     defined $input or $input = "";
 535     {
 536         use bytes;
 537         $input =~ s/[\x00-\x08\x0B\x0E-\x1F\x7F]+//gso;
 538     }
 539     my $output;
 540     if (Encode::is_utf8($input) || utf8::decode($input)) {
 541         $output = $input;
 542     } else {
 543         $output = $encoder->decode($input, Encode::FB_DEFAULT);
 544     }
 545     # Standardize line endings:
 546     $output =~ s{\r\n}{\n}g;  # DOS to Unix
 547     $output =~ s{\r}{\n}g;    # Mac to Unix
 548     return $output;
 549 }
 550
 551
 552 sub ProcessRaw {
 553     my $text = _PrepareInput(shift);
 554
 555     %opt = (
 556         empty_element_suffix    => $g_empty_element_suffix,
 557     );
 558     my %args = ();
 559     if (ref($_[0]) eq "HASH") {
 560         %args = %{$_[0]};
 561     } else {
 562         %args = @_;
 563     }
 564     while (my ($k,$v) = each %args) {
 565         $opt{$k} = $v;
 566     }
 567     $opt{xmlcheck} = 0 unless looks_like_number($opt{xmlcheck});
 568
 569     # Sanitize all '<'...'>' tags if requested
 570     $text = _SanitizeTags($text, $opt{xmlcheck} == 2) if $opt{sanitize};
 571
 572     utf8::encode($text);
 573     return $text;
 574 }
 575
 576
 577 sub Markdown {
 578 #
 579 # Primary function. The order in which other subs are called here is
 580 # essential. Link and image substitutions need to happen before
 581 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
 582 # and <img> tags get encoded.
 583 #
 584     my $text = _PrepareInput(shift);
 585
 586     # Any remaining arguments after the first are options; either a single
 587     # hashref or a list of name, value paurs.
 588     %opt = (
 589         # set initial defaults
 590         style_prefix            => $g_style_prefix,
 591         empty_element_suffix    => $g_empty_element_suffix,
 592         tab_width               => $g_tab_width,
 593         indent_width            => $g_indent_width,
 594         url_prefix              => "", # Prefixed to non-absolute URLs
 595         img_prefix              => "", # Prefixed to non-absolute image URLs
 596     );
 597     my %args = ();
 598     if (ref($_[0]) eq "HASH") {
 599         %args = %{$_[0]};
 600     } else {
 601         %args = @_;
 602     }
 603     while (my ($k,$v) = each %args) {
 604         $opt{$k} = $v;
 605     }
 606     $opt{xmlcheck} = 0 unless looks_like_number($opt{xmlcheck});
 607
 608     # Clear the globals. If we don't clear these, you get conflicts
 609     # from other articles when generating a page which contains more than
 610     # one article (e.g. an index page that shows the N most recent
 611     # articles):
 612     %g_urls = ();
 613     %g_titles = ();
 614     %g_anchors = ();
 615     %g_block_ids = ();
 616     %g_code_block_ids = ();
 617     %g_html_blocks = ();
 618     %g_code_blocks = ();
 619     $g_list_level = 0;
 620
 621     # Make sure $text ends with a couple of newlines:
 622     $text .= "\n\n";
 623
 624     # Handle backticks-delimited code blocks
 625     $text = _HashBTCodeBlocks($text);
 626
 627     # Convert all tabs to spaces.
 628     $text = _DeTab($text);
 629
 630     # Strip any lines consisting only of spaces.
 631     # This makes subsequent regexen easier to write, because we can
 632     # match consecutive blank lines with /\n+/ instead of something
 633     # contorted like / *\n+/ .
 634     $text =~ s/^ +$//mg;
 635
 636     # Turn block-level HTML blocks into hash entries
 637     $text = _HashHTMLBlocks($text);
 638
 639     # Strip link definitions, store in hashes.
 640     $text = _StripLinkDefinitions($text);
 641
 642     $text = _RunBlockGamut($text, 1);
 643
 644     # Remove indentation markers
 645     $text =~ s/\027+//gs;
 646
 647     # Unhashify code blocks
 648     $text =~ s/(\025\d+\026)/$g_code_blocks{$1}/g;
 649
 650     $text = _UnescapeSpecialChars($text);
 651
 652     $text .= "\n" unless $text eq "";
 653
 654     # Sanitize all '<'...'>' tags if requested
 655     $text = _SanitizeTags($text, $opt{xmlcheck} == 2) if $opt{sanitize};
 656
 657     utf8::encode($text);
 658     if (defined($opt{h1}) && $opt{h1} ne "" && ref($_[0]) eq "HASH") {
 659         utf8::encode($opt{h1});
 660         ${$_[0]}{h1} = $opt{h1}
 661     }
 662     return $text;
 663 }
 664
 665
 666 sub _HashBTCodeBlocks {
 667 #
 668 #   Process Markdown backticks (```) delimited code blocks
 669 #
 670     my $text = shift;
 671     my $less_than_indent = $opt{indent_width} - 1;
 672
 673     $text =~ s{
 674             (?:(?<=\n)|\A)
 675                 ([ ]{0,$less_than_indent})``(`+)[ \t]*(?:([\w.+-]+[#]?)[ \t]*)?\n
 676              ( # $4 = the code block -- one or more lines, starting with ```
 677               (?:
 678                 .*\n+
 679               )+?
 680              )
 681             # and ending with ``` or end of document
 682             (?:(?:[ ]{0,$less_than_indent}``\2[ \t]*(?:\n|\Z))|\Z)
 683         }{
 684             # $2 contains syntax highlighting to use if defined
 685             my $leadsp = length($1);
 686             my $codeblock = $4;
 687             $codeblock =~ s/[ \t]+$//mg; # trim trailing spaces on lines
 688             $codeblock = _DeTab($codeblock, 8, $leadsp); # physical tab stops are always 8
 689             $codeblock =~ s/\A\n+//; # trim leading newlines
 690             $codeblock =~ s/\s+\z//; # trim trailing whitespace
 691             $codeblock = _EncodeCode($codeblock); # or run highlighter here
 692             $codeblock = "<div class=\"$opt{style_prefix}code-bt\"><pre style=\"display:none\"></pre><pre><code>"
 693                 . $codeblock . "\n</code></pre></div>";
 694
 695             my $key = block_id($codeblock);
 696             $g_html_blocks{$key} = $codeblock;
 697             "\n\n" . $key . "\n\n";
 698         }egmx;
 699
 700     return $text;
 701 }
 702
 703
 704 sub _StripLinkDefinitions {
 705 #
 706 # Strips link definitions from text, stores the URLs and titles in
 707 # hash references.
 708 #
 709     my $text = shift;
 710     my $less_than_indent = $opt{indent_width} - 1;
 711
 712     # Link defs are in the form: ^[id]: url "optional title"
 713     while ($text =~ s{
 714                         ^[ ]{0,$less_than_indent}\[(.+)\]: # id = $1
 715                           [ ]*
 716                           \n?               # maybe *one* newline
 717                           [ ]*
 718                         <?((?:\S(?:\\\n\s*[^\s"(])?)+?)>? # url = $2
 719                           [ ]*
 720                           \n?               # maybe one newline
 721                           [ ]*
 722                         (?:
 723                             (?<=\s)         # lookbehind for whitespace
 724                             (?:(['"])|(\()) # title quote char
 725                             (.+?)           # title = $5
 726                             (?(4)\)|\3)     # match same quote
 727                             [ ]*
 728                         )?  # title is optional
 729                         (?:\n+|\Z)
 730                     }
 731                     {}mx) {
 732         my $id = _strip(lc $1); # Link IDs are case-insensitive
 733         my $url = $2;
 734         my $title = _strip($5);
 735         $url =~ s/\\\n\s*//gs;
 736         if ($id ne "") {
 737                 # These values always get passed through _MakeATag or _MakeIMGTag later
 738                 $g_urls{$id} = $url;
 739                 if (defined($title) && $title ne "") {
 740                     $g_titles{$id} = $title;
 741                 }
 742         }
 743     }
 744
 745     return $text;
 746 }
 747
 748 my ($block_tags_a, $block_tags_b);
 749 BEGIN {
 750     $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/io;
 751     $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/io;
 752 }
 753
 754 sub _HashHTMLBlocks {
 755     my $text = shift;
 756     my $less_than_indent = $opt{indent_width} - 1;
 757     my $idt = "\027" x $g_list_level;
 758
 759     # Hashify HTML blocks:
 760     # We only want to do this for block-level HTML tags, such as headers,
 761     # lists, and tables. That's because we still want to wrap <p>s around
 762     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 763     # phrase emphasis, and spans. The list of tags we're looking for is
 764     # hard-coded:
 765
 766     # First, look for nested blocks, e.g.:
 767     #   <div>
 768     #       <div>
 769     #       tags for inner block must be indented.
 770     #       </div>
 771     #   </div>
 772     #
 773     # The outermost tags must start at the left margin for this to match, and
 774     # the inner nested divs must be indented.
 775     # We need to do this before the next, more liberal match, because the next
 776     # match will start at the first `<div>` and stop at the first `</div>`.
 777     $text =~ s{
 778                 (                       # save in $1
 779                     ^                   # start of line (with /m)
 780                     ((?:\Q$idt\E)?)     # optional lead in = $2
 781                     <($block_tags_a)    # start tag = $3
 782                     \b                  # word break
 783                     (?:.*\n)*?          # any number of lines, minimally matching
 784                     \2</\3>             # the matching end tag
 785                     [ ]*                # trailing spaces
 786                     (?=\n+|\Z) # followed by a newline or end of document
 787                 )
 788             }{
 789                 my $key = block_id($1);
 790                 $g_html_blocks{$key} = $1;
 791                 "\n\n" . $key . "\n\n";
 792             }eigmx;
 793
 794
 795     #
 796     # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 797     #
 798     $text =~ s{
 799                 (                       # save in $1
 800                     ^                   # start of line (with /m)
 801                     (?:\Q$idt\E)?       # optional lead in
 802                     <($block_tags_b)    # start tag = $2
 803                     \b                  # word break
 804                     (?:.*\n)*?          # any number of lines, minimally matching
 805                     .*</\2>             # the matching end tag
 806                     [ ]*                # trailing spaces
 807                     (?=\n+|\Z) # followed by a newline or end of document
 808                 )
 809             }{
 810                 my $key = block_id($1);
 811                 $g_html_blocks{$key} = $1;
 812                 "\n\n" . $key . "\n\n";
 813             }eigmx;
 814     # Special case just for <hr />. It was easier to make a special case than
 815     # to make the other regex more complicated.
 816     $text =~ s{
 817                 (?:
 818                     (?<=\n)         # Starting after end of line
 819                     |               # or
 820                     \A              # the beginning of the doc
 821                 )
 822                 (                       # save in $1
 823                     [ ]{0,$less_than_indent}
 824                     <(?:hr)             # start tag
 825                     \b                  # word break
 826                     (?:[^<>])*?         #
 827                     /?>                 # the matching end tag
 828                     [ ]*
 829                     (?=\n{1,}|\Z)       # followed by end of line or end of document
 830                 )
 831             }{
 832                 my $key = block_id($1);
 833                 $g_html_blocks{$key} = $1;
 834                 "\n\n" . $key . "\n\n";
 835             }eigx;
 836
 837     # Special case for standalone HTML comments:
 838     $text =~ s{
 839                 (?:
 840                     (?<=\n\n)       # Starting after a blank line
 841                     |               # or
 842                     \A\n?           # the beginning of the doc
 843                 )
 844                 (                   # save in $1
 845                     [ ]{0,$less_than_indent}
 846                     (?s:
 847                         <!--
 848                         (?:[^-]|(?:-(?!-)))*
 849                         -->
 850                     )
 851                     [ ]*
 852                     (?=\n{1,}|\Z)   # followed by end of line or end of document
 853                 )
 854             }{
 855                 my $key = block_id($1);
 856                 $g_html_blocks{$key} = $1;
 857                 "\n\n" . $key . "\n\n";
 858             }egx;
 859
 860
 861     return $text;
 862 }
 863
 864
 865 sub _RunBlockGamut {
 866 #
 867 # These are all the transformations that form block-level
 868 # tags like paragraphs, headers, and list items.
 869 #
 870     my ($text, $anchors) = @_;
 871
 872     $text = _DoHeaders($text, $anchors);
 873
 874     # Do Horizontal Rules:
 875     $text =~ s{^ {0,3}\*(?: {0,2}\*){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
 876     $text =~ s{^ {0,3}\_(?: {0,2}\_){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
 877     $text =~ s{^ {0,3}\-(?: {0,2}\-){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
 878
 879     $text = _DoListsAndBlocks($text);
 880
 881     $text = _DoTables($text);
 882
 883     # We already ran _HashHTMLBlocks() before, in Markdown(), but that
 884     # was to escape raw HTML in the original Markdown source. This time,
 885     # we're escaping the markup we've just created, so that we don't wrap
 886     # <p> tags around block-level tags.
 887     $text = _HashHTMLBlocks($text);
 888
 889     $text = _FormParagraphs($text);
 890
 891     return $text;
 892 }
 893
 894
 895 sub _DoListBlocks {
 896     return _DoBlockQuotes(_DoCodeBlocks($_[0])) if $_[0] ne "";
 897 }
 898
 899
 900 sub _RunSpanGamut {
 901 #
 902 # These are all the transformations that occur *within* block-level
 903 # tags like paragraphs, headers, and list items.
 904 #
 905     my $text = shift;
 906
 907     $text = _DoCodeSpans($text);
 908
 909     $text = _EscapeSpecialChars($text);
 910
 911     # Process anchor and image tags. Images must come first,
 912     # because ![foo][f] looks like an anchor.
 913     $text = _DoImages($text);
 914     $text = _DoAnchors($text);
 915
 916     # Make links out of things like `<http://example.com/>`
 917     # Must come after _DoAnchors(), because you can use < and >
 918     # delimiters in inline links like [this](<url>).
 919     $text = _DoAutoLinks($text);
 920
 921     $text = _EncodeAmpsAndAngles($text);
 922
 923     $text = _DoItalicsAndBoldAndStrike($text);
 924
 925     # Do hard breaks:
 926     $text =~ s/ {2,}\n/<br$opt{empty_element_suffix}\n/g;
 927
 928     return $text;
 929 }
 930
 931
 932 sub _EscapeSpecialChars {
 933     my $text = shift;
 934     my $tokens ||= _TokenizeHTML($text);
 935
 936     $text = ''; # rebuild $text from the tokens
 937 #   my $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
 938 #   my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
 939
 940     foreach my $cur_token (@$tokens) {
 941         if ($cur_token->[0] eq "tag") {
 942             # Within tags, encode *, _ and ~ so they don't conflict
 943             # with their use in Markdown for italics and strong.
 944             # We're replacing each such character with its
 945             # corresponding block id value; this is likely
 946             # overkill, but it should prevent us from colliding
 947             # with the escape values by accident.
 948             $cur_token->[1] =~ s!([*_~])!$g_escape_table{$1}!g;
 949             $text .= $cur_token->[1];
 950         } else {
 951             my $t = $cur_token->[1];
 952             $t = _EncodeBackslashEscapes($t);
 953             $text .= $t;
 954         }
 955     }
 956     return $text;
 957 }
 958
 959
 960 sub _ProcessWikiLink {
 961     my ($link_text, $link_loc) = @_;
 962     if (defined($link_loc) &&
 963         ($link_loc =~ m{^#\S*$} || $link_loc =~ m{^(?:http|ftp)s?://\S+$}i)) {
 964         # Return the new link
 965         return _MakeATag(_FindFragmentMatch($link_loc), $link_text);
 966     }
 967     if (!defined($link_loc) &&
 968         ($link_loc = _strip($link_text)) =~ m{^(?:http|ftp)s?://\S+$}i) {
 969         # Return the new link
 970         return _MakeATag($link_loc, $link_text);
 971     }
 972     return undef if $link_loc eq "" || $link_text eq "";
 973     if ($link_loc =~ /^[A-Za-z][A-Za-z0-9+.-]*:/os) {
 974         # Unrecognized scheme
 975         return undef;
 976     }
 977     if ($opt{wikipat}) {
 978         my $o = $opt{wikiopt};
 979         my $qsfrag = "";
 980         my $base = $link_loc;
 981         if ($link_loc =~ /^(.*?)([?#].*)$/os) {
 982             ($base, $qsfrag) = ($1, $2);
 983         }
 984         $base = _wxform($base);
 985         my $result = $opt{wikipat};
 986         $result =~ s/%\{\}/$base/;
 987         if ($qsfrag =~ /^([^#]*)(#.+)$/os) {
 988             my ($q,$f) = ($1,$2);
 989             #$f = _wxform($f) if $f =~ / /;
 990             $qsfrag = $q . $f;
 991         }
 992         $result .= $qsfrag;
 993         {
 994             use bytes;
 995             $result =~ s/%(?![0-9A-Fa-f]{2})/%25/sog;
 996             if ($o->{r}) {
 997                 $result =~
 998                 s/([\x00-\x1F <>"{}|\\^`x7F])/sprintf("%%%02X",ord($1))/soge;
 999             } else {
1000                 $result =~
1001                 s/([\x00-\x1F <>"{}|\\^`\x7F-\xFF])/sprintf("%%%02X",ord($1))/soge;
1002             }
1003             $result =~ s/(%(?![0-9A-F]{2})[0-9A-Fa-f]{2})/uc($1)/soge;
1004         }
1005         # Return the new link
1006         return _MakeATag($result, $link_text);
1007     }
1008     # leave it alone
1009     return undef;
1010 }
1011
1012
1013 sub _wxform {
1014     my $w = shift;
1015     my $o = $opt{wikiopt};
1016     my $opt_s = $o->{s};
1017     if ($opt_s) {
1018         if (ref($opt_s)) {
1019             if ($w =~ m{^(.*)[.]([^./]*)$}) {
1020                 my ($base, $ext) = ($1, $2);
1021                 $w = $base if $opt_s->{lc($ext)};
1022             }
1023         } else {
1024             $w =~ s{[.][^./]*$}{};
1025         }
1026     }
1027     $w =~ tr{/}{ } if $o->{f};
1028     $w =~ s{/+}{/}gos if !$o->{f} && !$o->{v};
1029     if ($o->{d}) {
1030         $w =~ tr{ }{-};
1031         $w =~ s/-+/-/gos unless $o->{v};
1032     } else {
1033         $w =~ tr{ }{_};
1034         $w =~ s/_+/_/gos unless $o->{v};
1035     }
1036     $w = uc($w) if $o->{u};
1037     $w = lc($w) if $o->{l};
1038     return $w;
1039 }
1040
1041
1042 # Return a suitably encoded <a...> tag string
1043 # On input NONE of $url, $text or $title should be xmlencoded
1044 # but $url should already be url-encoded if needed, but NOT g_escape_table'd
1045 sub _MakeATag {
1046     my ($url, $text, $title) = @_;
1047     defined($url) or $url="";
1048     defined($text) or $text="";
1049     defined($title) or $title="";
1050
1051     $url =~ m"^#" and $url = $opt{base_prefix} . $url;
1052     my $result = $g_escape_table{'<'}."a href=\"" . _EncodeAttText($url) . "\"";
1053     $title = _strip($title);
1054     $text =~ s{<(/?a)}{&lt;$1}sogi;
1055     $text = _DoItalicsAndBoldAndStrike($text);
1056     # We've got to encode any of these remaining to avoid
1057     # conflicting with other italics, bold and strike through.
1058     $text =~ s!([*_~])!$g_escape_table{$1}!g;
1059     $result .= " title=\"" . _EncodeAttText($title) . "\"" if $title ne "";
1060     return $result . $g_escape_table{'>'} .
1061         $text . $g_escape_table{'<'}."/a".$g_escape_table{'>'};
1062 }
1063
1064
1065 sub _DoAnchors {
1066 #
1067 # Turn Markdown link shortcuts into XHTML <a> tags.
1068 #
1069     my $text = shift;
1070
1071     #
1072     # First, handle wiki-style links: [[wiki style link]]
1073     #
1074     $text =~ s{
1075         (                   # wrap whole match in $1
1076           \[\[
1077             ($g_nested_brackets) # link text and id = $2
1078           \]\]
1079         )
1080     }{
1081         my $result;
1082         my $whole_match = $1;
1083         my $link_text   = $2;
1084         my $link_loc    = undef;
1085
1086         if ($link_text =~ /^(.*)\|(.*)$/s) {
1087             $link_text = $1;
1088             $link_loc = _strip($2);
1089         }
1090
1091         $result = _ProcessWikiLink($link_text, $link_loc);
1092         defined($result) or $result = $whole_match;
1093         $result;
1094     }xsge;
1095
1096     #
1097     # Next, handle reference-style links: [link text] [id]
1098     #
1099     $text =~ s{
1100         (                   # wrap whole match in $1
1101           \[
1102             ($g_nested_brackets) # link text = $2
1103           \]
1104
1105           [ ]?              # one optional space
1106           (?:\n[ ]*)?       # one optional newline followed by spaces
1107
1108           \[
1109             ($g_nested_brackets) # id = $3
1110           \]
1111         )
1112     }{
1113         my $result;
1114         my $whole_match = $1;
1115         my $link_text   = $2;
1116         my $link_id     = $3;
1117
1118         $link_id ne "" or $link_id = $link_text; # for shortcut links like [this][].
1119         $link_id = _strip(lc $link_id);
1120
1121         if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
1122             my $url = $g_urls{$link_id};
1123             defined($url) or $url = $g_anchors{$link_id};
1124             $url = _FindFragmentMatch($url);
1125             $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
1126             $result = _MakeATag(_PrefixURL($url), $link_text, $g_titles{$link_id});
1127         }
1128         else {
1129             $result = $whole_match;
1130         }
1131         $result;
1132     }xsge;
1133
1134     #
1135     # Subsequently, inline-style links: [link text](url "optional title")
1136     #
1137     $text =~ s{
1138         (               # wrap whole match in $1
1139           \[
1140             ($g_nested_brackets) # link text = $2
1141           \]
1142           \(            # literal paren
1143             ($g_nested_parens) # href and optional title = $3
1144           \)
1145         )
1146     }{
1147         #my $result;
1148         my $whole_match = $1;
1149         my $link_text   = $2;
1150         my ($url, $title) = _SplitUrlTitlePart($3);
1151
1152         if (defined($url)) {
1153             $url = _FindFragmentMatch($url);
1154             $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
1155             _MakeATag(_PrefixURL($url), $link_text, $title);
1156         } else {
1157             # The href/title part didn't match the pattern
1158             $whole_match;
1159         }
1160     }xsge;
1161
1162     #
1163     # Finally, handle reference-style implicit shortcut links: [link text]
1164     #
1165     $text =~ s{
1166         (                   # wrap whole match in $1
1167           \[
1168             ($g_nested_brackets) # link text = $2
1169           \]
1170         )
1171     }{
1172         my $result;
1173         my $whole_match = $1;
1174         my $link_text   = $2;
1175         my $link_id     = _strip(lc $2);
1176
1177         if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
1178             my $url = $g_urls{$link_id};
1179             defined($url) or $url = $g_anchors{$link_id};
1180             $url = _FindFragmentMatch($url);
1181             $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
1182             $result = _MakeATag(_PrefixURL($url), $link_text, $g_titles{$link_id});
1183         }
1184         else {
1185             $result = $whole_match;
1186         }
1187         $result;
1188     }xsge;
1189
1190     return $text;
1191 }
1192
1193
1194 sub _PeelWrapped {
1195     defined($_[0]) or return undef;
1196     if (substr($_[0],0,1) eq "(") {
1197         return substr($_[0], 1, length($_[0]) - (substr($_[0], -1, 1) eq ")" ? 2 : 1));
1198     }
1199     return $_[0];
1200 }
1201
1202
1203 sub _SplitUrlTitlePart {
1204     return ("", undef) if $_[0] =~ m{^\s*$}; # explicitly allowed
1205     my $u = $_[0];
1206     $u =~ s/^\s*(['\042])/# $1/;
1207     if ($u =~ m{
1208         ^               # match beginning
1209         \s*?
1210         <?([^\s'\042]\S*?)>? # URL = $1
1211         (?:             # optional grouping
1212           \s+           # must be distinct from URL
1213           (['\042]?)    # quote char = $2
1214           (.*?)         # Title = $3
1215           \2?           # matching quote
1216         )?              # title is optional
1217         \s*
1218         \z              # match end
1219     }osx) {
1220         return (undef, undef) if $_[1] && ($1 eq "" || $1 eq "#");
1221         return (_PeelWrapped($1), $2 ? $3 : _PeelWrapped($3));
1222     } else {
1223         return (undef, undef);
1224     }
1225 }
1226
1227
1228 sub _FindFragmentMatch {
1229     my $url = shift;
1230     if (defined($url) && $url =~ /^#\S/) {
1231         # try very hard to find a match
1232         my $idbase = _strip(lc(substr($url, 1)));
1233         my $idbase0 = $idbase;
1234         my $id = _MakeAnchorId($idbase);
1235         if (defined($g_anchors_id{$id})) {
1236             $url = $g_anchors_id{$id};
1237         } else {
1238             $idbase =~ s/-/_/gs;
1239             $id = _MakeAnchorId($idbase);
1240             if (defined($g_anchors_id{$id})) {
1241                 $url = $g_anchors_id{$id};
1242             } else {
1243                 $id = _MakeAnchorId($idbase0, 1);
1244                 if (defined($g_anchors_id{$id})) {
1245                     $url = $g_anchors_id{$id};
1246                 } else {
1247                     $id = _MakeAnchorId($idbase, 1);
1248                     if (defined($g_anchors_id{$id})) {
1249                         $url = $g_anchors_id{$id};
1250                     }
1251                 }
1252             }
1253         }
1254     }
1255     return $url;
1256 }
1257
1258
1259 # Return a suitably encoded <img...> tag string
1260 # On input NONE of $url, $alt or $title should be xmlencoded
1261 # but $url should already be url-encoded if needed, but NOT g_escape_table'd
1262 sub _MakeIMGTag {
1263     my ($url, $alt, $title) = @_;
1264     defined($url) or $url="";
1265     defined($alt) or $alt="";
1266     defined($title) or $title="";
1267     return "" unless $url ne "";
1268
1269     my $result = $g_escape_table{'<'}."img src=\"" . _EncodeAttText($url) . "\"";
1270     my ($w, $h) = (0, 0);
1271     ($alt, $title) = (_strip($alt), _strip($title));
1272     if ($title =~ /^(.*)\(([1-9][0-9]*)[xX]([1-9][0-9]*)\)$/os) {
1273         ($title, $w, $h) = (_strip($1), $2, $3);
1274     } elsif ($title =~ /^(.*)\(\?[xX]([1-9][0-9]*)\)$/os) {
1275         ($title, $h) = (_strip($1), $2);
1276     } elsif ($title =~ /^(.*)\(([1-9][0-9]*)[xX]\?\)$/os) {
1277         ($title, $w) = (_strip($1), $2);
1278     }
1279     $result .= " alt=\"" . _EncodeAttText($alt) . "\"" if $alt ne "";
1280     $result .= " width=\"$w\"" if $w != 0;
1281     $result .= " height=\"$h\"" if $h != 0;
1282     $result .= " title=\"" . _EncodeAttText($title) . "\"" if $title ne "";
1283     $result .= " /" unless $opt{empty_element_suffix} eq ">";
1284     $result .= $g_escape_table{'>'};
1285     return $result;
1286 }
1287
1288
1289 sub _DoImages {
1290 #
1291 # Turn Markdown image shortcuts into <img> tags.
1292 #
1293     my $text = shift;
1294
1295     #
1296     # First, handle reference-style labeled images: ![alt text][id]
1297     #
1298     $text =~ s{
1299         (               # wrap whole match in $1
1300           !\[
1301             ($g_nested_brackets) # alt text = $2
1302           \]
1303
1304           [ ]?          # one optional space
1305           (?:\n[ ]*)?   # one optional newline followed by spaces
1306
1307           \[
1308             ($g_nested_brackets) # id = $3
1309           \]
1310
1311         )
1312     }{
1313         my $result;
1314         my $whole_match = $1;
1315         my $alt_text    = $2;
1316         my $link_id     = $3;
1317
1318         $link_id ne "" or $link_id = $alt_text; # for shortcut links like ![this][].
1319         $link_id = _strip(lc $link_id);
1320
1321         if (defined $g_urls{$link_id}) {
1322             $result = _MakeIMGTag(
1323                 _PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
1324         }
1325         else {
1326             # If there's no such link ID, leave intact:
1327             $result = $whole_match;
1328         }
1329
1330         $result;
1331     }xsge;
1332
1333     #
1334     # Next, handle inline images:  ![alt text](url "optional title")
1335     # Don't forget: encode * and _
1336
1337     $text =~ s{
1338         (               # wrap whole match in $1
1339           !\[
1340             ($g_nested_brackets) # alt text = $2
1341           \]
1342           \(            # literal paren
1343             ($g_nested_parens) # src and optional title = $3
1344           \)
1345         )
1346     }{
1347         my $whole_match = $1;
1348         my $alt_text    = $2;
1349         my ($url, $title) = _SplitUrlTitlePart($3, 1);
1350         defined($url) ?  _MakeIMGTag(_PrefixURL($url), $alt_text, $title) : $whole_match;
1351     }xsge;
1352
1353     #
1354     # Finally, handle reference-style implicitly labeled links: ![alt text]
1355     #
1356     $text =~ s{
1357         (               # wrap whole match in $1
1358           !\[
1359             ($g_nested_brackets) # alt text = $2
1360           \]
1361         )
1362     }{
1363         my $result;
1364         my $whole_match = $1;
1365         my $alt_text    = $2;
1366         my $link_id     = lc(_strip($alt_text));
1367
1368         if (defined $g_urls{$link_id}) {
1369             $result = _MakeIMGTag(
1370                 _PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
1371         }
1372         else {
1373             # If there's no such link ID, leave intact:
1374             $result = $whole_match;
1375         }
1376
1377         $result;
1378     }xsge;
1379
1380     return $text;
1381 }
1382
1383 sub _EncodeAttText {
1384     my $text = shift;
1385     defined($text) or return undef;
1386     $text = _HTMLEncode(_strip($text));
1387     # We've got to encode these to avoid conflicting
1388     # with italics, bold and strike through.
1389     $text =~ s!([*_~:])!$g_escape_table{$1}!g;
1390     return $text;
1391 }
1392
1393
1394 sub _MakeAnchorId {
1395     use bytes;
1396     my ($link, $strip) = @_;
1397     $link = lc($link);
1398     if ($strip) {
1399         $link =~ s/\s+/_/gs;
1400         $link =~ tr/-a-z0-9_//cd;
1401     } else {
1402         $link =~ tr/-a-z0-9_/_/cs;
1403     }
1404     return '' unless $link ne '';
1405     $link = "_".$link."_";
1406     $link =~ s/__+/_/gs;
1407     $link = "_".md5_hex($link)."_" if length($link) > 66;
1408     return $link;
1409 }
1410
1411
1412 sub _GetNewAnchorId {
1413     my $link = _strip(lc(shift));
1414     return '' if $link eq "" || defined($g_anchors{$link});
1415     my $id = _MakeAnchorId($link);
1416     return '' unless $id;
1417     $g_anchors{$link} = '#'.$id;
1418     $g_anchors_id{$id} = $g_anchors{$link};
1419     if ($id =~ /-/) {
1420         my $id2 = $id;
1421         $id2 =~ s/-/_/gs;
1422         $id2 =~ s/__+/_/gs;
1423         defined($g_anchors_id{$id2}) or $g_anchors_id{$id2} = $g_anchors{$link};
1424     }
1425     my $idd = _MakeAnchorId($link, 1);
1426     if ($idd) {
1427         defined($g_anchors_id{$idd}) or $g_anchors_id{$idd} = $g_anchors{$link};
1428         if ($idd =~ /-/) {
1429             my $idd2 = $idd;
1430             $idd2 =~ s/-/_/gs;
1431             $idd2 =~ s/__+/_/gs;
1432             defined($g_anchors_id{$idd2}) or $g_anchors_id{$idd2} = $g_anchors{$link};
1433         }
1434     }
1435     $id;
1436 }
1437
1438
1439 sub _DoHeaders {
1440     my ($text, $anchors) = @_;
1441     my $h1;
1442     my $geth1 = $anchors && !defined($opt{h1}) ? sub {
1443         return unless !defined($h1);
1444         my $h = shift;
1445         return unless defined($h) && $h !~ /^\s*$/;
1446         $h = _StripTags(_UnescapeSpecialChars($h));
1447         $h =~ s/^\s+//;
1448         $h =~ s/\s+$//;
1449         $h =~ s/\s+/ /g;
1450         $h1 = $h if $h ne "";
1451     } : sub {};
1452
1453     # atx-style headers:
1454     #   # Header 1
1455     #   ## Header 2
1456     #   ## Header 2 with closing hashes ##
1457     #   ...
1458     #   ###### Header 6
1459     #
1460     $text =~ s{
1461             ^(\#{1,6})  # $1 = string of #'s
1462             [ ]*
1463             ((?:(?:(?<![#])[^\s]|[^#\s]).*?)?) # $2 = Header text
1464             [ ]*
1465             \n+
1466         }{
1467             my $h_level = length($1);
1468             my $h = $2;
1469             $h =~ s/#+$//;
1470             $h =~ s/\s+$//;
1471             my $id = $h eq "" ? "" : _GetNewAnchorId($h);
1472             $id = " id=\"$id\"" if $id ne "";
1473             my $rsg = _RunSpanGamut($h);
1474             &$geth1($rsg) if $h_level == 1 && $h ne "";
1475             "<h$h_level$id>" . $rsg . "</h$h_level>\n\n";
1476         }egmx;
1477
1478     # Setext-style headers:
1479     #     Header 1
1480     #     ========
1481     #
1482     #     Header 2
1483     #     --------
1484     #
1485     #     Header 3
1486     #     ~~~~~~~~
1487     #
1488     $text =~ s{ ^(?:=+[ ]*\n)?[ ]*(.+?)[ ]*\n=+[ ]*\n+ }{
1489         my $h = $1;
1490         my $id = _GetNewAnchorId($h);
1491         $id = " id=\"$id\"" if $id ne "";
1492         my $rsg = _RunSpanGamut($h);
1493         &$geth1($rsg);
1494         "<h1$id>" . $rsg . "</h1>\n\n";
1495     }egmx;
1496
1497     $text =~ s{ ^(?:-+[ ]*\n)?[ ]*(.+?)[ ]*\n-+[ ]*\n+ }{
1498         my $h = $1;
1499         my $id = _GetNewAnchorId($h);
1500         $id = " id=\"$id\"" if $id ne "";
1501         "<h2$id>" . _RunSpanGamut($h) . "</h2>\n\n";
1502     }egmx;
1503
1504     $text =~ s{ ^(?:~+[ ]*\n)?[ ]*(.+?)[ ]*\n~+[ ]*\n+ }{
1505         my $h = $1;
1506         my $id = _GetNewAnchorId($h);
1507         $id = " id=\"$id\"" if $id ne "";
1508         "<h3$id>" . _RunSpanGamut($h) . "</h3>\n\n";
1509     }egmx;
1510
1511     $opt{h1} = $h1 if defined($h1) && $h1 ne "";
1512     return $text;
1513 }
1514
1515
1516 my ($marker_ul, $marker_ol, $marker_any, $roman_numeral, $greek_lower);
1517 BEGIN {
1518     # Re-usable patterns to match list item bullets and number markers:
1519     $roman_numeral = qr/(?:
1520         [IiVvXx]|[Ii]{2,3}|[Ii][VvXx]|[VvXx][Ii]{1,3}|[Xx][Vv][Ii]{0,3}|
1521         [Xx][Ii][VvXx]|[Xx]{2}[Ii]{0,3}|[Xx]{2}[Ii]?[Vv]|[Xx]{2}[Vv][Ii]{1,2})/ox;
1522     $greek_lower = qr/(?:[\x{03b1}-\x{03c9}])/o;
1523     $marker_ul  = qr/[*+-]/o;
1524     $marker_ol  = qr/(?:\d+|[A-Za-z]|$roman_numeral|$greek_lower)[.\)]/o;
1525     $marker_any = qr/(?:$marker_ul|$marker_ol)/o;
1526 }
1527
1528
1529 sub _GetListMarkerType {
1530     my ($list_type, $list_marker, $last_marker) = @_;
1531     return "" unless $list_type && $list_marker && lc($list_type) eq "ol";
1532     my $last_marker_type = '';
1533     $last_marker_type = _GetListMarkerType($list_type, $last_marker)
1534         if defined($last_marker) &&
1535             # these are roman unless $last_marker type case matches and is 'a' or 'A'
1536             $list_marker =~ /^[IiVvXx][.\)]?$/;
1537     return "I" if $list_marker =~ /^[IVX]/ && $last_marker_type ne 'A';
1538     return "i" if $list_marker =~ /^[ivx]/ && $last_marker_type ne 'a';
1539     return "A" if $list_marker =~ /^[A-Z]/;
1540     return "a" if $list_marker =~ /^[a-z]/ || $list_marker =~ /^$greek_lower/o;
1541     return "1";
1542 }
1543
1544
1545 sub _GetListItemTypeClass {
1546     my ($list_type, $list_marker, $last_marker) = @_;
1547     my $list_marker_type = _GetListMarkerType($list_type, $list_marker, $last_marker);
1548     my $ans = &{sub{
1549         return "" unless length($list_marker) >= 2 && $list_marker_type =~ /^[IiAa1]$/;
1550         return "lower-greek" if $list_marker_type eq "a" && $list_marker =~ /^$greek_lower/o;
1551         return "" unless $list_marker =~ /\)$/;
1552         return "upper-roman" if $list_marker_type eq "I";
1553         return "lower-roman" if $list_marker_type eq "i";
1554         return "upper-alpha" if $list_marker_type eq "A";
1555         return "lower-alpha" if $list_marker_type eq "a";
1556         return "decimal";
1557     }};
1558     return ($list_marker_type, $ans);
1559 }
1560
1561
1562 my %_roman_number_table;
1563 BEGIN {
1564     %_roman_number_table = (
1565         i       =>  1,
1566         ii      =>  2,
1567         iii     =>  3,
1568         iv      =>  4,
1569         v       =>  5,
1570         vi      =>  6,
1571         vii     =>  7,
1572         viii    =>  8,
1573         ix      =>  9,
1574         x       => 10,
1575         xi      => 11,
1576         xii     => 12,
1577         xiii    => 13,
1578         xiv     => 14,
1579         xv      => 15,
1580         xvi     => 16,
1581         xvii    => 17,
1582         xviii   => 18,
1583         xix     => 19,
1584         xx      => 20,
1585         xxi     => 21,
1586         xxii    => 22,
1587         xxiii   => 23,
1588         xxiv    => 24,
1589         xxv     => 25,
1590         xxvi    => 26,
1591         xxvii   => 27
1592     );
1593 }
1594
1595
1596 # Necessary because ς and σ are the same value grrr
1597 my %_greek_number_table;
1598 BEGIN {
1599     %_greek_number_table = (
1600         "\x{03b1}" =>  1, # α
1601         "\x{03b2}" =>  2, # β
1602         "\x{03b3}" =>  3, # γ
1603         "\x{03b4}" =>  4, # δ
1604         "\x{03b5}" =>  5, # ε
1605         "\x{03b6}" =>  6, # ζ
1606         "\x{03b7}" =>  7, # η
1607         "\x{03b8}" =>  8, # θ
1608         "\x{03b9}" =>  9, # ι
1609         "\x{03ba}" => 10, # κ
1610         "\x{03bb}" => 11, # λ
1611         #"\x{00b5}"=> 12, # µ is "micro" not "mu"
1612         "\x{03bc}" => 12, # μ
1613         "\x{03bd}" => 13, # ν
1614         "\x{03be}" => 14, # ξ
1615         "\x{03bf}" => 15, # ο
1616         "\x{03c0}" => 16, # π
1617         "\x{03c1}" => 17, # ρ
1618         "\x{03c2}" => 18, # ς
1619         "\x{03c3}" => 18, # σ
1620         "\x{03c4}" => 19, # τ
1621         "\x{03c5}" => 20, # υ
1622         "\x{03c6}" => 21, # φ
1623         "\x{03c7}" => 22, # χ
1624         "\x{03c8}" => 23, # ψ
1625         "\x{03c9}" => 24  # ω
1626     );
1627 }
1628
1629
1630 sub _GetMarkerIntegerNum {
1631     my ($list_marker_type, $marker_val) = @_;
1632     my $ans = &{sub{
1633         return 0 + $marker_val if $list_marker_type eq "1";
1634         $list_marker_type = lc($list_marker_type);
1635         return $_greek_number_table{$marker_val}
1636             if $list_marker_type eq "a" &&
1637             defined($_greek_number_table{$marker_val});
1638         $marker_val = lc($marker_val);
1639         return ord($marker_val) - ord("a") + 1 if $list_marker_type eq "a";
1640         return 1 unless $list_marker_type eq "i";
1641         defined($_roman_number_table{$marker_val}) and
1642             return $_roman_number_table{$marker_val};
1643         return 1;
1644     }};
1645     return $ans if $ans == 0 && $list_marker_type eq "1";
1646     return $ans >= 1 ? $ans : 1;
1647 }
1648
1649
1650 sub _IncrList {
1651     my ($from, $to, $extra) = @_;
1652     $extra = defined($extra) ? " $extra" : "";
1653     my $result = "";
1654     while ($from + 10 <= $to) {
1655         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-10\"></span>\n";
1656         $from += 10;
1657     }
1658     while ($from + 5 <= $to) {
1659         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-5\"></span>\n";
1660         $from += 5;
1661     }
1662     while ($from + 2 <= $to) {
1663         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-2\"></span>\n";
1664         $from += 2;
1665     }
1666     while ($from < $to) {
1667         $result .= "<span$extra class=\"$opt{style_prefix}ol-incr\"></span>\n";
1668         ++$from;
1669     }
1670     return $result;
1671 }
1672
1673
1674 sub _DoListsAndBlocks {
1675 #
1676 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1677 #
1678     my $text = shift;
1679     my $indent = $opt{indent_width};
1680     my $less_than_indent = $indent - 1;
1681     my $less_than_double_indent = 2 * $indent - 1;
1682
1683     # Re-usable pattern to match any entire ul or ol list:
1684     my $whole_list = qr{
1685         (                           # $1 (or $_[0]) = whole list
1686           (                         # $2 (or $_[1])
1687             (?:(?<=\n)|\A)
1688             [ ]{0,$less_than_indent}
1689             (${marker_any})         # $3 (or $_[2]) = first list item marker
1690             [ ]+
1691           )
1692           (?s:.+?)
1693           (                         # $4 (or $_[3])
1694               \z
1695             |
1696               \n{2,}
1697               (?=\S)
1698               (?!                   # Negative lookahead for another list item marker
1699                 ${marker_any}[ ]
1700               )
1701           )
1702         )
1703     }mx;
1704
1705     my $list_item_sub = sub {
1706         my $list = $_[0];
1707         my $list_type = ($_[2] =~ m/$marker_ul/) ? "ul" : "ol";
1708         my $list_att = "";
1709         my $list_class = "";
1710         my $list_incr = "";
1711         # Turn double returns into triple returns, so that we can make a
1712         # paragraph for the last item in a list, if necessary:
1713         $list =~ s/\n\n/\n\n\n/g;
1714         my ($result, $first_marker, $fancy) = _ProcessListItems($list_type, $list);
1715         defined($first_marker) or return $list;
1716         my $list_marker_type = _GetListMarkerType($list_type, $first_marker);
1717         if ($list_marker_type) {
1718                 $first_marker =~ s/[.\)]$//;
1719                 my $first_marker_num = _GetMarkerIntegerNum($list_marker_type, $first_marker);
1720                 $list_att = $list_marker_type eq "1" ? "" : " type=\"$list_marker_type\"";
1721                 if ($fancy) {
1722                     $list_class = " class=\"$opt{style_prefix}ol\"";
1723                     my $start = $first_marker_num;
1724                     $start = 10 if $start > 10;
1725                     $start = 5 if $start > 5 && $start < 10;
1726                     $start = 1 if $start > 1 && $start < 5;
1727                     $list_att .= " start=\"$start\"" unless $start == 1;
1728                     $list_incr = _IncrList($start, $first_marker_num);
1729                 } else {
1730                     $list_class = " class=\"$opt{style_prefix}lc-greek\""
1731                         if $list_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
1732                     $list_att .= " start=\"$first_marker_num\"" unless $first_marker_num == 1;
1733                 }
1734         }
1735         my $idt = "\027" x $g_list_level;
1736         $result = "$idt<$list_type$list_att$list_class>\n$list_incr" . $result . "$idt</$list_type>\n\n";
1737         $result;
1738     };
1739
1740     # We use a different prefix before nested lists than top-level lists.
1741     # See extended comment in _ProcessListItems().
1742     #
1743     # Note: (jg) There's a bit of duplication here. My original implementation
1744     # created a scalar regex pattern as the conditional result of the test on
1745     # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1746     # substitution once, using the scalar as the pattern. This worked,
1747     # everywhere except when running under MT on my hosting account at Pair
1748     # Networks. There, this caused all rebuilds to be killed by the reaper (or
1749     # perhaps they crashed, but that seems incredibly unlikely given that the
1750     # same script on the same server ran fine *except* under MT. I've spent
1751     # more time trying to figure out why this is happening than I'd like to
1752     # admit. My only guess, backed up by the fact that this workaround works,
1753     # is that Perl optimizes the substition when it can figure out that the
1754     # pattern will never change, and when this optimization isn't on, we run
1755     # afoul of the reaper. Thus, the slightly redundant code to that uses two
1756     # static s/// patterns rather than one conditional pattern.
1757     #
1758     # Note: (kjm) With the addition of the two-of-the-same-kind-in-a-row-
1759     # starts-a-list-at-the-top-level rule the two patterns really are somewhat
1760     # different now, but the duplication has pretty much been eliminated via
1761     # use of a separate sub which has the side-effect of making the below
1762     # two cases much easier to grok all at once.
1763
1764     if ($g_list_level) {
1765         my $parse = $text;
1766         $text = "";
1767         pos($parse) = 0;
1768         while ($parse =~ /\G(?s:.)*?^$whole_list/gmc) {
1769             my @captures = ($1, $2, $3, $4);
1770             if ($-[1] > $-[0]) {
1771                 $text .= _DoListBlocks(substr($parse, $-[0], $-[1] - $-[0]));
1772             }
1773             $text .= &$list_item_sub(@captures);
1774         }
1775         $text .= _DoListBlocks(substr($parse, pos($parse))) if pos($parse) < length($parse);
1776     }
1777     else {
1778         my $parse = $text;
1779         $text = "";
1780         pos($parse) = 0;
1781         while ($parse =~ m{\G(?s:.)*?
1782                 (?: (?<=\n\n) |
1783                     \A\n? |
1784                     (?<=:\n) |
1785                     (?:(?<=\n) # a list starts with one unordered marker line
1786                        (?=[ ]{0,$less_than_indent}$marker_ul[ ])) |
1787                     (?:(?<=\n) # or two ordered marker lines in a row
1788                        (?=[ ]{0,$less_than_indent}$marker_ol[ ].*\n\n?
1789                           [ ]{0,$less_than_indent}$marker_ol[ ])) |
1790                     (?:(?<=\n) # or any marker and a sublist marker
1791                        (?=[ ]{0,$less_than_indent}$marker_any[ ].*\n\n?
1792                           [ ]{$indent,$less_than_double_indent}$marker_any[ ]))
1793                 )
1794                 $whole_list
1795             }gmcx) {
1796             my @captures = ($1, $2, $3, $4);
1797             if ($-[1] > $-[0]) {
1798                 $text .= _DoListBlocks(substr($parse, $-[0], $-[1] - $-[0]));
1799             }
1800             $text .= &$list_item_sub(@captures);
1801         }
1802         $text .= _DoListBlocks(substr($parse, pos($parse))) if pos($parse) < length($parse);
1803     }
1804
1805     return $text;
1806 }
1807
1808
1809 sub _ProcessListItems {
1810 #
1811 #   Process the contents of a single ordered or unordered list, splitting it
1812 #   into individual list items.
1813 #
1814
1815     my $list_type = shift;
1816     my $list_str = shift;
1817
1818     # The $g_list_level global keeps track of when we're inside a list.
1819     # Each time we enter a list, we increment it; when we leave a list,
1820     # we decrement. If it's zero, we're not in a list anymore.
1821     #
1822     # We do this because when we're not inside a list, we want to treat
1823     # something like this:
1824     #
1825     #   I recommend upgrading to version
1826     #   8. Oops, now this line is treated
1827     #   as a sub-list.
1828     #
1829     # As a single paragraph, despite the fact that the second line starts
1830     # with a digit-period-space sequence.
1831     #
1832     # Whereas when we're inside a list (or sub-list), that line will be
1833     # treated as the start of a sub-list. What a kludge, huh? This is
1834     # an aspect of Markdown's syntax that's hard to parse perfectly
1835     # without resorting to mind-reading. Perhaps the solution is to
1836     # change the syntax rules such that sub-lists must start with a
1837     # starting cardinal number; e.g. "1." or "a.".
1838
1839     $g_list_level++;
1840     my $idt = "\027" x $g_list_level;
1841     my $marker_kind = $list_type eq "ul" ? $marker_ul : $marker_ol;
1842     my $first_marker;
1843     my $first_marker_type;
1844     my $first_marker_num;
1845     my $last_marker;
1846     my $fancy;
1847     my $skipped;
1848     my $typechanged;
1849     my $next_num = 1;
1850
1851     # trim trailing blank lines:
1852     $list_str =~ s/\n{2,}\z/\n/;
1853
1854     my $result = "";
1855     my $oldpos = 0;
1856     pos($list_str) = 0;
1857     while ($list_str =~ m{\G            # start where we left off
1858         (\n+)?                          # leading line = $1
1859         (^[ ]*)                         # leading whitespace = $2
1860         ($marker_any) [ ] ([ ]*)        # list marker = $3 leading item space = $4
1861     }cgmx) {
1862         my $leading_line = $1;
1863         my $leading_space = $2;
1864         my $list_marker = $3;
1865         my $list_marker_len = length($list_marker);
1866         my $leading_item_space = $4;
1867         if ($-[0] > $oldpos) {
1868             $result .= substr($list_str, $oldpos, $-[0] - $oldpos); # Sort-of $`
1869             $oldpos = $-[0]; # point at start of this entire match
1870         }
1871         if (!defined($first_marker)) {
1872             $first_marker = $list_marker;
1873             $first_marker_type = _GetListMarkerType($list_type, $first_marker);
1874             if ($first_marker_type) {
1875                 (my $marker_val = $first_marker) =~ s/[.\)]$//;
1876                 $first_marker_num = _GetMarkerIntegerNum($first_marker_type, $marker_val);
1877                 $next_num = $first_marker_num;
1878                 $skipped = 1 if $next_num != 1;
1879             }
1880         } elsif ($list_marker !~ /$marker_kind/) {
1881             # Wrong marker kind, "fix up" the marker to a correct "lazy" marker
1882             # But keep the old length in $list_marker_len
1883             $list_marker = $last_marker;
1884         }
1885
1886         # Now grab the rest of this item's data upto but excluding the next
1887         # list marker at the SAME indent level, but sublists must be INCLUDED
1888
1889         my $item = "";
1890         while ($list_str =~ m{\G
1891             ((?:.+?)(?:\n{1,2}))        # list item text = $1
1892             (?= \n* (?: \z |            # end of string OR
1893                     (^[ ]*)             # leading whitespace = $2
1894                     ($marker_any)       # next list marker = $3
1895                     ([ ]+) ))           # one or more spaces after marker = $4
1896         }cgmxs) {
1897
1898             # If $3 has a left edge that is at the left edge of the previous
1899             # marker OR $3 has a right edge that is at the right edge of the
1900             # previous marker then we stop; otherwise we go on
1901
1902             $item .= substr($list_str, $-[0], $+[0] - $-[0]); # $&
1903             last if !defined($4) || length($2) == length($leading_space) ||
1904                 length($2) + length($3) == length($leading_space) + $list_marker_len;
1905             # move along, you're not the marker droid we're looking for...
1906             $item .= substr($list_str, $+[0], $+[4] - $+[0]);
1907             pos($list_str) = $+[4]; # ...move along over the marker droid
1908         }
1909         # Remember where we parked
1910         $oldpos = pos($list_str);
1911
1912         # Process the $list_marker $item
1913
1914         my $liatt = '';
1915         my $checkbox = '';
1916         my $incr = '';
1917
1918         if ($list_type eq "ul" && !$leading_item_space && $item =~ /^\[([ xX])\] +(.*)$/s) {
1919             my $checkmark = lc $1;
1920             $item = $2;
1921             my ($checkbox_class, $checkbox_val);
1922             if ($checkmark eq "x") {
1923                 ($checkbox_class, $checkbox_val) = ("checkbox-on", "x");
1924             } else {
1925                 ($checkbox_class, $checkbox_val) = ("checkbox-off", "&#160;");
1926             }
1927             $liatt = " class=\"$opt{style_prefix}$checkbox_class\"";
1928             $checkbox = "<span><span></span></span><span></span><span>[<tt>$checkbox_val</tt>]&#160;</span>";
1929         } else {
1930             my $list_marker_type;
1931             ($list_marker_type, $liatt) = _GetListItemTypeClass($list_type, $list_marker, $last_marker);
1932             if ($list_type eq "ol" && defined($first_marker)) {
1933                 my $styled = $fancy = 1 if $liatt && $list_marker =~ /\)$/;
1934                 my ($sfx, $dash) = ("", "");
1935                 ($sfx, $dash) = ("li", "-") if $styled;
1936                 if ($liatt =~ /lower/) {
1937                     $sfx .= "${dash}lc";
1938                 } elsif ($liatt =~ /upper/) {
1939                     $sfx .= "${dash}uc";
1940                 }
1941                 $sfx .= "-greek" if $liatt =~ /greek/;
1942                 $liatt = " class=\"$opt{style_prefix}$sfx\"" if $sfx;
1943                 $typechanged = 1 if $list_marker_type ne $first_marker_type;
1944                 (my $marker_val = $list_marker) =~ s/[.\)]$//;
1945                 my $marker_num = _GetMarkerIntegerNum($list_marker_type, $marker_val);
1946                 $marker_num = $next_num if $marker_num < $next_num;
1947                 $skipped = 1 if $next_num < $marker_num;
1948                 $incr = _IncrList($next_num, $marker_num, "incrlevel=$g_list_level");
1949                 $liatt = " value=\"$marker_num\"$liatt" if $fancy || $skipped;
1950                 $liatt = " type=\"$list_marker_type\"$liatt" if $styled || $typechanged;
1951                 $next_num = $marker_num + 1;
1952             }
1953         }
1954         $last_marker = $list_marker;
1955
1956         if ($leading_line or ($item =~ m/\n{2,}/)) {
1957             $item = _RunBlockGamut(_Outdent($item));
1958             $item =~ s{(</[OUou][Ll]>)\s*\z}{$1} and $item .= "\n$idt<span style=\"display:none\">&#160;</span>";
1959         }
1960         else {
1961             # Recursion for sub-lists:
1962             $item = _DoListsAndBlocks(_Outdent($item));
1963             chomp $item;
1964             $item = _RunSpanGamut($item);
1965         }
1966
1967         # Append to $result
1968         $result .= "$incr$idt<li$liatt>" . $checkbox . $item . "$idt</li>\n";
1969     }
1970     if ($fancy) {
1971         # remove "incrlevel=$g_list_level " parts
1972         $result =~ s{<span incrlevel=$g_list_level class="$opt{style_prefix}ol-incr((?:-\d{1,2})?)">}
1973             {$idt<span class="$opt{style_prefix}ol-incr$1">}g;
1974     } else {
1975         # remove the $g_list_level incr spans entirely
1976         $result =~ s{<span incrlevel=$g_list_level class="$opt{style_prefix}ol-incr(?:-\d{1,2})?"></span>\n}{}g;
1977         # remove the class="$opt{style_prefix}lc-greek" if first_marker is greek
1978         $result =~ s{(<li[^>]*?) class="$opt{style_prefix}lc-greek">}{$1>}g
1979             if defined($first_marker_type) && $first_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
1980     }
1981
1982     # Anything left over (similar to $') goes into result, but this should always be empty
1983     $result .= _RunBlockGamut(substr($list_str, pos($list_str))) if pos($list_str) < length($list_str);
1984
1985     $g_list_level--;
1986
1987     # After all that, if we only got an ordered list with a single item
1988     # and its first marker is a four-digit number >= 1492 and <= 2999
1989     # or an UPPERCASE letter, then pretend we didn't see any list at all.
1990
1991     if ($first_marker_type && $first_marker_num + 1 == $next_num) {
1992         if (($first_marker_type eq "1" && $first_marker_num >= 1492 && $first_marker_num <= 2999) ||
1993             ($first_marker_type eq "A" && !$fancy)) {
1994             return (undef, undef, undef);
1995         }
1996     }
1997
1998     return ($result, $first_marker, $fancy);
1999 }
2000
2001
2002 sub _DoCodeBlocks {
2003 #
2004 #   Process Markdown `<pre><code>` blocks.
2005 #
2006
2007     my $text = shift;
2008
2009     $text =~ s{
2010             (?:\n\n|\A\n?)
2011             (           # $1 = the code block -- one or more lines, starting with indent_width spaces
2012               (?:
2013                 (?:[ ]{$opt{indent_width}})  # Lines must start with indent_width of spaces
2014                 .*\n+
2015               )+
2016             )
2017             ((?=^[ ]{0,$opt{indent_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
2018         }{
2019             my $codeblock = $1;
2020
2021             $codeblock =~ s/\n\n\n/\n\n/g; # undo "paragraph for last list item" change
2022             $codeblock = _EncodeCode(_Outdent($codeblock));
2023             $codeblock =~ s/\A\n+//; # trim leading newlines
2024             $codeblock =~ s/\s+\z//; # trim trailing whitespace
2025
2026             my $result = "<div class=\"$opt{style_prefix}code\"><pre style=\"display:none\"></pre><pre><code>"
2027                 . $codeblock . "\n</code></pre></div>";
2028             my $key = block_id($result, 2);
2029             $g_code_blocks{$key} = $result;
2030             "\n\n" . $key . "\n\n";
2031         }egmx;
2032
2033     return $text;
2034 }
2035
2036
2037 sub _DoCodeSpans {
2038 #
2039 # * Backtick quotes are used for <code></code> spans.
2040 #
2041 # * You can use multiple backticks as the delimiters if you want to
2042 #   include literal backticks in the code span. So, this input:
2043 #
2044 #     Just type ``foo `bar` baz`` at the prompt.
2045 #
2046 #   Will translate to:
2047 #
2048 #     <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
2049 #
2050 #   There's no arbitrary limit to the number of backticks you
2051 #   can use as delimters. If you need three consecutive backticks
2052 #   in your code, use four for delimiters, etc.
2053 #
2054 # * You can use spaces to get literal backticks at the edges:
2055 #
2056 #     ... type `` `bar` `` ...
2057 #
2058 #   Turns to:
2059 #
2060 #     ... type <code>`bar`</code> ...
2061 #
2062
2063     my $text = shift;
2064
2065     $text =~ s@
2066             (`+)        # $1 = Opening run of `
2067             (.+?)       # $2 = The code block
2068             (?<!`)
2069             \1          # Matching closer
2070             (?!`)
2071         @
2072             my $c = "$2";
2073             $c =~ s/^[ ]+//g; # leading whitespace
2074             $c =~ s/[ ]+$//g; # trailing whitespace
2075             $c = _EncodeCode($c);
2076             "<code>$c</code>";
2077         @egsx;
2078
2079     return $text;
2080 }
2081
2082
2083 sub _EncodeCode {
2084 #
2085 # Encode/escape certain characters inside Markdown code runs.
2086 # The point is that in code, these characters are literals,
2087 # and lose their special Markdown meanings.
2088 #
2089     local $_ = shift;
2090
2091     # Encode all ampersands; HTML entities are not
2092     # entities within a Markdown code span.
2093     s/&/&amp;/g;
2094
2095     # Encode $'s, but only if we're running under Blosxom.
2096     # (Blosxom interpolates Perl variables in article bodies.)
2097     s/\$/&#036;/g if $_haveBX;
2098
2099     # Do the angle bracket song and dance:
2100     s! <  !&lt;!gx;
2101     s! >  !&gt;!gx;
2102
2103     # Now, escape characters that are magic in Markdown:
2104     s!([*_~{}\[\]\\])!$g_escape_table{$1}!g;
2105
2106     return $_;
2107 }
2108
2109
2110 sub _DoItalicsAndBoldAndStrike {
2111     my $text = shift;
2112
2113     my $doital1 = sub {
2114         my $text = shift;
2115         $text =~ s{ \* (?=\S) (.+?) (?<=\S) \* }
2116             {<em>$1</em>}gsx;
2117         # We've got to encode any of these remaining to
2118         # avoid conflicting with other italics and bold.
2119         $text =~ s!([*])!$g_escape_table{$1}!g;
2120         $text;
2121     };
2122     my $doital2 = sub {
2123         my $text = shift;
2124         $text =~ s{ (?<!\w) _ (?=\S) (.+?) (?<=\S) _ (?!\w) }
2125             {<em>$1</em>}gsx;
2126         # We've got to encode any of these remaining to
2127         # avoid conflicting with other italics and bold.
2128         $text =~ s!([_])!$g_escape_table{$1}!g;
2129         $text;
2130     };
2131
2132     # <strong> must go first:
2133     $text =~ s{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }
2134         {"<strong>".&$doital1($1)."</strong>"}gsex;
2135     $text =~ s{ (?<!\w) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\w) }
2136         {"<strong>".&$doital2($1)."</strong>"}gsex;
2137
2138     $text =~ s{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }
2139         {<strike>$1</strike>}gsx;
2140
2141     $text =~ s{ \* (?=\S) (.+?) (?<=\S) \* }
2142         {<em>$1</em>}gsx;
2143     $text =~ s{ (?<!\w) _ (?=\S) (.+?) (?<=\S) _ (?!\w) }
2144         {<em>$1</em>}gsx;
2145
2146     return $text;
2147 }
2148
2149
2150 sub _DoBlockQuotes {
2151     my $text = shift;
2152
2153     $text =~ s{
2154           (                     # Wrap whole match in $1
2155             (
2156               ^[ ]*>[ ]?        # '>' at the start of a line
2157                 .*\n            # rest of the first line
2158               (.+\n)*           # subsequent consecutive lines
2159               \n*               # blanks
2160             )+
2161           )
2162         }{
2163             my $bq = $1;
2164             $bq =~ s/^[ ]*>[ ]?//gm; # trim one level of quoting
2165             $bq =~ s/^[ ]+$//mg;         # trim whitespace-only lines
2166             $bq = _RunBlockGamut($bq);   # recurse
2167
2168             $bq =~ s/^/\027/mg;
2169             "<blockquote>\n$bq\n</blockquote>\n\n";
2170         }egmx;
2171
2172
2173     return $text;
2174 }
2175
2176
2177 my ($LEAD, $TRAIL, $LEADBAR, $LEADSP, $COLPL, $SEP);
2178 BEGIN {
2179     $LEAD = qr/(?>[ ]*(?:\|[ ]*)?)/o;
2180     $TRAIL = qr/[ ]*(?<!\\)\|[ ]*/o;
2181     $LEADBAR = qr/(?>[ ]*\|[ ]*)/o;
2182     $LEADSP = qr/(?>[ ]*)/o;
2183     $COLPL = qr/(?:[^\n|\\]|\\(?:(?>[^\n])|(?=\n|$)))+/o;
2184     $SEP = qr/[ ]*:?-+:?[ ]*/o;
2185 }
2186
2187 sub _DoTables {
2188     my $text = shift;
2189
2190     $text =~ s{
2191         (                               # Wrap whole thing to avoid $&
2192          (?: (?<=\n\n) | \A\n? )        # Preceded by blank line or beginning of string
2193          ^(                             # Header line
2194             $LEADBAR \| [^\n]* |
2195             $LEADBAR $COLPL [^\n]* |
2196             $LEADSP $COLPL \| [^\n]*
2197           )\n
2198           (                             # Separator line
2199             $LEADBAR $SEP (?: \| $SEP )* (?: \| [ ]*)? |
2200             $SEP (?: \| $SEP )+ (?: \| [ ]*)? |
2201             $SEP \| [ ]*
2202           )\n
2203           ((?:                          # Rows (0+)
2204             $LEADBAR \| [^\n]* \n |
2205             $LEADBAR $COLPL [^\n]* \n |
2206             $LEADSP $COLPL \| [^\n]* \n
2207           )*)
2208         )
2209     } {
2210         my ($w, $h, $s, $rows) = ($1, $2, $3, $4);
2211         my @heads = _SplitTableRow($h);
2212         my @seps = _SplitTableRow($s);
2213         if (@heads == @seps) {
2214             my @align = map {
2215                 if (/^:-+:$/) {" align=\"center\""}
2216                 elsif (/^:/) {" align=\"left\""}
2217                 elsif (/:$/) {" align=\"right\""}
2218                 else {""}
2219             } @seps;
2220             my $nohdr = "";
2221             $nohdr = " $opt{style_prefix}table-nohdr" if join("", @heads) eq "";
2222             my $tab ="\n<table border=\"1\" cellspacing=\"0\" cellpadding=\"2\" class=\"$opt{style_prefix}table$nohdr\">\n";
2223             $tab .=
2224                 "  <tr class=\"$opt{style_prefix}row-hdr\">" . _MakeTableRow("th", \@align, @heads) . "</tr>\n"
2225                 unless $nohdr;
2226             my $cnt = 0;
2227             my @classes = ("class=\"$opt{style_prefix}row-even\"", "class=\"$opt{style_prefix}row-odd\"");
2228             $tab .= "  <tr " . $classes[++$cnt % 2] . ">" . _MakeTableRow("td", \@align, @$_) . "</tr>\n"
2229                     foreach (_SplitMergeRows($rows));
2230             $tab .= "</table>\n\n";
2231         } else {
2232             $w;
2233         }
2234     }egmx;
2235
2236     return $text;
2237 }
2238
2239
2240 sub _SplitMergeRows {
2241     my @rows = ();
2242     my ($mergeprev, $mergenext) = (0,0);
2243     foreach (split(/\n/, $_[0])) {
2244         $mergeprev = $mergenext;
2245         $mergenext = 0;
2246         my @cols = _SplitTableRow($_);
2247         if (_endswithbareslash($cols[$#cols])) {
2248             my $last = $cols[$#cols];
2249             substr($last, -1, 1) = "";
2250             $last =~ s/[ ]+$//;
2251             $cols[$#cols] = $last;
2252             $mergenext = 1;
2253         }
2254         if ($mergeprev) {
2255             for (my $i = 0; $i <= $#cols; ++$i) {
2256                 my $cell = $rows[$#rows]->[$i];
2257                 defined($cell) or $cell = "";
2258                 $rows[$#rows]->[$i] = _MergeCells($cell, $cols[$i]);
2259             }
2260         } else {
2261             push(@rows, [@cols]);
2262         }
2263     }
2264     return @rows;
2265 }
2266
2267
2268 sub _endswithbareslash {
2269     return 0 unless substr($_[0], -1, 1) eq "\\";
2270     my @parts = split(/\\\\/, $_[0], -1);
2271     return substr($parts[$#parts], -1, 1) eq "\\";
2272 }
2273
2274
2275 sub _MergeCells {
2276     my ($c1, $c2) = @_;
2277     return $c1 if $c2 eq "";
2278     return $c2 if $c1 eq "";
2279     return $c1 . " " . $c2;
2280 }
2281
2282
2283 sub _SplitTableRow {
2284     my $row = shift;
2285     $row =~ s/^$LEAD//;
2286     $row =~ s/$TRAIL$//;
2287     $row =~ s!\\\\!$g_escape_table{'\\'}!go; # Must process escaped backslashes first.
2288     $row =~ s!\\\|!$g_escape_table{'|'}!go; # Then do \|
2289     my @elems = map {
2290       s!$g_escape_table{'|'}!|!go;
2291       s!$g_escape_table{'\\'}!\\\\!go;
2292       s/^[ ]+//;
2293       s/[ ]+$//;
2294       $_;
2295     } split(/[ ]*\|[ ]*/, $row, -1);
2296     @elems or push(@elems, "");
2297     return @elems;
2298 }
2299
2300
2301 sub _MakeTableRow {
2302     my $etype = shift;
2303     my $align = shift;
2304     my $row = "";
2305     for (my $i = 0; $i < @$align; ++$i) {
2306         my $data = $_[$i];
2307         defined($data) or $data = "";
2308         $row .= "<" . $etype . $$align[$i] . ">" .
2309             _RunSpanGamut($data) . "</" . $etype . ">";
2310     }
2311     return $row;
2312 }
2313
2314
2315 sub _FormParagraphs {
2316 #
2317 # Params:
2318 #   $text - string to process with html <p> tags
2319 #
2320     my $text = shift;
2321
2322     # Strip leading and trailing lines:
2323     $text =~ s/\A\n+//;
2324     $text =~ s/\n+\z//;
2325
2326     my @grafs = split(/\n{2,}/, $text);
2327
2328     #
2329     # Wrap <p> tags.
2330     #
2331     foreach (@grafs) {
2332         unless (defined($g_html_blocks{$_}) || defined($g_code_blocks{$_})) {
2333             $_ = _RunSpanGamut($_);
2334             s/^([ ]*)/<p>/;
2335             $_ .= "</p>";
2336         }
2337     }
2338
2339     #
2340     # Unhashify HTML blocks
2341     #
2342     foreach (@grafs) {
2343         if (defined( $g_html_blocks{$_} )) {
2344             $_ = $g_html_blocks{$_};
2345         }
2346     }
2347
2348     return join "\n\n", @grafs;
2349 }
2350
2351
2352 my $g_possible_tag_name;
2353 my %ok_tag_name;
2354 BEGIN {
2355     # note: length("blockquote") == 10
2356     $g_possible_tag_name = qr/(?i:[a-z]{1,10}|h[1-6])/o;
2357     %ok_tag_name = map({$_ => 1} qw(
2358         a abbr acronym address area
2359         b basefont bdo big blockquote br
2360         caption center cite code col colgroup
2361         dd del dfn div dl dt
2362         em
2363         font
2364         h1 h2 h3 h4 h5 h6 hr
2365         i img ins
2366         kbd
2367         li
2368         map
2369         ol
2370         p pre
2371         q
2372         s samp small span strike strong sub sup
2373         table tbody td tfoot th thead tr tt
2374         u ul
2375         var
2376     ));
2377     $ok_tag_name{$_} = 0 foreach (qw(
2378         dir menu
2379     ));
2380 }
2381
2382
2383 sub _SetAllowedTag {
2384     my ($tag, $forbid) = @_;
2385     $ok_tag_name{$tag} = $forbid ? 0 : 1
2386         if defined($tag) && exists($ok_tag_name{$tag});
2387 }
2388
2389
2390 # Encode leading '<' of any non-tags
2391 # However, "<?", "<!" and "<$" are passed through (legacy on that "<$" thing)
2392 sub _DoTag {
2393     my $tag = shift;
2394     return $tag if $tag =~ /^<[?\$!]/;
2395     if (($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)} || $tag =~ m{^</($g_possible_tag_name)\s*>}) &&
2396         $ok_tag_name{lc($1)}) {
2397
2398         return _ProcessURLTag("href", $tag, 1) if $tag =~ /^<a\s/i;
2399         return _ProcessURLTag("src", $tag) if $tag =~ /^<img\s/i;
2400         return $tag;
2401     }
2402     $tag =~ s/^</&lt;/;
2403     return $tag;
2404 }
2405
2406 # Strip out all tags that _DoTag would match
2407 sub _StripTags {
2408     my $text = shift;
2409     my $_StripTag = sub {
2410         my $tag = shift;
2411         return $tag if $tag =~ /^<[?\$!]/;
2412         if (($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)} || $tag =~ m{^</($g_possible_tag_name)\s*>}) &&
2413             $ok_tag_name{lc($1)}) {
2414
2415             return ""; # strip it out
2416         }
2417         return $tag;
2418     };
2419     $text =~ s{(<[^>]*>)}{&$_StripTag($1)}ige;
2420     return $text;
2421 }
2422
2423 my %univatt;    # universally allowed attribute names
2424 my %tagatt;     # per-element allowed attribute names
2425 my %tagmt;      # empty element tags
2426 my %tagocl;     # non-empty elements with optional closing tag
2427 my %tagacl;     # which %tagocl an opening %tagocl will close
2428 my %tagblk;     # block elements
2429 my %taga1p;     # open tags which require at least one attribute
2430 my %lcattval;   # names of attribute values to lowercase
2431 my %impatt;     # names of "implied" attributes
2432 BEGIN {
2433     %univatt = map({$_ => 1} qw(class dir id lang style title xml:lang));
2434     %tagatt = (
2435         'a' => { map({$_ => 1} qw(href name)) },
2436         'area' => { map({$_ => 1} qw(alt coords href nohref shape)) },
2437         'basefont' => { map({$_ => 1} qw(color face size)) },
2438         'br' => { map({$_ => 1} qw(clear)) },
2439         'caption' => { map({$_ => 1} qw(align)) },
2440         'col' => { map({$_ => 1} qw(align span width valign)) },
2441         'colgroup' => { map({$_ => 1} qw(align span width valign)) },
2442         'dir' => { map({$_ => 1} qw(compact)) },
2443         'div' => { map({$_ => 1} qw(align)) },
2444         'dl' => { map({$_ => 1} qw(compact)) },
2445         'font' => { map({$_ => 1} qw(color face size)) },
2446         'h1' => { map({$_ => 1} qw(align)) },
2447         'h2' => { map({$_ => 1} qw(align)) },
2448         'h3' => { map({$_ => 1} qw(align)) },
2449         'h4' => { map({$_ => 1} qw(align)) },
2450         'h5' => { map({$_ => 1} qw(align)) },
2451         'h6' => { map({$_ => 1} qw(align)) },
2452         'hr' => { map({$_ => 1} qw(align noshade size width)) },
2453         # NO server-side image maps, therefore NOT ismap !
2454         'img' => { map({$_ => 1} qw(align alt border height hspace src usemap vspace width)) },
2455         'li' => { map({$_ => 1} qw(compact type value)) },
2456         'map' => { map({$_ => 1} qw(name)) },
2457         'menu' => { map({$_ => 1} qw(compact)) },
2458         'ol' => { map({$_ => 1} qw(compact start type)) },
2459         'p' => { map({$_ => 1} qw(align)) },
2460         'pre' => { map({$_ => 1} qw(width)) },
2461         'table' => { map({$_ => 1} qw(align border cellpadding cellspacing summary width)) },
2462         'tbody' => { map({$_ => 1} qw(align valign)) },
2463         'tfoot' => { map({$_ => 1} qw(align valign)) },
2464         'thead' => { map({$_ => 1} qw(align valign)) },
2465         'td' => { map({$_ => 1} qw(align colspan height nowrap rowspan valign width)) },
2466         'th' => { map({$_ => 1} qw(align colspan height nowrap rowspan valign width)) },
2467         'tr' => { map({$_ => 1} qw(align valign)) },
2468         'ul' => { map({$_ => 1} qw(compact type)) }
2469     );
2470     %tagmt = map({$_ => 1} qw(area basefont br col hr img));
2471     %tagocl = map({$_ => 1} qw(colgroup dd dt li p tbody td tfoot th thead tr));
2472     %tagacl = (
2473         'colgroup' => \%tagocl,
2474         'dd' => \%tagocl,
2475         'dt' => \%tagocl,
2476         'li' => \%tagocl,
2477         'tbody' => \%tagocl,
2478         'td' => { map({$_ => 1} qw(colgroup dd dt li p td tfoot th thead)) },
2479         'tfoot' => \%tagocl,
2480         'th' => { map({$_ => 1} qw(colgroup dd dt li p td tfoot th thead)) },
2481         'thead' => \%tagocl,
2482         'tr' => { map({$_ => 1} qw(colgroup dd dt li p td tfoot th thead tr)) },
2483     );
2484     %tagblk = map({$_ => 1} qw(address blockquote div dl h1 h2 h3 h4 h5 h6 hr ol p pre table));
2485     %impatt = map({$_ => 1} qw(checked compact ismap nohref noshade nowrap));
2486     %lcattval = map({$_ => 1} qw(
2487         align border cellpadding cellspacing checked clear color colspan
2488         compact coords height hspace ismap nohref noshade nowrap rowspan size
2489         span shape valign vspace width
2490     ));
2491     %taga1p = map({$_ => 1} qw(a area img map));
2492 }
2493
2494
2495 # _SanitizeTags
2496 #
2497 # Inspect all '<'...'>' tags in the input and HTML encode those things
2498 # that cannot possibly be tags and at the same time sanitize them.
2499 #
2500 # $1 => text to process
2501 # <= sanitized text
2502 sub _SanitizeTags {
2503     my ($text, $validate) = @_;
2504     $text =~ s/\s+$//;
2505     $text ne "" or return "";
2506     my @stack = ();
2507     my $ans = "";
2508     my $end = length($text);
2509     pos($text) = 0;
2510     my ($autoclose, $autoclopen);
2511     my $lastmt = "";
2512     $autoclose = sub {
2513         my $s = $_[0] || "";
2514         while (@stack && $stack[$#stack]->[0] ne $s &&
2515                 $tagocl{$stack[$#stack]->[0]}) {
2516             $ans .= "</" . $stack[$#stack]->[0] . ">";
2517             pop(@stack);
2518         }
2519     } if $validate;
2520     $autoclopen = sub {
2521         my $s = $_[0] || "";
2522         my $c;
2523         if ($tagblk{$s}) {$c = {p=>1}}
2524         elsif ($tagocl{$s}) {$c = $tagacl{$s}}
2525         else {return}
2526         while (@stack && $c->{$stack[$#stack]->[0]}) {
2527             $ans .= "</" . $stack[$#stack]->[0] . ">";
2528             pop(@stack);
2529         }
2530     } if $validate;
2531     while (pos($text) < $end) {
2532         if ($text =~ /\G([^<]+)/gc) {
2533             $ans .= $1;
2534             $lastmt = "" if $1 =~ /\S/;
2535             next;
2536         }
2537         my $tstart = pos($text);
2538         if ($text =~ /\G(<[^>]*>)/gc) {
2539             my $tag = $1;
2540             if ($tag =~ /^<!--/) { # pass "comments" through
2541                 $ans .= $tag;
2542                 next;
2543             }
2544             my $tt;
2545             if (($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)} ||
2546                  $tag =~ m{^</($g_possible_tag_name)\s*>}) &&
2547                 $ok_tag_name{$tt=lc($1)})
2548             {
2549                 my ($stag, $styp) = _Sanitize($tag);
2550                 if ($styp == 2 && $lastmt eq $tt) {
2551                     $lastmt = "";
2552                     next;
2553                 }
2554                 $lastmt = $styp == 3 ? $tt : "";
2555                 if ($validate && $styp) {
2556                     &$autoclopen($tt) if $styp == 1 || $styp == 3;
2557                     if ($styp == 1) {
2558                         push(@stack,[$tt,$tstart]);
2559                     } elsif ($styp == 2) {
2560                         &$autoclose($tt) unless $tt eq "p";
2561                         !@stack and _xmlfail("closing tag $tt without matching open at " .
2562                             _linecol($tstart, $text));
2563                         if ($stack[$#stack]->[0] eq $tt) {
2564                             pop(@stack);
2565                         } else {
2566                             my @i = @{$stack[$#stack]};
2567                             _xmlfail("opening tag $i[0] at " . _linecol($i[1], $text) .
2568                                 " mismatch with closing tag $tt at " . _linecol($tstart, $text));
2569                         }
2570                     }
2571                 }
2572                 $ans .= $stag;
2573                 next;
2574             } else {
2575                 $tag =~ s/^</&lt;/;
2576                 $ans .= $tag;
2577                 $lastmt = "";
2578                 next;
2579             }
2580         }
2581         # can only get here if "\G" char is an unmatched "<"
2582         pos($text) += 1;
2583         $ans .= "&lt;";
2584         $lastmt = "";
2585     }
2586     &$autoclose if $validate;
2587     if ($validate && @stack) {
2588         my @errs;
2589         my $j;
2590         for ($j = 0; $j <= $#stack; ++$j) {
2591                 my @i = @{$stack[$j]};
2592                 unshift(@errs, "opening tag $i[0] without matching close at " .
2593                             _linecol($i[1], $text));
2594         }
2595         _xmlfail(@errs);
2596     }
2597     return $ans."\n";
2598 }
2599
2600
2601 sub _linecol {
2602         my ($pos, $txt) = @_;
2603         pos($txt) = 0;
2604         my ($l, $p);
2605         $l = 1;
2606         ++$l while ($p = pos($txt)), $txt =~ /\G[^\n]*\n/gc && pos($txt) <= $pos;
2607         return "line $l col " . (1 + ($pos - $p));
2608 }
2609
2610
2611 sub _xmlfail {
2612         die join("", map("$_\n", @_));
2613 }
2614
2615
2616 sub _Sanitize {
2617     my $tag = shift;
2618     my $seenatt = {};
2619     if ($tag =~ m{^</}) {
2620         $tag =~ s/\s+>$/>/;
2621         return (lc($tag),2);
2622     }
2623     if ($tag =~ /^<([^\s<\/>]+)\s+/gs) {
2624         my $tt = lc($1);
2625         my $out = "<" . $tt . " ";
2626         my $ok = $tagatt{$tt};
2627         ref($ok) eq "HASH" or $ok = {};
2628         while ($tag =~ /\G\s*([^\s\042\047<\/>=]+)((?>=)|\s*)/gcs) {
2629             my ($a,$s) = ($1, $2);
2630             if ($s eq "" && substr($tag, pos($tag), 1) =~ /^[\042\047]/) {
2631                 # pretend the "=" sign wasn't overlooked
2632                 $s = "=";
2633             }
2634             if (substr($s,0,1) ne "=") {
2635                 # it's one of "those" attributes (e.g. compact) or not
2636                 # _SanitizeAtt will fix it up if it is
2637                 $out .= _SanitizeAtt($a, '""', $ok, $seenatt);
2638                 next;
2639             }
2640             if ($tag =~ /\G([\042\047])((?:(?!\1)(?!<).)*)\1\s*/gcs) {
2641                 $out .= _SanitizeAtt($a, $1.$2.$1, $ok, $seenatt);
2642                 next;
2643             }
2644             if ($tag =~ /\G([\042\047])((?:(?!\1)(?![<>])(?![\/][>]).)*)/gcs) {
2645                 # what to do what to do what to do
2646                 # trim trailing \s+ and magically add the missing quote
2647                 my ($q, $v) = ($1, $2);
2648                 $v =~ s/\s+$//;
2649                 $out .= _SanitizeAtt($a, $q.$v.$q, $ok, $seenatt);
2650                 next;
2651             }
2652             if ($tag =~ /\G([^\s<\/>]+)\s*/gcs) {
2653                 # auto quote it
2654                 my $v = $1;
2655                 $v =~ s/\042/&quot;/go;
2656                 $out .= _SanitizeAtt($a, '"'.$v.'"', $ok, $seenatt);
2657                 next;
2658             }
2659             # give it an empty value
2660             $out .= _SanitizeAtt($a, '""', $ok, $seenatt);
2661         }
2662         my $sfx = substr($tag, pos($tag));
2663         $out =~ s/\s+$//;
2664         my $typ = 1;
2665         if ($tagmt{$tt}) {
2666             $typ = 3;
2667             $out .= $opt{empty_element_suffix};
2668         } else {
2669             $out .= ">";
2670             $out .= "</$tt>" and $typ = 3 if $tag =~ m,/>$,;
2671         }
2672         return ($out,$typ);
2673     } elsif ($tag =~ /^<([^\s<\/>]+)/s) {
2674         my $tt = lc($1);
2675         return ("&lt;" . substr($tag,1), 0) if $taga1p{$tt};
2676         if ($tagmt{$tt}) {
2677             return ("<" . $tt . $opt{empty_element_suffix}, 3);
2678         } elsif ($tag =~ m,/>$,) {
2679             return ("<" . $tt . "></" . $tt . ">", 3);
2680         } else {
2681             return ("<" . $tt . ">", 1);
2682         }
2683     }
2684     return (lc($tag),0);
2685 }
2686
2687
2688 sub _SanitizeAtt {
2689     my $att = lc($_[0]);
2690     return "" unless $att =~ /^[_a-z:][_a-z:0-9.-]*$/; # no weirdo char att names
2691     return "" unless $univatt{$att} || $_[2]->{$att};
2692     return "" if $_[3]->{$att}; # no repeats
2693     $_[3]->{$att} = 1;
2694     $impatt{$att} and return $att."=".'"'.$att.'"';
2695     if ($lcattval{$att}) {
2696         return $att."=".lc($_[1])." ";
2697     } else {
2698         return $att."=".$_[1]." ";
2699     }
2700 }
2701
2702
2703 sub _ProcessURLTag {
2704     my ($att, $tag, $dofrag) = @_;
2705
2706     $att = lc($att) . "=";
2707     if ($tag =~ /^(<[^\s>]+\s+)/g) {
2708         my $out = $1;
2709         while ($tag =~ /\G([^\s\042\047<\/>=]+=)([\042\047])((?:(?!\2)(?!<).)*)(\2\s*)/gcs) {
2710             my ($p, $q, $v, $s) = ($1, $2, $3, $4);
2711             if (lc($p) eq $att && $v ne "") {
2712                 if ($dofrag && $v =~ m"^#") {
2713                     $v = _FindFragmentMatch($v);
2714                     my $bp;
2715                     if (($bp = $opt{base_prefix}) ne "") {
2716                         $v = "\2\3" . $bp . $v;
2717                     }
2718                 } else {
2719                     $v = _PrefixURL($v);
2720                 }
2721                 $v = _EncodeAttText($v);
2722             }
2723             $out .= $p . $q . $v . $s;
2724         }
2725         $out .= substr($tag, pos($tag));
2726         substr($out,0,1) = $g_escape_table{'<'};
2727         substr($out,-1,1) = $g_escape_table{'>'};
2728         return $out;
2729     }
2730
2731     return $tag;
2732 }
2733
2734
2735 sub _HTMLEncode {
2736     my $text = shift;
2737
2738     # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
2739     #   http://bumppo.net/projects/amputator/
2740     $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
2741
2742     # Remaining entities now
2743     $text =~ s/\042/&quot;/g;
2744     $text =~ s/\047/&#39;/g; # Some older browsers do not grok &apos;
2745     $text =~ s/</&lt;/g;
2746     $text =~ s/>/&gt;/g;
2747
2748     return $text;
2749 }
2750
2751
2752 sub _EncodeAmps {
2753     my $text = shift;
2754
2755     # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
2756     #   http://bumppo.net/projects/amputator/
2757     $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
2758
2759     return $text;
2760 }
2761
2762
2763 sub _EncodeAmpsAndAngles {
2764 # Smart processing for ampersands and angle brackets that need to be encoded.
2765
2766     my $text = shift;
2767
2768     # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
2769     #   http://bumppo.net/projects/amputator/
2770     $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
2771
2772     # Encode naked <'s
2773     $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
2774     $text =~ s{<(?=[^>]*$)}{&lt;}g;
2775
2776     # Encode <'s that cannot possibly be a start or end tag
2777     $text =~ s{(<[^>]*>)}{_DoTag($1)}ige;
2778
2779     return $text;
2780 }
2781
2782
2783 sub _EncodeBackslashEscapes {
2784 #
2785 # Parameter: String.
2786 # Returns:   String after processing the following backslash escape sequences.
2787 #
2788     local $_ = shift;
2789
2790     s!\\\\!$g_escape_table{'\\'}!go; # Must process escaped backslashes first.
2791     s{\\([`*_~{}\[\]()>#+\-.!`])}{$g_escape_table{$1}}g;
2792
2793     return $_;
2794 }
2795
2796
2797 sub _DoAutoLinks {
2798     local $_ = shift;
2799
2800     s{<((https?|ftps?):[^'\042>\s]+)>(?!\s*</a>)}{_MakeATag($1, "&lt;".$1."&gt;")}gise;
2801
2802     # Email addresses: <address@domain.foo>
2803     s{
2804         <
2805         (?:mailto:)?
2806         (
2807             [-.\w]+
2808             \@
2809             [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
2810         )
2811         >
2812     }{
2813         _EncodeEmailAddress(_UnescapeSpecialChars($1), "&#x3c;", "&#62;");
2814     }egix;
2815
2816     # (kjm) I don't do "x" patterns
2817     s{(?:^|(?<=\s))((?:https?|ftps?)://(?:[-a-zA-Z0-9./?\&\%=_~!*;:\@+\$,\x23](?:(?<![.,:;])|(?=[^\s])))+)}
2818      {_MakeATag($1, $1)}soge;
2819     s{(?<![][])(?<!\] )\[RFC( ?)([0-9]{1,5})\](?![][])(?! \[)}
2820      {"["._MakeATag("https://tools.ietf.org/html/rfc$2", "RFC$1$2", "RFC $2")."]"}soge;
2821
2822     return $_;
2823 }
2824
2825
2826 sub _EncodeEmailAddress {
2827 #
2828 # Input: an email address, e.g. "foo@example.com"
2829 #
2830 # Output: the email address as a mailto link, with each character
2831 #         of the address encoded as either a decimal or hex entity, in
2832 #         the hopes of foiling most address harvesting spam bots. E.g.:
2833 #
2834 #   <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
2835 #   x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
2836 #   &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
2837 #
2838 # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
2839 # mailing list: <https://tinyurl.com/yu7ue>
2840 #
2841
2842     my ($addr, $prefix, $suffix) = @_;
2843     $prefix = "" unless defined($prefix);
2844     $suffix = "" unless defined($suffix);
2845
2846     srand(unpack('N',md5($addr)));
2847     my @encode = (
2848         sub { '&#' .                 ord(shift)   . ';' },
2849         sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
2850         sub {                            shift          },
2851     );
2852
2853     $addr = "mailto:" . $addr;
2854
2855     $addr =~ s{(.)}{
2856         my $char = $1;
2857         if ( $char eq '@' ) {
2858             # this *must* be encoded. I insist.
2859             $char = $encode[int rand 1]->($char);
2860         } elsif ( $char ne ':' ) {
2861             # leave ':' alone (to spot mailto: later)
2862             my $r = rand;
2863             # roughly 10% raw, 45% hex, 45% dec
2864             $char = (
2865                 $r > .9   ?  $encode[2]->($char)  :
2866                 $r < .45  ?  $encode[1]->($char)  :
2867                              $encode[0]->($char)
2868             );
2869         }
2870         $char;
2871     }gex;
2872
2873     # strip the mailto: from the visible part
2874     (my $bareaddr = $addr) =~ s/^.+?://;
2875     $addr = _MakeATag("$addr", $prefix.$bareaddr.$suffix);
2876
2877     return $addr;
2878 }
2879
2880
2881 sub _UnescapeSpecialChars {
2882 #
2883 # Swap back in all the special characters we've hidden.
2884 #
2885     my $text = shift;
2886
2887     while( my($char, $hash) = each(%g_escape_table) ) {
2888         $text =~ s/$hash/$char/g;
2889     }
2890     return $text;
2891 }
2892
2893
2894 sub _TokenizeHTML {
2895 #
2896 # Parameter: String containing HTML markup.
2897 # Returns:   Reference to an array of the tokens comprising the input
2898 #            string. Each token is either a tag (possibly with nested,
2899 #            tags contained therein, such as <a href="<MTFoo>">, or a
2900 #            run of text between tags. Each element of the array is a
2901 #            two-element array; the first is either 'tag' or 'text';
2902 #            the second is the actual value.
2903 #
2904 #
2905 # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
2906 #   <https://web.archive.org/web/20041215155739/http://bradchoate.com/weblog/2002/07/27/mtregex>
2907 #
2908
2909     my $str = shift;
2910     my $pos = 0;
2911     my $len = length $str;
2912     my @tokens;
2913
2914     my $depth = 6;
2915     my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
2916     my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
2917                    (?s: <\? .*? \?> ) |             # processing instruction
2918                    $nested_tags/iox;                # nested tags
2919
2920     while ($str =~ m/($match)/g) {
2921         my $whole_tag = $1;
2922         my $sec_start = pos $str;
2923         my $tag_start = $sec_start - length $whole_tag;
2924         if ($pos < $tag_start) {
2925             push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
2926         }
2927         push @tokens, ['tag', $whole_tag];
2928         $pos = pos $str;
2929     }
2930     push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
2931     \@tokens;
2932 }
2933
2934
2935 sub _Outdent {
2936 #
2937 # Remove one level of line-leading indent_width of spaces
2938 #
2939     my $text = shift;
2940
2941     $text =~ s/^ {1,$opt{indent_width}}//gm;
2942     return $text;
2943 }
2944
2945
2946 # _DeTab
2947 #
2948 # $1 => input text
2949 # $2 => optional tab width (default is $opt{tab_width})
2950 # $3 => leading spaces to strip off each line first (default is 0 aka none)
2951 # <= result with tabs expanded
2952 sub _DeTab {
2953     my $text = shift;
2954     my $ts = shift || $opt{tab_width};
2955     my $leadsp = shift || 0;
2956     my $spr = qr/^ {1,$leadsp}/ if $leadsp;
2957     pos($text) = 0;
2958     my $end = length($text);
2959     my $ans = "";
2960     while (pos($text) < $end) {
2961         my $line;
2962         if ($text =~ /\G(.*?\n)/gcs) {
2963             $line = $1;
2964         } else {
2965             $line = substr($text, pos($text));
2966             pos($text) = $end;
2967         }
2968         $line =~ s/$spr// if $leadsp;
2969         # From the Perl camel book section "Fluent Perl" but modified a bit
2970         $line =~ s/(.*?)(\t+)/$1 . ' ' x (length($2) * $ts - length($1) % $ts)/ges;
2971         $ans .= $line;
2972     }
2973     return $ans;
2974 }
2975
2976
2977 sub _PrefixURL {
2978 #
2979 # Add URL prefix if needed
2980 #
2981     my $url = shift;
2982     $url =~ s/^\s+//;
2983     $url =~ s/\s+$//;
2984     $url = "#" unless $url ne "";
2985
2986     return $url unless $opt{abs_prefix} ne '' || $opt{url_prefix} ne '' || $opt{img_prefix} ne '';
2987     return $url if $url =~ m"^\002\003" || $url =~ m"^#" || $url =~ m,^//,;
2988     $url = $opt{abs_prefix} . $url if $url =~ m,^/, && $opt{abs_prefix} ne '';
2989     return $url if $url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/ || $url =~ m,^//,;
2990     my $ans = $opt{url_prefix};
2991     $ans = $opt{img_prefix}
2992         if $opt{img_prefix} ne '' && $url =~ m"^[^#?]*\.(?:png|gif|jpe?g|svgz?)(?:[#?]|$)"i;
2993     return $url unless $ans ne '';
2994     $ans .= '/' if substr($ans, -1, 1) ne '/';
2995     $ans .= substr($url, 0, 1) eq '/' ? substr($url, 1) : $url;
2996     return "\2\3".$ans;
2997 }
2998
2999
3000 BEGIN {
3001     $g_style_sheet = <<'STYLESHEET';
3002
3003 <style type="text/css">
3004 /* <![CDATA[ */
3005
3006 /* Markdown.pl fancy style sheet
3007 ** Copyright (C) 2017,2018,2019 Kyle J. McKay.
3008 ** All rights reserved.
3009 **
3010 ** Redistribution and use in source and binary forms, with or without
3011 ** modification, are permitted provided that the following conditions are met:
3012 **
3013 **   1. Redistributions of source code must retain the above copyright notice,
3014 **      this list of conditions and the following disclaimer.
3015 **
3016 **   2. Redistributions in binary form must reproduce the above copyright
3017 **      notice, this list of conditions and the following disclaimer in the
3018 **      documentation and/or other materials provided with the distribution.
3019 **
3020 **   3. Neither the name of the copyright holder nor the names of its
3021 **      contributors may be used to endorse or promote products derived from
3022 **      this software without specific prior written permission.
3023 **
3024 ** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
3025 ** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
3026 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3027 ** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
3028 ** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
3029 ** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
3030 ** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
3031 ** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
3032 ** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
3033 ** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
3034 ** POSSIBILITY OF SUCH DAMAGE.
3035 */
3036
3037 div.%(base)code-bt > pre, div.%(base)code > pre {
3038         margin: 0;
3039         padding: 0;
3040         overflow: auto;
3041 }
3042
3043 div.%(base)code-bt > pre > code, div.%(base)code > pre > code {
3044         display: inline-block;
3045         margin: 0;
3046         padding: 0.5em 0;
3047         border-top: thin dotted;
3048         border-bottom: thin dotted;
3049 }
3050
3051 table.%(base)table {
3052         margin-bottom: 0.5em;
3053 }
3054 table.%(base)table, table.%(base)table th, table.%(base)table td {
3055         border-collapse: collapse;
3056         border-spacing: 0;
3057         border: thin solid;
3058 }
3059
3060 ol.%(base)ol {
3061         counter-reset: %(base)item;
3062 }
3063 ol.%(base)ol[start="0"] {
3064         counter-reset: %(base)item -1;
3065 }
3066 ol.%(base)ol[start="5"] {
3067         counter-reset: %(base)item 4;
3068 }
3069 ol.%(base)ol[start="10"] {
3070         counter-reset: %(base)item 9;
3071 }
3072 ol.%(base)ol > span.%(base)ol-incr {
3073         counter-increment: %(base)item;
3074 }
3075 ol.%(base)ol > span.%(base)ol-incr-2 {
3076         counter-increment: %(base)item 2;
3077 }
3078 ol.%(base)ol > span.%(base)ol-incr-5 {
3079         counter-increment: %(base)item 5;
3080 }
3081 ol.%(base)ol > span.%(base)ol-incr-10 {
3082         counter-increment: %(base)item 10;
3083 }
3084 ol.%(base)lc-greek, li.%(base)lc-greek {
3085         list-style-type: lower-greek;
3086 }
3087 ol.%(base)ol > li {
3088         counter-increment: %(base)item;
3089 }
3090 ol.%(base)ol > li.%(base)li,
3091 ol.%(base)ol > li.%(base)li-lc,
3092 ol.%(base)ol > li.%(base)li-lc-greek,
3093 ol.%(base)ol > li.%(base)li-uc {
3094         list-style-type: none;
3095         display: block;
3096 }
3097 ol.%(base)ol > li.%(base)li:before,
3098 ol.%(base)ol > li.%(base)li-lc:before,
3099 ol.%(base)ol > li.%(base)li-lc-greek:before,
3100 ol.%(base)ol > li.%(base)li-uc:before {
3101         position: absolute;
3102         text-align: right;
3103         white-space: nowrap;
3104         margin-left: -9ex;
3105         width: 9ex;
3106 }
3107 ol.%(base)ol > li.%(base)li[type="1"]:before {
3108         content: counter(%(base)item, decimal) ")\A0 \A0 ";
3109 }
3110 ol.%(base)ol > li.%(base)li-lc[type="i"]:before,
3111 ol.%(base)ol > li.%(base)li-lc[type="I"]:before {
3112         content: counter(%(base)item, lower-roman) ")\A0 \A0 ";
3113 }
3114 ol.%(base)ol > li.%(base)li-uc[type="I"]:before,
3115 ol.%(base)ol > li.%(base)li-uc[type="i"]:before {
3116         content: counter(%(base)item, upper-roman) ")\A0 \A0 ";
3117 }
3118 ol.%(base)ol > li.%(base)li-lc[type="a"]:before,
3119 ol.%(base)ol > li.%(base)li-lc[type="A"]:before {
3120         content: counter(%(base)item, lower-alpha) ")\A0 \A0 ";
3121 }
3122 ol.%(base)ol > li.%(base)li-lc-greek[type="a"]:before,
3123 ol.%(base)ol > li.%(base)li-lc-greek[type="A"]:before {
3124         content: counter(%(base)item, lower-greek) ")\A0 \A0 ";
3125 }
3126 ol.%(base)ol > li.%(base)li-uc[type="A"]:before,
3127 ol.%(base)ol > li.%(base)li-uc[type="a"]:before {
3128         content: counter(%(base)item, upper-alpha) ")\A0 \A0 ";
3129 }
3130
3131 li.%(base)checkbox-on,
3132 li.%(base)checkbox-off {
3133         list-style-type: none;
3134         display: block;
3135 }
3136 li.%(base)checkbox-on > span:first-child + span + span,
3137 li.%(base)checkbox-off > span:first-child + span + span {
3138         position: absolute;
3139         clip: rect(0,0,0,0);
3140 }
3141 li.%(base)checkbox-on > span:first-child,
3142 li.%(base)checkbox-off > span:first-child,
3143 li.%(base)checkbox-on > span:first-child + span,
3144 li.%(base)checkbox-off > span:first-child + span {
3145         display: block;
3146         position: absolute;
3147         margin-left: -3ex;
3148         width: 1em;
3149         height: 1em;
3150 }
3151 li.%(base)checkbox-on > span:first-child > span:first-child,
3152 li.%(base)checkbox-off > span:first-child > span:first-child {
3153         display: block;
3154         position: absolute;
3155         left: 0.75pt; top: 0.75pt; right: 0.75pt; bottom: 0.75pt;
3156 }
3157 li.%(base)checkbox-on > span:first-child > span:first-child:before,
3158 li.%(base)checkbox-off > span:first-child > span:first-child:before {
3159         display: inline-block;
3160         position: relative;
3161         right: 1pt;
3162         width: 100%;
3163         height: 100%;
3164         border: 1pt solid;
3165         content: "";
3166 }
3167 li.%(base)checkbox-on > span:first-child + span:before {
3168         position: relative;
3169         left: 2pt;
3170         bottom: 1pt;
3171         font-size: 125%;
3172         line-height: 80%;
3173         vertical-align: text-top;
3174         content: "\2713";
3175 }
3176
3177 /* ]]> */
3178 </style>
3179
3180 STYLESHEET
3181     $g_style_sheet =~ s/^\s+//g;
3182     $g_style_sheet =~ s/\s+$//g;
3183     $g_style_sheet .= "\n";
3184 }
3185
3186 1;
3187
3188 __DATA__
3189
3190 =head1 NAME
3191
3192 Markdown.pl - convert Markdown format text files to HTML
3193
3194 =head1 SYNOPSIS
3195
3196 B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
3197     [B<--imageroot>=I<prefix>] [B<--version>] [B<--shortversion>]
3198     [B<--tabwidth>=I<num>] [B<--stylesheet>] [B<--stub>] [--]
3199     [I<file>...]
3200
3201  Options:
3202    -h                                   show short usage help
3203    --help                               show long detailed help
3204    --html4tags                          use <br> instead of <br />
3205    --deprecated                         allow <dir> and <menu> tags
3206    --sanitize                           sanitize tag attributes
3207    --no-sanitize                        do not sanitize tag attributes
3208    --validate-xml                       check if output is valid XML
3209    --validate-xml-internal              fast basic check if output is valid XML
3210    --no-validate-xml                    do not check output for valid XML
3211    --tabwidth=num                       expand tabs to num instead of 8
3212    -a prefix | --absroot=prefix         append abspath URLs to prefix
3213    -b prefix | --base=prefix            prepend prefix to fragment-only URLs
3214    -r prefix | --htmlroot=prefix        append relative non-img URLs to prefix
3215    -i prefix | --imageroot=prefix       append relative img URLs to prefix
3216    -w [wikipat] | --wiki[=wikipat]      activate wiki links using wikipat
3217    -V | --version                       show version, authors, license
3218                                         and copyright
3219    -s | --shortversion                  show just the version number
3220    --raw                                input contains only raw html
3221    --stylesheet                         output the fancy style sheet
3222    --no-stylesheet                      do not output fancy style sheet
3223    --stub                               wrap output in stub document
3224                                         implies --stylesheet
3225    --                                   end options and treat next
3226                                         argument as file
3227
3228 =head1 DESCRIPTION
3229
3230 Markdown is a text-to-HTML filter; it translates an easy-to-read /
3231 easy-to-write structured text format into HTML. Markdown's text format
3232 is most similar to that of plain text email, and supports features such
3233 as headers, *emphasis*, code blocks, blockquotes, and links.
3234
3235 Markdown's syntax is designed not as a generic markup language, but
3236 specifically to serve as a front-end to (X)HTML. You can  use span-level
3237 HTML tags anywhere in a Markdown document, and you can use block level
3238 HTML tags (like <div> and <table> as well).
3239
3240 For more information about Markdown's syntax, see the F<basics.md>
3241 and F<syntax.md> files included with F<Markdown.pl>.
3242
3243 Input (auto-detected) may be either ISO-8859-1 or UTF-8.  Output is always
3244 converted to the UTF-8 character set.
3245
3246
3247 =head1 OPTIONS
3248
3249 Use "--" to end switch parsing. For example, to open a file named "-z", use:
3250
3251     Markdown.pl -- -z
3252
3253 =over
3254
3255
3256 =item B<--html4tags>
3257
3258 Use HTML 4 style for empty element tags, e.g.:
3259
3260     <br>
3261
3262 instead of Markdown's default XHTML style tags, e.g.:
3263
3264     <br />
3265
3266 This option is I<NOT compatible> with the B<--validate-xml> option
3267 and will produce an immediate error if both are given.
3268
3269
3270 =item B<--deprecated>
3271
3272 Both "<dir>" and "<menu>" are normally taken as literal text and the leading
3273 "<" will be automatically escaped.
3274
3275 If this option is used, they are recognized as valid tags and passed through
3276 without being escaped.
3277
3278 When dealing with program argument descriptions "<dir>" can be particularly
3279 problematic therefore use of this option is not recommended.
3280
3281 Other deprecated tags (such as "<font>" and "<center>" for example) continue
3282 to be recognized and passed through even without using this option.
3283
3284
3285 =item B<--sanitize>
3286
3287 Removes troublesome tag attributes from embedded tags.  Only a very strictly
3288 limited set of tag attributes will be permitted, other attributes will be
3289 silently discarded.  The set of allowed attributes varies by tag.
3290
3291 Splits empty minimized elements that are not one of the HTML allowed empty
3292 elements (C<area> C<basefont> C<br> C<col> C<hr> C<img>) into separate begin
3293 and end tags.  For example, C<< <p/> >> or C<< <p /> >> will be split into
3294 C<< <p></p> >>.
3295
3296 Combines adjacent (whitespace separated only) opening and closing tags for
3297 the same HTML empty element into a single minimized tag.  For example,
3298 C<< <br></br> >> will become C<< <br /> >>.
3299
3300 This is enabled by default.
3301
3302
3303 =item B<--no-sanitize>
3304
3305 Do not sanitize tag attributes.  This option does not allow any tags that would
3306 not be allowed without this option, but it does completely suppress the
3307 attribute sanitation process.   If this option is specified, no attributes will
3308 be removed from any tag (although C<img> and C<a> tags will still be affected
3309 by B<--imageroot>, B<--htmlroot>, B<--absroot> and/or B<--base> options).
3310 Use of this option is I<NOT RECOMMENDED>.
3311
3312
3313 =item B<--validate-xml>
3314
3315 Perform XML validation on the output before it's output and die if
3316 it fails validation.  This requires the C<XML::Simple> or C<XML::Parser>
3317 module be present (one is only required if this option is given).
3318
3319 Any errors are reported to STDERR and the exit status will be
3320 non-zero on XML validation failure.  Note that all line and column
3321 numbers in the error output refer to the entire output that would
3322 have been produced.  Re-run with B<--no-validate-xml> to see what's
3323 actually present at those line and column positions.
3324
3325 If the B<--stub> option has also been given, then the entire output is
3326 validated as-is.  Without the B<--stub> option, the output will be wrapped
3327 in C<< <div>...</div> >> for validation purposes but that extra "div" added
3328 for validation will not be added to the final output.
3329
3330 This option is I<NOT enabled by default>.
3331
3332 This option is I<NOT compatible> with the B<--html4tags> option and will
3333 produce an immediate error if both are given.
3334
3335
3336 =item B<--validate-xml-internal>
3337
3338 Perform XML validation on the output before it's output and die if
3339 it fails validation.  This uses a simple internal consistency checker
3340 that finds unmatched and mismatched open/close tags.
3341
3342 Non-empty elements that in HTML have optional closing tags (C<colgroup>
3343 C<dd> C<dt> C<li> C<p> C<tbody> C<td> C<tfoot> C<th> C<thead> C<tr>)
3344 will automatically have any omitted end tags inserted during the
3345 `--validate-xml-internal` process.
3346
3347 Any errors are reported to STDERR and the exit status will be
3348 non-zero on XML validation failure.  Note that all line and column
3349 numbers in the error output refer to the entire output that would
3350 have been produced before sanitization without any B<--stub> or
3351 B<--stylesheet> options.  Re-run with B<--no-sanitize> and
3352 B<--no-validate-xml> and I<without> any B<--stub> or B<--stylesheet>
3353 options to see what's actually present at those line and column
3354 positions.
3355
3356 This option validates the output I<prior to> adding any requested
3357 B<--stub> or B<--stylesheet>.  As the built-in stub and stylesheet
3358 have already been validated that speeds things up.  The output is
3359 I<NOT> wrapped (in a C<< <div>...</div> >>) for validation as that's
3360 not required for the internal checker.
3361
3362 This option is I<IS enabled by default> unless B<--no-sanitize> is
3363 active.
3364
3365 This option is I<IS compatible> with the B<--html4tags> option.
3366
3367 This option requires the B<--sanitize> option and will produce an
3368 immediate error if both B<--no-sanitize> and B<--validate-xml-internal>
3369 are given.
3370
3371 Note that B<--validate-xml-internal> is I<MUCH faster> than
3372 B<--validate-xml> and I<does NOT> require any extra XML modules to
3373 be present.
3374
3375
3376 =item B<--no-validate-xml>
3377
3378 Do not perform XML validation on the output.  Markdown.pl itself will
3379 normally generate valid XML sequences (unless B<--html4tags> has been
3380 used).  However, any raw tags in the input (that are on the "approved"
3381 list), could potentially result in invalid XML output (i.e. mismatched
3382 start and end tags, missing start or end tag etc.).
3383
3384 Markdown.pl will I<NOT check> for these issues itself.  But with
3385 the B<--validate-xml> option will use C<XML::Simple> or C<XML::Parser>
3386 to do so.
3387
3388 Note that B<--validate-xml-internal> is the default option unless
3389 B<--no-sanitize> is used in which case B<--no-validate-xml> is the
3390 default option.
3391
3392
3393 =item B<--tabwidth>=I<num>
3394
3395 Expand tabs to I<num> character wide tab stop positions instead of the default
3396 8.  Don't use this; physical tabs should always be expanded to 8-character
3397 positions.  This option does I<not> affect the number of spaces needed to
3398 start a new "indent level".  That will always be 4 no matter what value is
3399 used (or implied by default) with this option.  Also note that tabs inside
3400 backticks-delimited code blocks will always be expanded to 8-character tab
3401 stop positions no matter what value is used for this option.
3402
3403 The value must be S<2 <= I<num> <= 32>.
3404
3405
3406 =item B<-a> I<prefix>, B<--absroot>=I<prefix>
3407
3408 Any absolute path URLs (i.e. URLs without a scheme starting with "/" but not
3409 "//") have I<prefix> prepended which prevents them from being acted upon by the
3410 B<--htmlroot> and/or B<--imageroot> options provided the result is a full
3411 absolute URL.  The default is to prepend nothing and leave them as absolute
3412 path URLs which will allow them to be processed by any B<--htmlroot> and/or
3413 B<--imageroot> options.
3414
3415 This option can be helpful when documents are being formatted for display on a
3416 different system and the absolute path URLs need to be "fixed up".
3417
3418
3419 =item B<-b> I<prefix>, B<--base>=I<prefix>
3420
3421 Any fragment-only URLs have I<prefix> prepended.  The default is to prepend
3422 nothing and leave them as bare fragment URLs.  Use of this option may be
3423 necessary when embedding the output of Markdown.pl into a document that makes
3424 use of the C<< <base> >> tag in order for intra-document fragment URL links to
3425 work properly in such a document.
3426
3427
3428 =item B<-r> I<prefix>, B<--htmlroot>=I<prefix>
3429
3430 Any non-absolute URLs have I<prefix> prepended.
3431
3432
3433 =item B<-i> I<prefix>, B<--imageroot>=I<prefix>
3434
3435 Any non-absolute URLs have I<prefix> prepended (overriding the B<-r> prefix
3436 if any) but only if they end in an image suffix.
3437
3438
3439 =item B<-w> [I<wikipat>], B<--wiki>[=I<wikipat>]
3440
3441 Activate wiki links.  Any link enclosed in double brackets (e.g. "[[link]]") is
3442 considered a wiki link.  By default only absolute URL and fragment links are
3443 allowed in the "wiki link style" format.  Any other double-bracketed strings
3444 are left unmolested.
3445
3446 If this option is given, all other wiki links are enabled as well.  Any
3447 non-absolute URL or fragment links will be transformed into a link using
3448 I<wikipat> where the default I<wikipat> if none is given is C<%{s(:md)}.html>.
3449
3450 If the given I<wikipat> does not contain a C<%{...}> placeholder sequence
3451 then it will automatically have C<%{s(:md)}.html> suffixed to it.
3452
3453 The C<...> part of the C<%{...}> sequence specifies zero or more case-insensitive
3454 single-letter options with the following effects:
3455
3456 =over
3457
3458 =item B<d>
3459
3460 Convert spaces to dashes (ASCII 0x2D) instead of underscore (ASCII 0x5F).  Note
3461 that if this option is given then runs of multiple dashes will be converted to
3462 a single dash I<instead> but runs of multiple underscores will be left untouched.
3463
3464 =item B<f>
3465
3466 Flatten the resulting name by replacing forward slashes (ASCII 0x2F) as well.
3467 They will be converted to underscores unless the C<d> option is given (in which
3468 case they will be converted to dashes).  This conversion takes place before
3469 applying the runs-of-multiple reduction.
3470
3471 =item B<l>
3472
3473 Convert link target (excluding any query string and/or fragment) to lowercase.
3474 Takes precedence over any C<u> option, but specifically excludes C<%>-escapes
3475 which are always UPPERCASE hexadecimal.
3476
3477 =item B<r>
3478
3479 Leave raw UTF-8 characters in the result.  Normally anything not allowed
3480 directly in a URL ends up URL-encoded.  With this option, raw valid UTF-8
3481 sequences will be left untouched.  Use with care.
3482
3483 =item B<s> or B<s(>I<< <ext> >>[B<,>I<< <ext> >>]...B<)>
3484
3485 After (temporarily) removing any query string and/or fragment, strip any final
3486 "dot" suffix so long as it occurs after the last slash (if any slash was
3487 present before applying the C<f> option).  The "dot" (ASCII 0x2E) and all
3488 following characters (if any) are removed.  If the optional C<< (<ext>,...) >>
3489 part is present then only strip the extension if it consists of a "dot"
3490 followed by one of the case-insensitive I<< <ext> >> values.  As a special
3491 case, using the value C<:md> for one of the I<< <ext> >> values causes that
3492 value to be expanded to all known markdown extensions.
3493
3494 =item B<u>
3495
3496 Convert link target (excluding any query string and/or fragment) to UPPERCASE.
3497
3498 =item B<v>
3499
3500 Leave runs-of-multiple characters alone (aka "verbatim").  Does not affect
3501 any of the other options except by eliminating the runs-of-multple reduction
3502 step.  Also does I<not> inhibit the initial whitespace trimming.
3503
3504 =back
3505
3506 The URL target of the wiki link is created by first trimming whitespace
3507 (starting and ending whitespace is removed and all other runs of consecutive
3508 whitespace are replaced with a single space) from the wiki link target,
3509 removing (temporarily) any query string and/or fragment, if no options are
3510 present, spaces are converted to underscores (C<_>) and runs of multiple
3511 consecutive underscores are replaced with a single underscore (ASCII 0x5F).
3512 Finally, the I<wikipat> string gets its first placeholder (the C<%{...}>
3513 sequence) replaced with this computed value and the original query string
3514 and/or fragment is re-appended (if any were originally present) and
3515 URL-encoding is applied as needed to produce the actual final target URL.
3516
3517 See above option descriptions for possible available modifications.
3518
3519 One of the commonly used hosting platforms does something substantially similar
3520 to using C<%{dfv}> as the placeholder.
3521
3522
3523 =item B<-V>, B<--version>
3524
3525 Display Markdown's version number and copyright information.
3526
3527
3528 =item B<-s>, B<--shortversion>
3529
3530 Display the short-form version number.
3531
3532
3533 =item B<--raw>
3534
3535 Input contains only raw HTML/XHTML.  All options other than
3536 B<--html4tags>, B<--deprecated>, B<--sanitize> (on by default),
3537 B<--validate-xml> and B<--validate-xml-internal> (and their B<--no-...>
3538 variants) are ignored.
3539
3540 With this option, arbitrary HTML/XHTML input can be passed through
3541 the sanitizer and/or validator.  If sanitation is requested (the
3542 default), input must only contain the contents of the "<body>"
3543 section (i.e. no "<head>" or "<html>").  Output I<will> be converted
3544 to UTF-8 regardless of the input encoding.  All line endings will
3545 be normalized to C<\n> and input encodings other than UTF-8 or
3546 ISO-8859-1 or US-ASCII will end up mangled.
3547
3548 Remember that any B<--stub> and/or B<--stylesheet> options are
3549 I<completely ignored> when B<--raw> is given.
3550
3551
3552 =item B<--stylesheet>
3553
3554 Include the fancy style sheet at the beginning of the output (or in the
3555 C<head> section with B<--stub>).  This style sheet makes fancy checkboxes
3556 and makes a right parenthesis C<)> show instead of a C<.> for ordered lists
3557 that use them.  Without it things will still look fine except that the
3558 fancy stuff won't be there.
3559
3560 Use this option with no other arguments and redirect standard input to
3561 /dev/null to get just the style sheet and nothing else.
3562
3563
3564 =item B<--no-stylesheet>
3565
3566 Overrides a previous B<--stylesheet> and disables implicit inclusion
3567 of the style sheet by the B<--stub> option.
3568
3569
3570 =item B<--stub>
3571
3572 Wrap the output in a full document stub (i.e. has C<html>, C<head> and C<body>
3573 tags).  The style sheet I<will> be included in the C<head> section unless the
3574 B<--no-stylesheet> option is also used.
3575
3576
3577 =item B<-h>, B<--help>
3578
3579 Display Markdown's help.  With B<--help> full help is shown, with B<-h> only
3580 the usage and options are shown.
3581
3582
3583 =back
3584
3585
3586 =head1 VERSION HISTORY
3587
3588 Z<> See the F<README> file for detailed release notes for this version.
3589
3590 =over
3591
3592 =item Z<> 1.1.9 - 15 Dec 2019
3593
3594 =item Z<> 1.1.8 - 22 Nov 2019
3595
3596 =item Z<> 1.1.7 - 14 Feb 2018
3597
3598 =item Z<> 1.1.6 - 03 Jan 2018
3599
3600 =item Z<> 1.1.5 - 07 Dec 2017
3601
3602 =item Z<> 1.1.4 - 24 Jun 2017
3603
3604 =item Z<> 1.1.3 - 13 Feb 2017
3605
3606 =item Z<> 1.1.2 - 19 Jan 2017
3607
3608 =item Z<> 1.1.1 - 12 Jan 2017
3609
3610 =item Z<> 1.1.0 - 11 Jan 2017
3611
3612 =item Z<> 1.0.4 - 05 Jun 2016
3613
3614 =item Z<> 1.0.3 - 06 Sep 2015
3615
3616 =item Z<> 1.0.2 - 03 Sep 2015
3617
3618 =item Z<> 1.0.1 - 14 Dec 2004
3619
3620 =item Z<> 1.0.0 - 28 Aug 2004
3621
3622 =back
3623
3624 =head1 AUTHORS
3625
3626 =over
3627
3628 =item John Gruber
3629
3630 =item L<https://daringfireball.net>
3631
3632 =item L<https://daringfireball.net/projects/markdown/>
3633
3634 =item E<160>
3635
3636 =back
3637
3638 =over
3639
3640 =item PHP port and other contributions by Michel Fortin
3641
3642 =item L<https://michelf.ca>
3643
3644 =item E<160>
3645
3646 =back
3647
3648 =over
3649
3650 =item Additional enhancements and tweaks by Kyle J. McKay
3651
3652 =item mackyle<at>gmail.com
3653
3654 =back
3655
3656 =head1 COPYRIGHT AND LICENSE
3657
3658 =over
3659
3660 =item Copyright (C) 2003-2004 John Gruber
3661
3662 =item Copyright (C) 2015-2019 Kyle J. McKay
3663
3664 =item All rights reserved.
3665
3666 =back
3667
3668 Redistribution and use in source and binary forms, with or without
3669 modification, are permitted provided that the following conditions are
3670 met:
3671
3672 =over
3673
3674 =item *
3675
3676 Redistributions of source code must retain the above copyright
3677 notice, this list of conditions and the following disclaimer.
3678
3679 =item *
3680
3681 Redistributions in binary form must reproduce the above copyright
3682 notice, this list of conditions and the following disclaimer in the
3683 documentation and/or other materials provided with the distribution.
3684
3685 =item *
3686
3687 Neither the name "Markdown" nor the names of its contributors may
3688 be used to endorse or promote products derived from this software
3689 without specific prior written permission.
3690
3691 =back
3692
3693 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
3694 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
3695 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
3696 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
3697 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
3698 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
3699 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
3700 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
3701 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
3702 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
3703 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3704
3705 =cut