Markdown.pl

   1 #!/usr/bin/perl
   2
   3 #
   4 # Markdown -- A text-to-HTML conversion tool for web writers
   5 #
   6 # Copyright (C) 2004 John Gruber
   7 # Copyright (C) 2015 Kyle J. McKay
   8 #
   9
  10
  11 package Markdown;
  12 require 5.006_000;
  13 use strict;
  14 use warnings;
  15
  16 use Digest::MD5 qw(md5_hex);
  17 use vars qw($VERSION);
  18 $VERSION = '1.0.3';
  19 # Sun 06 Sep 2015
  20
  21 ## Disabled; causes problems under Perl 5.6.1:
  22 # use utf8;
  23 # binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
  24
  25
#
# Global default settings:
#
# The first three can be overridden at run time by the plug-in and
# command-line interfaces further down in this file.
#
my $g_empty_element_suffix = " />";     # Change to ">" for HTML output
my $g_url_prefix = "";                  # Prefixed to non-absolute URLs
my $g_img_prefix = "";                  # Prefixed to non-absolute image URLs
# Several routines derive "$g_tab_width - 1" from this value as the maximum
# number of leading spaces a block-level construct may be indented by.
my $g_tab_width = 4;


#
# Globals:
#

# Regex to match balanced [brackets]. See Friedl's
# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
# The variable is declared before it is assigned because the pattern
# refers to itself through the deferred (??{ ... }) construct.
my $g_nested_brackets;
$g_nested_brackets = qr{
    (?>                             # Atomic matching
       [^\[\]]+                         # Anything other than brackets
     |
       \[
         (??{ $g_nested_brackets })     # Recursive set of nested brackets
       \]
    )*
}x;


# Table of hash values for escaped characters:
# each character of the list below maps to the MD5 hex digest of that
# character, which is substituted for it as a placeholder.
my %g_escape_table;
foreach my $char (split //, '\\`*_{}[]()>#+-.!~') {
    $g_escape_table{$char} = md5_hex($char);
}


# Global hashes, used by various utility routines
# All three are emptied at the start of every Markdown() call.
my %g_urls;         # lowercased link id => URL (see _StripLinkDefinitions())
my %g_titles;       # lowercased link id => link title, when one was given
my %g_html_blocks;  # MD5 hex digest of an HTML block => the block's text

# Used to track when we're inside an ordered or unordered list
# (see _ProcessListItems() for details):
my $g_list_level = 0;


#### Blosxom plug-in interface ##########################################

# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
# which posts Markdown should process, using a "meta-markup: markdown"
# header. If it's set to 0 (the default), Markdown will process all
# entries.
my $g_blosxom_use_meta = 0;
  77
  78 sub start { 1; }
  79 sub story {
  80     my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
  81
  82     if ( (! $g_blosxom_use_meta) or
  83          (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
  84          ){
  85             $$body_ref  = Markdown($$body_ref);
  86      }
  87      1;
  88 }
  89
  90
#### Movable Type plug-in interface #####################################
# Everything from here to the end of the if/else runs at load time.
# If the MT module can be loaded, the Markdown text filters are registered
# with Movable Type; otherwise the else branch below acts as a
# command-line filter (unless Blosxom is running).
eval {require MT};  # Test to see if we're running in MT.
unless ($@) {
    require MT;
    import  MT;
    require MT::Template::Context;
    import  MT::Template::Context;

    eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0.
    unless ($@) {
        require MT::Plugin;
        import  MT::Plugin;
        my $plugin = new MT::Plugin({
            name => "Markdown",
            description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
            doc_link => 'http://daringfireball.net/projects/markdown/'
        });
        MT->add_plugin( $plugin );
    }

    # Container tag "MarkdownOptions": remembers the lowercased "output"
    # argument in the context stash under 'markdown_output', then builds
    # the enclosed tokens and returns the resulting string (or the
    # builder's error when the build yields undef).
    MT::Template::Context->add_container_tag(MarkdownOptions => sub {
        my $ctx  = shift;
        my $args = shift;
        my $builder = $ctx->stash('builder');
        my $tokens = $ctx->stash('tokens');

        if (defined ($args->{'output'}) ) {
            $ctx->stash('markdown_output', lc $args->{'output'});
        }

        defined (my $str = $builder->build($ctx, $tokens) )
            or return $ctx->error($builder->errstr);
        $str;       # return value
    });

    # Text filter "markdown". The stashed 'markdown_output' value selects
    # the mode:
    #   begins with "html" -> empty elements close with ">"; stash cleared
    #   exactly "raw"      -> text is returned unprocessed; stash cleared
    #   anything else      -> empty elements close with " />"
    # Without a context the current suffix setting is left untouched.
    MT->add_text_filter('markdown' => {
        label     => 'Markdown',
        docs      => 'http://daringfireball.net/projects/markdown/',
        on_format => sub {
            my $text = shift;
            my $ctx  = shift;
            my $raw  = 0;
            if (defined $ctx) {
            my $output = $ctx->stash('markdown_output');
                if (defined $output  &&  $output =~ m/^html/i) {
                    $g_empty_element_suffix = ">";
                    $ctx->stash('markdown_output', '');
                }
                elsif (defined $output  &&  $output eq 'raw') {
                    $raw = 1;
                    $ctx->stash('markdown_output', '');
                }
                else {
                    $raw = 0;
                    $g_empty_element_suffix = " />";
                }
            }
            $text = $raw ? $text : Markdown($text);
            $text;
        },
    });

    # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
    my $smartypants;

    {
        # The package hash is referenced only here; silence the
        # "used only once" warning for this one lookup.
        no warnings "once";
        $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
    }

    if ($smartypants) {
        # NOTE(review): this filter compares the stashed value with
        # eq 'html', whereas the plain 'markdown' filter above matches
        # m/^html/i, and it neither honours 'raw' nor clears the stash.
        # Confirm whether the difference is intended.
        MT->add_text_filter('markdown_with_smartypants' => {
            label     => 'Markdown With SmartyPants',
            docs      => 'http://daringfireball.net/projects/markdown/',
            on_format => sub {
                my $text = shift;
                my $ctx  = shift;
                if (defined $ctx) {
                    my $output = $ctx->stash('markdown_output');
                    if (defined $output  &&  $output eq 'html') {
                        $g_empty_element_suffix = ">";
                    }
                    else {
                        $g_empty_element_suffix = " />";
                    }
                }
                $text = Markdown($text);
                # The value of this final assignment is the filter's result.
                $text = $smartypants->($text, '1');
            },
        });
    }
}
else {
#### BBEdit/command-line text filter interface ##########################
# Needs to be hidden from MT (and Blosxom when running in static mode).

    # We're only using $blosxom::version once; tell Perl not to warn us:
    no warnings 'once';
    unless ( defined($blosxom::version) ) {
        use warnings;

        #### Check for command-line switches: #################
        my %cli_opts;
        use Getopt::Long;
        # 'pass_through' leaves unrecognised options in @ARGV instead of
        # reporting them as errors.
        Getopt::Long::Configure('pass_through');
        GetOptions(\%cli_opts,
            'help|h',
            'version|V|v',
            'shortversion|short-version|s',
            'html4tags',
            'htmlroot|r=s',
            'imageroot|i=s',
        );
        if ($cli_opts{'help'}) {
            # Hands the process over to perldoc, run on this script.
            exec 'perldoc', $0;
        }
        if ($cli_opts{'version'}) {     # Version info
            print "\nThis is Markdown, version $VERSION.\n";
            print "Copyright (C) 2004 John Gruber\n";
            print "Copyright (C) 2015 Kyle J. McKay\n";
            exit 0;
        }
        if ($cli_opts{'shortversion'}) {        # Just the version number string.
            print $VERSION;
            exit 0;
        }
        if ($cli_opts{'html4tags'}) {           # Use HTML tag style instead of XHTML
            $g_empty_element_suffix = ">";
        }
        if ($cli_opts{'htmlroot'}) {            # Use URL prefix
            $g_url_prefix = $cli_opts{'htmlroot'};
        }
        if ($cli_opts{'imageroot'}) {           # Use image URL prefix
            $g_img_prefix = $cli_opts{'imageroot'};
        }


        #### Process incoming text: ###########################
        # Reads everything from the files named in @ARGV (or STDIN) in one
        # go, converts it and prints the result to STDOUT.
        my $text;
        {
            local $/;               # Slurp the whole file
            $text = <>;
        }
        print Markdown($text);
    }
}
 237
 238
 239
 240 sub Markdown {
 241 #
 242 # Main function. The order in which other subs are called here is
 243 # essential. Link and image substitutions need to happen before
 244 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
 245 # and <img> tags get encoded.
 246 #
 247     my $text = shift;
 248
 249     # Clear the global hashes. If we don't clear these, you get conflicts
 250     # from other articles when generating a page which contains more than
 251     # one article (e.g. an index page that shows the N most recent
 252     # articles):
 253     %g_urls = ();
 254     %g_titles = ();
 255     %g_html_blocks = ();
 256
 257
 258     # Standardize line endings:
 259     $text =~ s{\r\n}{\n}g;  # DOS to Unix
 260     $text =~ s{\r}{\n}g;    # Mac to Unix
 261
 262     # Make sure $text ends with a couple of newlines:
 263     $text .= "\n\n";
 264
 265     # Convert all tabs to spaces.
 266     $text = _Detab($text);
 267
 268     # Strip any lines consisting only of spaces and tabs.
 269     # This makes subsequent regexen easier to write, because we can
 270     # match consecutive blank lines with /\n+/ instead of something
 271     # contorted like /[ \t]*\n+/ .
 272     $text =~ s/^[ \t]+$//mg;
 273
 274     # Turn block-level HTML blocks into hash entries
 275     $text = _HashHTMLBlocks($text);
 276
 277     # Strip link definitions, store in hashes.
 278     $text = _StripLinkDefinitions($text);
 279
 280     $text = _RunBlockGamut($text);
 281
 282     $text = _UnescapeSpecialChars($text);
 283
 284     return $text . "\n";
 285 }
 286
 287
 288 sub _StripLinkDefinitions {
 289 #
 290 # Strips link definitions from text, stores the URLs and titles in
 291 # hash references.
 292 #
 293     my $text = shift;
 294     my $less_than_tab = $g_tab_width - 1;
 295
 296     # Link defs are in the form: ^[id]: url "optional title"
 297     while ($text =~ s{
 298                         ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
 299                           [ \t]*
 300                           \n?               # maybe *one* newline
 301                           [ \t]*
 302                         <?(\S+?)>?          # url = $2
 303                           [ \t]*
 304                           \n?               # maybe one newline
 305                           [ \t]*
 306                         (?:
 307                             (?<=\s)         # lookbehind for whitespace
 308                             ["(]
 309                             (.+?)           # title = $3
 310                             [")]
 311                             [ \t]*
 312                         )?  # title is optional
 313                         (?:\n+|\Z)
 314                     }
 315                     {}mx) {
 316         $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );    # Link IDs are case-insensitive
 317         if ($3) {
 318             $g_titles{lc $1} = $3;
 319             $g_titles{lc $1} =~ s/"/&quot;/g;
 320         }
 321     }
 322
 323     return $text;
 324 }
 325
 326
 327 sub _HashHTMLBlocks {
 328     my $text = shift;
 329     my $less_than_tab = $g_tab_width - 1;
 330
 331     # Hashify HTML blocks:
 332     # We only want to do this for block-level HTML tags, such as headers,
 333     # lists, and tables. That's because we still want to wrap <p>s around
 334     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 335     # phrase emphasis, and spans. The list of tags we're looking for is
 336     # hard-coded:
 337     my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
 338     my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
 339
 340     # First, look for nested blocks, e.g.:
 341     #   <div>
 342     #       <div>
 343     #       tags for inner block must be indented.
 344     #       </div>
 345     #   </div>
 346     #
 347     # The outermost tags must start at the left margin for this to match, and
 348     # the inner nested divs must be indented.
 349     # We need to do this before the next, more liberal match, because the next
 350     # match will start at the first `<div>` and stop at the first `</div>`.
 351     $text =~ s{
 352                 (                       # save in $1
 353                     ^                   # start of line  (with /m)
 354                     <($block_tags_a)    # start tag = $2
 355                     \b                  # word break
 356                     (.*\n)*?            # any number of lines, minimally matching
 357                     </\2>               # the matching end tag
 358                     [ \t]*              # trailing spaces/tabs
 359                     (?=\n+|\Z)  # followed by a newline or end of document
 360                 )
 361             }{
 362                 my $key = md5_hex($1);
 363                 $g_html_blocks{$key} = $1;
 364                 "\n\n" . $key . "\n\n";
 365             }egmx;
 366
 367
 368     #
 369     # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 370     #
 371     $text =~ s{
 372                 (                       # save in $1
 373                     ^                   # start of line  (with /m)
 374                     <($block_tags_b)    # start tag = $2
 375                     \b                  # word break
 376                     (.*\n)*?            # any number of lines, minimally matching
 377                     .*</\2>             # the matching end tag
 378                     [ \t]*              # trailing spaces/tabs
 379                     (?=\n+|\Z)  # followed by a newline or end of document
 380                 )
 381             }{
 382                 my $key = md5_hex($1);
 383                 $g_html_blocks{$key} = $1;
 384                 "\n\n" . $key . "\n\n";
 385             }egmx;
 386     # Special case just for <hr />. It was easier to make a special case than
 387     # to make the other regex more complicated.
 388     $text =~ s{
 389                 (?:
 390                     (?<=\n\n)       # Starting after a blank line
 391                     |               # or
 392                     \A\n?           # the beginning of the doc
 393                 )
 394                 (                       # save in $1
 395                     [ ]{0,$less_than_tab}
 396                     <(hr)               # start tag = $2
 397                     \b                  # word break
 398                     ([^<>])*?           #
 399                     /?>                 # the matching end tag
 400                     [ \t]*
 401                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
 402                 )
 403             }{
 404                 my $key = md5_hex($1);
 405                 $g_html_blocks{$key} = $1;
 406                 "\n\n" . $key . "\n\n";
 407             }egx;
 408
 409     # Special case for standalone HTML comments:
 410     $text =~ s{
 411                 (?:
 412                     (?<=\n\n)       # Starting after a blank line
 413                     |               # or
 414                     \A\n?           # the beginning of the doc
 415                 )
 416                 (                       # save in $1
 417                     [ ]{0,$less_than_tab}
 418                     (?s:
 419                         <!
 420                         (--.*?--\s*)+
 421                         >
 422                     )
 423                     [ \t]*
 424                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
 425                 )
 426             }{
 427                 my $key = md5_hex($1);
 428                 $g_html_blocks{$key} = $1;
 429                 "\n\n" . $key . "\n\n";
 430             }egx;
 431
 432
 433     return $text;
 434 }
 435
 436
 437 sub _RunBlockGamut {
 438 #
 439 # These are all the transformations that form block-level
 440 # tags like paragraphs, headers, and list items.
 441 #
 442     my $text = shift;
 443
 444     $text = _DoHeaders($text);
 445
 446     # Do Horizontal Rules:
 447     $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 448     $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 449     $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 450
 451     $text = _DoLists($text);
 452
 453     $text = _DoCodeBlocks($text);
 454
 455     $text = _DoBlockQuotes($text);
 456
 457     # We already ran _HashHTMLBlocks() before, in Markdown(), but that
 458     # was to escape raw HTML in the original Markdown source. This time,
 459     # we're escaping the markup we've just created, so that we don't wrap
 460     # <p> tags around block-level tags.
 461     $text = _HashHTMLBlocks($text);
 462
 463     $text = _FormParagraphs($text);
 464
 465     return $text;
 466 }
 467
 468
 469 sub _RunSpanGamut {
 470 #
 471 # These are all the transformations that occur *within* block-level
 472 # tags like paragraphs, headers, and list items.
 473 #
 474     my $text = shift;
 475
 476     $text = _DoCodeSpans($text);
 477
 478     $text = _EscapeSpecialChars($text);
 479
 480     # Process anchor and image tags. Images must come first,
 481     # because ![foo][f] looks like an anchor.
 482     $text = _DoImages($text);
 483     $text = _DoAnchors($text);
 484
 485     # Make links out of things like `<http://example.com/>`
 486     # Must come after _DoAnchors(), because you can use < and >
 487     # delimiters in inline links like [this](<url>).
 488     $text = _DoAutoLinks($text);
 489
 490     $text = _EncodeAmpsAndAngles($text);
 491
 492     $text = _DoItalicsAndBoldAndStrike($text);
 493
 494     # Do hard breaks:
 495     $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
 496
 497     return $text;
 498 }
 499
 500
 501 sub _EscapeSpecialChars {
 502     my $text = shift;
 503     my $tokens ||= _TokenizeHTML($text);
 504
 505     $text = '';   # rebuild $text from the tokens
 506 #   my $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
 507 #   my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
 508
 509     foreach my $cur_token (@$tokens) {
 510         if ($cur_token->[0] eq "tag") {
 511             # Within tags, encode *, _ and ~ so they don't conflict
 512             # with their use in Markdown for italics and strong.
 513             # We're replacing each such character with its
 514             # corresponding MD5 checksum value; this is likely
 515             # overkill, but it should prevent us from colliding
 516             # with the escape values by accident.
 517             $cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
 518             $cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
 519             $cur_token->[1] =~  s! ~  !$g_escape_table{'~'}!gx;
 520             $text .= $cur_token->[1];
 521         } else {
 522             my $t = $cur_token->[1];
 523             $t = _EncodeBackslashEscapes($t);
 524             $text .= $t;
 525         }
 526     }
 527     return $text;
 528 }
 529
 530
 531 sub _DoAnchors {
 532 #
 533 # Turn Markdown link shortcuts into XHTML <a> tags.
 534 #
 535     my $text = shift;
 536
 537     #
 538     # First, handle reference-style links: [link text] [id]
 539     #
 540     $text =~ s{
 541         (                   # wrap whole match in $1
 542           \[
 543             ($g_nested_brackets)    # link text = $2
 544           \]
 545
 546           [ ]?              # one optional space
 547           (?:\n[ ]*)?       # one optional newline followed by spaces
 548
 549           \[
 550             (.*?)       # id = $3
 551           \]
 552         )
 553     }{
 554         my $result;
 555         my $whole_match = $1;
 556         my $link_text   = $2;
 557         my $link_id     = lc $3;
 558
 559         if ($link_id eq "") {
 560             $link_id = lc $link_text;     # for shortcut links like [this][].
 561         }
 562
 563         if (defined $g_urls{$link_id}) {
 564             my $url = _PrefixURL($g_urls{$link_id});
 565             $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
 566             $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics, bold
 567             $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # and strike through.
 568             $result = "<a href=\"$url\"";
 569             if ( defined $g_titles{$link_id} ) {
 570                 my $title = $g_titles{$link_id};
 571                 $title =~ s! \* !$g_escape_table{'*'}!gx;
 572                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
 573                 $title =~ s!  ~ !$g_escape_table{'~'}!gx;
 574                 $result .=  " title=\"$title\"";
 575             }
 576             $result .= ">$link_text</a>";
 577         }
 578         else {
 579             $result = $whole_match;
 580         }
 581         $result;
 582     }xsge;
 583
 584     #
 585     # Next, inline-style links: [link text](url "optional title")
 586     #
 587     $text =~ s{
 588         (               # wrap whole match in $1
 589           \[
 590             ($g_nested_brackets)    # link text = $2
 591           \]
 592           \(            # literal paren
 593             [ \t]*
 594             <?(.*?)>?   # href = $3
 595             [ \t]*
 596             (           # $4
 597               (['"])    # quote char = $5
 598               (.*?)     # Title = $6
 599               \5        # matching quote
 600             )?          # title is optional
 601           \)
 602         )
 603     }{
 604         my $result;
 605         my $whole_match = $1;
 606         my $link_text   = $2;
 607         my $url     = $3;
 608         my $title   = $6;
 609
 610         $url = _PrefixURL($url);
 611         $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
 612         $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics, bold
 613         $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # and strike through.
 614         $result = "<a href=\"$url\"";
 615
 616         if (defined $title) {
 617             $title =~ s/"/&quot;/g;
 618             $title =~ s! \* !$g_escape_table{'*'}!gx;
 619             $title =~ s!  _ !$g_escape_table{'_'}!gx;
 620             $title =~ s!  ~ !$g_escape_table{'~'}!gx;
 621             $result .=  " title=\"$title\"";
 622         }
 623
 624         $result .= ">$link_text</a>";
 625
 626         $result;
 627     }xsge;
 628
 629     return $text;
 630 }
 631
 632
 633 sub _DoImages {
 634 #
 635 # Turn Markdown image shortcuts into <img> tags.
 636 #
 637     my $text = shift;
 638
 639     #
 640     # First, handle reference-style labeled images: ![alt text][id]
 641     #
 642     $text =~ s{
 643         (               # wrap whole match in $1
 644           !\[
 645             (.*?)       # alt text = $2
 646           \]
 647
 648           [ ]?              # one optional space
 649           (?:\n[ ]*)?       # one optional newline followed by spaces
 650
 651           \[
 652             (.*?)       # id = $3
 653           \]
 654
 655         )
 656     }{
 657         my $result;
 658         my $whole_match = $1;
 659         my $alt_text    = $2;
 660         my $link_id     = lc $3;
 661
 662         if ($link_id eq "") {
 663             $link_id = lc $alt_text;     # for shortcut links like ![this][].
 664         }
 665
 666         $alt_text =~ s/"/&quot;/g;
 667         if (defined $g_urls{$link_id}) {
 668             my $url = _PrefixURL($g_urls{$link_id});
 669             $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
 670             $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics, bold
 671             $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # and strike through.
 672             $result = "<img src=\"$url\" alt=\"$alt_text\"";
 673             if (defined $g_titles{$link_id}) {
 674                 my $title = $g_titles{$link_id};
 675                 $title =~ s! \* !$g_escape_table{'*'}!gx;
 676                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
 677                 $title =~ s!  ~ !$g_escape_table{'~'}!gx;
 678                 $result .=  " title=\"$title\"";
 679             }
 680             $result .= $g_empty_element_suffix;
 681         }
 682         else {
 683             # If there's no such link ID, leave intact:
 684             $result = $whole_match;
 685         }
 686
 687         $result;
 688     }xsge;
 689
 690     #
 691     # Next, handle inline images:  ![alt text](url "optional title")
 692     # Don't forget: encode * and _
 693
 694     $text =~ s{
 695         (               # wrap whole match in $1
 696           !\[
 697             (.*?)       # alt text = $2
 698           \]
 699           \(            # literal paren
 700             [ \t]*
 701             <?(\S+?)>?  # src url = $3
 702             [ \t]*
 703             (           # $4
 704               (['"])    # quote char = $5
 705               (.*?)     # title = $6
 706               \5        # matching quote
 707               [ \t]*
 708             )?          # title is optional
 709           \)
 710         )
 711     }{
 712         my $result;
 713         my $whole_match = $1;
 714         my $alt_text    = $2;
 715         my $url     = $3;
 716         my $title   = '';
 717         if (defined($6)) {
 718             $title  = $6;
 719         }
 720
 721         $url = _PrefixURL($url);
 722         $alt_text =~ s/"/&quot;/g;
 723         $title    =~ s/"/&quot;/g;
 724         $url =~ s! \* !$g_escape_table{'*'}!gx;     # We've got to encode these to avoid
 725         $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicting with italics, bold
 726         $url =~ s!  ~ !$g_escape_table{'~'}!gx;     # and strike through.
 727         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 728         if (defined $title) {
 729             $title =~ s! \* !$g_escape_table{'*'}!gx;
 730             $title =~ s!  _ !$g_escape_table{'_'}!gx;
 731             $title =~ s!  ~ !$g_escape_table{'~'}!gx;
 732             $result .=  " title=\"$title\"";
 733         }
 734         $result .= $g_empty_element_suffix;
 735
 736         $result;
 737     }xsge;
 738
 739     return $text;
 740 }
 741
 742
 743 sub _DoHeaders {
 744     my $text = shift;
 745
 746     # Setext-style headers:
 747     #     Header 1
 748     #     ========
 749     #
 750     #     Header 2
 751     #     --------
 752     #
 753     #     Header 3
 754     #     ~~~~~~~~
 755     #
 756     $text =~ s{ ^(?:=+[ \t]*\n)?(.+)[ \t]*\n=+[ \t]*\n+ }{
 757         "<h1>"  .  _RunSpanGamut($1)  .  "</h1>\n\n";
 758     }egmx;
 759
 760     $text =~ s{ ^(?:-+[ \t]*\n)?(.+)[ \t]*\n-+[ \t]*\n+ }{
 761         "<h2>"  .  _RunSpanGamut($1)  .  "</h2>\n\n";
 762     }egmx;
 763
 764     $text =~ s{ ^(?:~+[ \t]*\n)?(.+)[ \t]*\n~+[ \t]*\n+ }{
 765         "<h3>"  .  _RunSpanGamut($1)  .  "</h3>\n\n";
 766     }egmx;
 767
 768
 769     # atx-style headers:
 770     #   # Header 1
 771     #   ## Header 2
 772     #   ## Header 2 with closing hashes ##
 773     #   ...
 774     #   ###### Header 6
 775     #
 776     $text =~ s{
 777             ^(\#{1,6})  # $1 = string of #'s
 778             [ \t]*
 779             (.+?)       # $2 = Header text
 780             [ \t]*
 781             \#*         # optional closing #'s (not counted)
 782             \n+
 783         }{
 784             my $h_level = length($1);
 785             "<h$h_level>"  .  _RunSpanGamut($2)  .  "</h$h_level>\n\n";
 786         }egmx;
 787
 788     return $text;
 789 }
 790
 791
 792 sub _DoLists {
 793 #
 794 # Form HTML ordered (numbered) and unordered (bulleted) lists.
 795 #
 796     my $text = shift;
 797     my $less_than_tab = $g_tab_width - 1;
 798
 799     # Re-usable patterns to match list item bullets and number markers:
 800     my $marker_ul  = qr/[*+-]/;
 801     my $marker_ol  = qr/\d+[.]/;
 802     my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
 803
 804     # Re-usable pattern to match any entirel ul or ol list:
 805     my $whole_list = qr{
 806         (                               # $1 = whole list
 807           (                             # $2
 808             [ ]{0,$less_than_tab}
 809             (${marker_any})             # $3 = first list item marker
 810             [ \t]+
 811           )
 812           (?s:.+?)
 813           (                             # $4
 814               \z
 815             |
 816               \n{2,}
 817               (?=\S)
 818               (?!                       # Negative lookahead for another list item marker
 819                 [ \t]*
 820                 ${marker_any}[ \t]+
 821               )
 822           )
 823         )
 824     }mx;
 825
 826     # We use a different prefix before nested lists than top-level lists.
 827     # See extended comment in _ProcessListItems().
 828     #
 829     # Note: There's a bit of duplication here. My original implementation
 830     # created a scalar regex pattern as the conditional result of the test on
 831     # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
 832     # substitution once, using the scalar as the pattern. This worked,
 833     # everywhere except when running under MT on my hosting account at Pair
 834     # Networks. There, this caused all rebuilds to be killed by the reaper (or
 835     # perhaps they crashed, but that seems incredibly unlikely given that the
 836     # same script on the same server ran fine *except* under MT. I've spent
 837     # more time trying to figure out why this is happening than I'd like to
 838     # admit. My only guess, backed up by the fact that this workaround works,
 839     # is that Perl optimizes the substition when it can figure out that the
 840     # pattern will never change, and when this optimization isn't on, we run
 841     # afoul of the reaper. Thus, the slightly redundant code to that uses two
 842     # static s/// patterns rather than one conditional pattern.
 843
 844     if ($g_list_level) {
 845         $text =~ s{
 846                 ^
 847                 $whole_list
 848             }{
 849                 my $list = $1;
 850                 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
 851                 # Turn double returns into triple returns, so that we can make a
 852                 # paragraph for the last item in a list, if necessary:
 853                 $list =~ s/\n{2,}/\n\n\n/g;
 854                 my $result = _ProcessListItems($list, $marker_any);
 855                 $result = "<$list_type>\n" . $result . "</$list_type>\n";
 856                 $result;
 857             }egmx;
 858     }
 859     else {
 860         $text =~ s{
 861                 (?:(?<=\n\n)|\A\n?)
 862                 $whole_list
 863             }{
 864                 my $list = $1;
 865                 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
 866                 # Turn double returns into triple returns, so that we can make a
 867                 # paragraph for the last item in a list, if necessary:
 868                 $list =~ s/\n{2,}/\n\n\n/g;
 869                 my $result = _ProcessListItems($list, $marker_any);
 870                 $result = "<$list_type>\n" . $result . "</$list_type>\n";
 871                 $result;
 872             }egmx;
 873     }
 874
 875
 876     return $text;
 877 }
 878
 879
 880 sub _ProcessListItems {
 881 #
 882 #   Process the contents of a single ordered or unordered list, splitting it
 883 #   into individual list items.
 884 #
 885
 886     my $list_str = shift;
 887     my $marker_any = shift;
 888
 889
 890     # The $g_list_level global keeps track of when we're inside a list.
 891     # Each time we enter a list, we increment it; when we leave a list,
 892     # we decrement. If it's zero, we're not in a list anymore.
 893     #
 894     # We do this because when we're not inside a list, we want to treat
 895     # something like this:
 896     #
 897     #       I recommend upgrading to version
 898     #       8. Oops, now this line is treated
 899     #       as a sub-list.
 900     #
 901     # As a single paragraph, despite the fact that the second line starts
 902     # with a digit-period-space sequence.
 903     #
 904     # Whereas when we're inside a list (or sub-list), that line will be
 905     # treated as the start of a sub-list. What a kludge, huh? This is
 906     # an aspect of Markdown's syntax that's hard to parse perfectly
 907     # without resorting to mind-reading. Perhaps the solution is to
 908     # change the syntax rules such that sub-lists must start with a
 909     # starting cardinal number; e.g. "1." or "a.".
 910
 911     $g_list_level++;
 912
 913     # trim trailing blank lines:
 914     $list_str =~ s/\n{2,}\z/\n/;
 915
 916
 917     $list_str =~ s{
 918         (\n)?                           # leading line = $1
 919         (^[ \t]*)                       # leading whitespace = $2
 920         ($marker_any) [ \t]+            # list marker = $3
 921         ((?s:.+?)                       # list item text   = $4
 922         (\n{1,2}))
 923         (?= \n* (\z | \2 ($marker_any) [ \t]+))
 924     }{
 925         my $item = $4;
 926         my $leading_line = $1;
 927         my $leading_space = $2;
 928
 929         if ($leading_line or ($item =~ m/\n{2,}/)) {
 930             $item = _RunBlockGamut(_Outdent($item));
 931         }
 932         else {
 933             # Recursion for sub-lists:
 934             $item = _DoLists(_Outdent($item));
 935             chomp $item;
 936             $item = _RunSpanGamut($item);
 937         }
 938
 939         "<li>" . $item . "</li>\n";
 940     }egmx;
 941
 942     $g_list_level--;
 943     return $list_str;
 944 }
 945
 946
 947
 948 sub _DoCodeBlocks {
 949 #
 950 #   Process Markdown `<pre><code>` blocks.
 951 #
 952
 953     my $text = shift;
 954
 955     $text =~ s{
 956             (?:\n\n|\A)
 957             (               # $1 = the code block -- one or more lines, starting with a space/tab
 958               (?:
 959                 (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
 960                 .*\n+
 961               )+
 962             )
 963             ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
 964         }{
 965             my $codeblock = $1;
 966             my $result; # return value
 967
 968             $codeblock = _EncodeCode(_Outdent($codeblock));
 969             $codeblock = _Detab($codeblock);
 970             $codeblock =~ s/\A\n+//; # trim leading newlines
 971             $codeblock =~ s/\s+\z//; # trim trailing whitespace
 972
 973             $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
 974
 975             $result;
 976         }egmx;
 977
 978     $text =~ s{
 979             (?:\n|\A)
 980                 ``(`+)[ \t]*(?:[\w.-]+[ \t]*)?\n
 981             (               # $1 = the code block -- one or more lines, starting with ```
 982               (?:
 983                 .*\n+
 984               )+?
 985             )
 986             (?:(?:``\1[ \t]*(?:\n|\Z))|\Z) # and ending with ``` or end of document
 987         }{
 988             my $codeblock = $2;
 989             my $result; # return value
 990
 991             $codeblock = _EncodeCode($codeblock);
 992             $codeblock = _Detab($codeblock);
 993             $codeblock =~ s/\A\n+//; # trim leading newlines
 994             $codeblock =~ s/\s+\z//; # trim trailing whitespace
 995
 996             $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
 997
 998             $result;
 999         }egmx;
1000
1001     return $text;
1002 }
1003
1004
sub _DoCodeSpans {
#
#   Convert backtick-quoted runs into <code></code> spans.
#
#   *   The delimiter is a run of one or more backticks; the span ends at
#       the next run of exactly the same length. Longer delimiters let
#       the code itself contain backticks, so this input:
#
#         Just type ``foo `bar` baz`` at the prompt.
#
#       becomes:
#
#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#       Any number of backticks can serve as the delimiter: code that
#       contains three in a row can be wrapped in four, and so on.
#
#   *   Spaces or tabs just inside the delimiters are dropped, which is
#       how a span can begin or end with a literal backtick:
#
#         ... type `` `bar` `` ...
#
#       becomes:
#
#         ... type <code>`bar`</code> ...
#
#   Params:
#       $text - text to scan
#
#   Returns: the text with every code span replaced.
#
    my ($text) = @_;

    # Builds the replacement for one span from its raw content.
    my $to_code = sub {
        my $inner = shift;
        $inner =~ s/^[ \t]*//g; # leading whitespace
        $inner =~ s/[ \t]*$//g; # trailing whitespace
        return "<code>" . _EncodeCode($inner) . "</code>";
    };

    $text =~ s{
            (`+)        # $1 = Opening run of `
            (.+?)       # $2 = The code block
            (?<!`)
            \1          # Matching closer
            (?!`)
        }{
            $to_code->("$2");
        }egsx;

    return $text;
}
1049
1050
sub _EncodeCode {
#
#   Encode/escape certain characters inside Markdown code runs, where
#   they are literals and must lose their special Markdown meaning.
#
#   Params:
#       the code text (first argument)
#
#   Returns: a copy of the text in which
#       - every "&" is "&amp;", "<" is "&lt;" and ">" is "&gt;";
#       - every "$" is "&#036;", but only when $blosxom::version is
#         defined;
#       - each of  * _ ~ { } [ ] \  is replaced by its %g_escape_table
#         entry.
#
    my $code = shift;

    # Encode all ampersands; HTML entities are not entities within a
    # Markdown code span. This has to come first because the
    # replacements below introduce ampersands of their own.
    $code =~ s/&/&amp;/g;

    # Encode $'s, but only if we're running under Blosxom.
    # (Blosxom interpolates Perl variables in article bodies.)
    {
        no warnings 'once';
        if (defined($blosxom::version)) {
            $code =~ s/\$/&#036;/g;
        }
    }

    # Do the angle bracket song and dance:
    $code =~ s/</&lt;/g;
    $code =~ s/>/&gt;/g;

    # Now, escape characters that are magic in Markdown. A single pass
    # with a table lookup gives the same result as one pass per
    # character. The braces are escaped so the pattern does not depend
    # on how a bare "{" happens to be parsed.
    $code =~ s/([*_~\{\}\[\]\\])/$g_escape_table{$1}/g;

    return $code;
}
1089
1090
sub _DoItalicsAndBoldAndStrike {
#
#   Convert emphasis markers to HTML tags:
#
#       **text** and __text__   ->  <strong>text</strong>
#       ~~text~~                ->  <strike>text</strike>
#       *text*   and _text_     ->  <em>text</em>
#
#   The underscore forms only match when the markers are not directly
#   adjacent to a word character on their outer side.
#
#   Params:
#       $text - text to process
#
#   Returns: the text with the markers replaced.
#
    my ($text) = @_;

    # Applied strictly in this order; <strong> must go first so that the
    # doubled markers are consumed before the single-marker <em> rules.
    my @rules = (
        [ 'strong', qr{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }sx ],
        [ 'strong', qr{ (?<!\w) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\w) }sx ],
        [ 'strike', qr{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }sx ],
        [ 'em',     qr{ \* (?=\S) (.+?) (?<=\S) \* }sx ],
        [ 'em',     qr{ (?<!\w) _ (?=\S) (.+?) (?<=\S) _ (?!\w) }sx ],
    );

    foreach my $rule (@rules) {
        my ($tag, $pattern) = @$rule;
        $text =~ s{$pattern}{<$tag>$1</$tag>}g;
    }

    return $text;
}
1110
1111
sub _DoBlockQuotes {
#
#   Convert runs of ">"-quoted lines into <blockquote> elements.
#
#   One level of quoting is removed, the content is run through
#   _RunBlockGamut() (so quotes can nest and contain other block-level
#   constructs), and the result is indented by two spaces inside the
#   <blockquote> tags. Content of <pre> elements is left unindented.
#
#   Params:
#       $text - document text
#
#   Returns: the text with block quotes replaced.
#
    my $text = shift;

    $text =~ s{
          (                             # Wrap whole match in $1
            (
              ^[ \t]*>[ \t]?            # '>' at the start of a line
                .+\n                    # rest of the first line
              (.+\n)*                   # subsequent consecutive lines
              \n*                       # blanks
            )+
          )
        }{
            my $bq = $1;
            $bq =~ s/^[ \t]*>[ \t]?//gm;    # trim one level of quoting
            $bq =~ s/^[ \t]+$//mg;          # trim whitespace-only lines
            $bq = _RunBlockGamut($bq);      # recurse

            # Indent every line of the quote. The /m matters: without it
            # "^" only matches at the very start, so just the first line
            # would be indented while the <pre> clean-up below would still
            # strip two spaces from every <pre> line, eating indentation
            # that belongs to the quoted code.
            $bq =~ s/^/  /mg;
            # These leading spaces screw with <pre> content, so we need to fix that:
            $bq =~ s{
                    (\s*<pre>.+?</pre>)
                }{
                    my $pre = $1;
                    $pre =~ s/^  //mg;
                    $pre;
                }egsx;

            "<blockquote>\n$bq\n</blockquote>\n\n";
        }egmx;


    return $text;
}
1146
1147
sub _FormParagraphs {
#
#   Split the text on blank lines and wrap each chunk in <p>...</p>,
#   except chunks that are keys of %g_html_blocks: those are replaced by
#   the block stored under that key.
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Returns: the chunks joined with a blank line between them.
#
    my ($text) = @_;

    # Drop newlines at the very start and end of the text:
    $text =~ s/\A\n+//;
    $text =~ s/\n+\z//;

    my @grafs = split(/\n{2,}/, $text);

    # First pass: anything that is not a hashed HTML block gets span-level
    # processing and <p> tags; leading spaces and tabs are dropped.
    for my $graf (@grafs) {
        next if defined $g_html_blocks{$graf};

        $graf = _RunSpanGamut($graf);
        $graf =~ s/^([ \t]*)/<p>/;
        $graf .= "</p>";
    }

    # Second pass: swap the hashed HTML blocks back in.
    @grafs = map { defined $g_html_blocks{$_} ? $g_html_blocks{$_} : $_ } @grafs;

    return join "\n\n", @grafs;
}
1183
1184
sub _EncodeAmpsAndAngles {
#
#   Encode the ampersands and left angle brackets that are not already
#   part of markup.
#
#   Params:
#       $text - text to process
#
#   Returns: the text with
#       - "&" turned into "&amp;" unless it starts something that looks
#         like an entity reference (named or numeric, ending in ";");
#       - "<" turned into "&lt;" unless it is followed by a letter or
#         one of  / ? $ !
#
    my ($text) = @_;

    for ($text) {
        # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
        #   http://bumppo.net/projects/amputator/
        s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;

        # Encode naked <'s
        s{<(?![a-z/?\$!])}{&lt;}gi;
    }

    return $text;
}
1199
1200
sub _EncodeBackslashEscapes {
#
#   Parameter:  String.
#   Returns:    The string with each backslash escape sequence replaced
#               by the %g_escape_table entry of the escaped character.
#               The recognized sequences are a backslash followed by
#               one of:
#
#                   \ ` * _ ~ { } [ ] ( ) > # + - . !
#
#               A backslash in front of any other character is left
#               alone.
#
    my $text = shift;

    # One left-to-right pass. A run of backslashes is consumed in pairs
    # from its left end, which is exactly what handling the escaped
    # backslashes before everything else achieves, and the table entries
    # are looked up by the escaped character.
    $text =~ s/\\([\\`*_~\{\}\[\]()>\#+\-.!])/$g_escape_table{$1}/g;

    return $text;
}
1229
1230
sub _DoAutoLinks {
#
#   Turn <...>-wrapped URLs and email addresses into links.
#
#   *   <http://...>, <https://...> and <ftp://...> become ordinary
#       <a href> links whose text is the URL itself.
#
#   *   <address@domain.foo>, with an optional "mailto:" in front, is
#       handed to _EncodeEmailAddress() after _UnescapeSpecialChars()
#       has been applied to the address.
#
#   Params:
#       $text - text to process
#
#   Returns: the text with the automatic links replaced.
#
    my ($text) = @_;

    # Produces the replacement for one matched email address.
    my $mail_link = sub {
        my $address = shift;
        return _EncodeEmailAddress( _UnescapeSpecialChars($address) );
    };

    $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;

    # Email addresses: <address@domain.foo>
    $text =~ s{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
    }{
        my $address = $1;
        $mail_link->($address);
    }egix;

    return $text;
}
1252
1253
sub _EncodeEmailAddress {
#
#   Input: an email address, e.g. "foo@example.com"
#
#   Output: the email address as a mailto link, with each character
#       of the address encoded as either a decimal or hex entity, in
#       the hopes of foiling most address harvesting spam bots. E.g.:
#
#     <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
#       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
#       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
#
#   The encoding is chosen at random for every character, so two calls
#   with the same address normally return different markup. The "@" is
#   always written as an entity, and the ":" of "mailto:" is always
#   left as it is.
#
#   Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
#   mailing list: <http://tinyurl.com/yu7ue>
#

    my $addr = shift;

    # No srand() here: rand() seeds itself on first use, and reseeding
    # on every call is discouraged by the Perl documentation.
    my @encode = (
        sub { '&#' .                 ord(shift)   . ';' },
        sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
        sub {                            shift          },
    );

    $addr = "mailto:" . $addr;

    $addr =~ s{(.)}{
        my $char = $1;
        if ( $char eq '@' ) {
            # this *must* be encoded. I insist.
            # Pick the decimal or the hex encoder; "int rand 1" would
            # always be 0 and so would never pick the hex one.
            $char = $encode[ int( rand 2 ) ]->($char);
        } elsif ( $char ne ':' ) {
            # leave ':' alone (to spot mailto: later)
            my $r = rand;
            # roughly 10% raw, 45% hex, 45% dec
            $char = (
                $r > .9   ?  $encode[2]->($char)  :
                $r < .45  ?  $encode[1]->($char)  :
                             $encode[0]->($char)
            );
        }
        $char;
    }gex;

    $addr = qq{<a href="$addr">$addr</a>};
    $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part

    return $addr;
}
1304
1305
sub _UnescapeSpecialChars {
#
#   Put the special characters back: every occurrence of a value stored
#   in %g_escape_table is replaced by the character it is stored under.
#
#   Params:
#       $text - text that may contain escape-table values
#
#   Returns: the text with the original characters restored.
#
    my ($text) = @_;

    foreach my $char (keys %g_escape_table) {
        my $hash = $g_escape_table{$char};
        $text =~ s/$hash/$char/g;
    }

    return $text;
}
1317
1318
sub _TokenizeHTML {
#
#   Parameter:  String containing HTML markup.
#   Returns:    Reference to an array of tokens covering the whole input
#               in order. Every token is a two-element array: first the
#               type, 'tag' or 'text', then the matched string itself.
#               A 'tag' token is a comment, a processing instruction or
#               a tag; tags may contain further tags inside them, as in
#               <a href="<MTFoo>">, up to a fixed nesting depth. A
#               'text' token is a non-empty run between two tags.
#
#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#       <http://www.bradchoate.com/past/mtregex.php>
#
    my ($str) = @_;

    my @tokens;
    my $cursor = 0;     # offset just past the last tag consumed so far

    my $depth = 6;
    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
                   (?s: <\? .*? \?> ) |              # processing instruction
                   $nested_tags/ix;                   # nested tags

    while ($str =~ m/($match)/g) {
        my $whole_tag = $1;
        # Offsets of the start and the end of the tag just matched:
        my ($tag_start, $tag_end) = ($-[0], $+[0]);

        # Whatever lies between the previous tag and this one is text.
        push @tokens, ['text', substr($str, $cursor, $tag_start - $cursor)]
            if $tag_start > $cursor;

        push @tokens, ['tag', $whole_tag];
        $cursor = $tag_end;
    }

    # Text after the last tag (or the whole string if there was no tag):
    my $total = length $str;
    push @tokens, ['text', substr($str, $cursor, $total - $cursor)]
        if $cursor < $total;

    return \@tokens;
}
1358
1359
sub _Outdent {
#
#   Strip one level of indentation from the start of every line: either
#   a single tab or between one and $g_tab_width spaces.
#
#   Params:
#       the text to outdent (first argument)
#
#   Returns: the outdented copy.
#
    (my $outdented = shift) =~ s/^(\t|[ ]{1,$g_tab_width})//gm;

    return $outdented;
}
1369
1370
sub _Detab {
#
#   Replace every tab by as many spaces as it takes to reach the next
#   multiple of $g_tab_width, counting from the text matched in front of
#   that tab.
#
#   Params:
#       $text - text that may contain tabs
#
#   Returns: the text with the tabs expanded.
#
#   Cribbed from a post by Bart Lateur:
#   <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
#
    my ($text) = @_;

    $text =~ s{(.*?)\t}{
        my $before = $1;
        my $fill   = $g_tab_width - length($before) % $g_tab_width;
        $before . (' ' x $fill);
    }ge;

    return $text;
}
1381
1382
sub _PrefixURL {
#
#   Add URL prefix if needed
#
#   Params:
#       the URL to process (first argument)
#
#   Returns: the URL unchanged when
#       - both $g_url_prefix and $g_img_prefix are empty, or
#       - the URL starts with "//" or with a scheme ("name:"), or
#       - the prefix that applies to it is empty;
#     otherwise the applicable prefix and the URL joined by exactly one
#     "/". $g_img_prefix applies to URLs ending in .png, .gif, .jpg,
#     .jpeg, .svg or .svgz (in any letter case) when it is set;
#     $g_url_prefix applies to everything else.
#
    my $url = shift;

    return $url unless $g_url_prefix ne '' || $g_img_prefix ne '';
    return $url if $url =~ m,^//, || $url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/;

    # "svgz?" covers both .svg and .svgz. The earlier "svg?z" made the
    # "g" optional instead of the "z", so plain .svg files never got the
    # image prefix.
    my $ans = $g_url_prefix;
    $ans = $g_img_prefix
        if $g_img_prefix ne '' && $url =~ /\.(?:png|gif|jpe?g|svgz?)$/i;
    return $url unless $ans ne '';

    # Join with a single slash, whichever side already provides one.
    $ans .= '/' if substr($ans, -1, 1) ne '/';
    $ans .= substr($url, 0, 1) eq '/' ? substr($url, 1) : $url;
    return $ans;
}
1399
1400
1401 1;
1402
1403 __END__
1404
1405
1406 =pod
1407
1408 =head1 NAME
1409
1410 B<Markdown>
1411
1412
1413 =head1 SYNOPSIS
1414
1415 B<Markdown.pl> [ B<--help> ] [ B<--html4tags> ] [ B<--htmlroot>=I<prefix> ]
1416     [ B<--imageroot>=I<prefix> ] [ B<--version> ] [ B<--shortversion> ]
1417     [ I<file> ... ]
1418
1419
1420 =head1 DESCRIPTION
1421
1422 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1423 easy-to-write structured text format into HTML. Markdown's text format
1424 is most similar to that of plain text email, and supports features such
1425 as headers, *emphasis*, code blocks, blockquotes, and links.
1426
Markdown's syntax is designed not as a generic markup language, but
specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block-level
HTML tags (like <div> and <table>) as well.
1431
1432 For more information about Markdown's syntax, see the `basics.text`
1433 and `syntax.text` files included with `Markdown.pl`.
1434
1435
1436 =head1 OPTIONS
1437
1438 Use "--" to end switch parsing. For example, to open a file named "-z", use:
1439
1440     Markdown.pl -- -z
1441
1442 =over 4
1443
1444
1445 =item B<--html4tags>
1446
1447 Use HTML 4 style for empty element tags, e.g.:
1448
1449     <br>
1450
1451 instead of Markdown's default XHTML style tags, e.g.:
1452
1453     <br />
1454
1455
1456 =item B<-r> I<prefix>, B<--htmlroot>=I<prefix>
1457
1458 Any non-absolute URLs have I<prefix> prepended.
1459
1460
1461 =item B<-i> I<prefix>, B<--imageroot>=I<prefix>
1462
1463 Any non-absolute URLs have I<prefix> prepended (overriding the B<-r> prefix
1464 if any) but only if they end in an image suffix.
1465
1466
1467 =item B<-V>, B<--version>
1468
1469 Display Markdown's version number and copyright information.
1470
1471
1472 =item B<-s>, B<--shortversion>
1473
1474 Display the short-form version number.
1475
1476
1477 =item B<-h>, B<--help>
1478
1479 Display Markdown's help.
1480
1481
1482 =back
1483
1484
1485 =head1 VERSION HISTORY
1486
1487 See the readme file for detailed release notes for this version.
1488
1489 1.0.3 - 06 Sep 2015
1490
1491 1.0.2 - 03 Sep 2015
1492
1493 1.0.1 - 14 Dec 2004
1494
1495 1.0 - 28 Aug 2004
1496
1497
1498 =head1 AUTHORS
1499
1500     John Gruber
1501     http://daringfireball.net
1502     http://daringfireball.net/projects/markdown/
1503
1504     PHP port and other contributions by Michel Fortin
1505     http://michelf.com
1506
1507     Additional enhancements and tweaks by Kyle J. McKay
1508     mackyle<at>gmail.com
1509
1510
1511 =head1 COPYRIGHT AND LICENSE
1512
1513  Copyright (C) 2003-2004 John Gruber
1514  Copyright (C) 2015 Kyle J. McKay
1515  All rights reserved.
1516
1517 Redistribution and use in source and binary forms, with or without
1518 modification, are permitted provided that the following conditions are
1519 met:
1520
1521 * Redistributions of source code must retain the above copyright
1522   notice, this list of conditions and the following disclaimer.
1523
1524 * Redistributions in binary form must reproduce the above copyright
1525   notice, this list of conditions and the following disclaimer in the
1526   documentation and/or other materials provided with the distribution.
1527
1528 * Neither the name "Markdown" nor the names of its contributors may
1529   be used to endorse or promote products derived from this software
1530   without specific prior written permission.
1531
1532 This software is provided by the copyright holders and contributors "as
1533 is" and any express or implied warranties, including, but not limited
1534 to, the implied warranties of merchantability and fitness for a
1535 particular purpose are disclaimed. In no event shall the copyright owner
1536 or contributors be liable for any direct, indirect, incidental, special,
1537 exemplary, or consequential damages (including, but not limited to,
1538 procurement of substitute goods or services; loss of use, data, or
1539 profits; or business interruption) however caused and on any theory of
1540 liability, whether in contract, strict liability, or tort (including
1541 negligence or otherwise) arising in any way out of the use of this
1542 software, even if advised of the possibility of such damage.
1543
1544 =cut