Markdown.pl: next version is 1.1.5
[markdown.git] / Markdown.pl
blobd0c13d46e7fd8fd5689d998660619cdc0b0e2b25
1 #!/usr/bin/env perl
4 # Markdown -- A text-to-HTML conversion tool for web writers
6 # Copyright (C) 2004 John Gruber
7 # Copyright (C) 2015,2016,2017 Kyle J. McKay
8 # All rights reserved.
# License is Modified BSD (aka 3-clause BSD) License
10 # See LICENSE file (or <https://opensource.org/licenses/BSD-3-Clause>)
13 package Markdown;
15 require 5.008;
16 use strict;
17 use warnings;
19 use Encode;
21 use vars qw($COPYRIGHT $VERSION @ISA @EXPORT_OK);
# Alias the package globals $COPYRIGHT and $VERSION to string literals at
# compile time (glob assignment of a reference to a constant string).
BEGIN {*COPYRIGHT =
\"Copyright (C) 2004 John Gruber
Copyright (C) 2015,2016,2017 Kyle J. McKay
All rights reserved.
";
*VERSION = \"1.1.5-PRE"
}
31 require Exporter;
32 use Digest::MD5 qw(md5 md5_hex);
33 use File::Basename qw(basename);
34 use Scalar::Util qw(refaddr looks_like_number);
35 use Pod::Usage;
36 @ISA = qw(Exporter);
37 @EXPORT_OK = qw(Markdown);
38 $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'};
40 close(DATA) if fileno(DATA);
41 exit(&_main(@ARGV)||0) unless caller;
# Fallback decoder used by Markdown() when the input is not valid UTF-8.
# Windows-1252 is preferred; ISO-8859-1 is used if it is unavailable.
my $encoder;
BEGIN {
    $encoder = Encode::find_encoding('Windows-1252') ||
               Encode::find_encoding('ISO-8859-1') or
        die "failed to load ISO-8859-1 encoder\n";
}
# Global default settings (copied into %opt at the start of each
# Markdown() call, where per-call options may override them):
my ($g_style_prefix, $g_empty_element_suffix, $g_indent_width, $g_tab_width);
BEGIN {
    $g_style_prefix = "_markdown-";     # Prefix for markdown css class styles
    $g_empty_element_suffix = " />";    # Change to ">" for HTML output
    $g_indent_width = 4;                # Number of spaces considered new level
    $g_tab_width = 4;                   # Legacy even though it's wrong
}
63 # Globals:
66 # Style sheet template
67 my $g_style_sheet;
69 # Permanent block id table
70 my %g_perm_block_ids;
72 # Global hashes, used by various utility routines
73 my %g_urls;
74 my %g_titles;
75 my %g_anchors;
76 my %g_block_ids;
77 my %g_html_blocks;
78 my %g_code_blocks;
79 my %opt;
# Return a "block id" to use to identify the block that does not contain
# any characters that could be misinterpreted by the rest of the code.
# Originally this used md5_hex but that's unnecessarily slow.
# Instead just use the refaddr of the scalar ref of the entry for that
# key in either the global or, if the optional second argument is true,
# permanent table.  To avoid the result being confused with anything
# else, it's prefixed with a control character and suffixed with another
# both of which are not allowed by the XML standard or Unicode.
#
#   $_[0]  key to identify (the entry is autovivified in the table)
#   $_[1]  true => use %g_perm_block_ids and the \2...\3 delimiters,
#          false => use %g_block_ids and the \5...\6 delimiters
#
# Returns the delimited id string.
sub block_id {
    $_[1] ?
        "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" :
        "\5".refaddr(\$g_block_ids{$_[0]})."\6";
}
# Regex to match balanced [brackets].  See Friedl's
# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
# The pattern refers to itself through a postponed subexpression
# (??{ ... }), so the variable must be declared before it is assigned.
my $g_nested_brackets;
BEGIN {
    $g_nested_brackets = qr{
    (?>                                 # Atomic matching
        [^\[\]]+                        # Anything other than brackets
     |
        \[
            (??{ $g_nested_brackets })  # Recursive set of nested brackets
        \]
    )*
    }x;
}
# Table of hash values for escaped characters: maps each Markdown
# special character to its permanent block id (see block_id()).
my %g_escape_table;
BEGIN {
    foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") {
        $g_escape_table{$char} = block_id($char,1);
    }
}
# Used to track when we're inside an ordered or unordered list
# (see _ProcessListItems() for details).  Reset to 0 by Markdown().
my $g_list_level;
BEGIN {
    $g_list_level = 0;
}
127 #### Blosxom plug-in interface ##########################################
# True when $blosxom::version is defined at compile time.
my $_haveBX;
BEGIN {
    no warnings 'once';     # $blosxom::version is only mentioned here
    $_haveBX = defined($blosxom::version);
}
# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
# which posts Markdown should process, using a "meta-markup: markdown"
# header.  If it's set to 0 (the default), Markdown will process all
# entries.
my $g_blosxom_use_meta;
BEGIN {
    $g_blosxom_use_meta = 0;
}
# Blosxom plug-in hook: always returns true.
sub start { 1; }
# Blosxom plug-in hook: converts a story body from Markdown in place.
#
# Arguments follow the Blosxom story() convention; only $body_ref (a
# reference to the body text) is used.  The body is converted unless
# $g_blosxom_use_meta is set and $meta::markup is not "markdown".
# Always returns 1.
sub story {
    my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;

    if ((! $g_blosxom_use_meta) or
        (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
        ) {
            $$body_ref = Markdown($$body_ref);
    }
    1;
}
156 #### Movable Type plug-in interface #####################################
my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT
my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0.

# Register with Movable Type only when it could actually be loaded.
# (The conditions must be "if", not "unless": with the test inverted the
# "require MT" below is attempted precisely when MT is missing, which
# kills any program that loads this file as a module outside of MT.)
if ($_haveMT) {
    require MT;
    MT->import;
    require MT::Template::Context;
    MT::Template::Context->import;

    if ($_haveMT3) {
        require MT::Plugin;
        MT::Plugin->import;
        my $plugin = MT::Plugin->new({
            name => "Markdown",
            description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
            doc_link => 'http://daringfireball.net/projects/markdown/'
        });
        MT->add_plugin( $plugin );
    }

    # <MarkdownOptions output="..."> container tag: stashes the requested
    # output mode (lower-cased) for the text filters below, then builds
    # the contained tokens.
    MT::Template::Context->add_container_tag(MarkdownOptions => sub {
        my $ctx = shift;
        my $args = shift;
        my $builder = $ctx->stash('builder');
        my $tokens = $ctx->stash('tokens');

        if (defined ($args->{'output'}) ) {
            $ctx->stash('markdown_output', lc $args->{'output'});
        }

        defined (my $str = $builder->build($ctx, $tokens) )
            or return $ctx->error($builder->errstr);
        $str;           # return value
    });

    MT->add_text_filter('markdown' => {
        label => 'Markdown',
        docs => 'http://daringfireball.net/projects/markdown/',
        on_format => sub {
            my $text = shift;
            my $ctx = shift;
            my $raw = 0;
            if (defined $ctx) {
                my $output = $ctx->stash('markdown_output');
                if (defined $output && $output =~ m/^html/i) {
                    # HTML (not XHTML) style empty elements
                    $g_empty_element_suffix = ">";
                    $ctx->stash('markdown_output', '');
                }
                elsif (defined $output && $output eq 'raw') {
                    # Pass the text through unconverted
                    $raw = 1;
                    $ctx->stash('markdown_output', '');
                }
                else {
                    $raw = 0;
                    $g_empty_element_suffix = " />";
                }
            }
            $text = $raw ? $text : Markdown($text);
            $text;
        },
    });

    # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
    my $smartypants;

    {
        no warnings "once";
        $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
    }

    if ($smartypants) {
        MT->add_text_filter('markdown_with_smartypants' => {
            label => 'Markdown With SmartyPants',
            docs => 'http://daringfireball.net/projects/markdown/',
            on_format => sub {
                my $text = shift;
                my $ctx = shift;
                if (defined $ctx) {
                    my $output = $ctx->stash('markdown_output');
                    if (defined $output && $output eq 'html') {
                        $g_empty_element_suffix = ">";
                    }
                    else {
                        $g_empty_element_suffix = " />";
                    }
                }
                $text = Markdown($text);
                $text = $smartypants->($text, '1');
            },
        });
    }
}
# Normalize whitespace: remove leading and trailing whitespace and
# collapse every internal whitespace run to a single space.
# Returns undef when given undef.
sub _strip {
    my $str = shift;
    defined($str) or return undef;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    $str =~ s/\s+/ /g;
    $str;
}
259 #### BBEdit/command-line text filter interface ##########################
# Command-line entry point.  Takes the argument list (normally @ARGV),
# parses the switches, converts every input file (or standard input)
# with Markdown() and prints the result, optionally wrapped in a stub
# HTML document.  Exits the process rather than returning.
sub _main {
    local *ARGV = \@_;

    #### Check for command-line switches: #################
    my %options = ();
    my %cli_opts;
    use Getopt::Long;
    Getopt::Long::Configure(qw(bundling require_order pass_through));
    GetOptions(\%cli_opts,
        'help','h',
        'version|V',
        'shortversion|short-version|s',
        'html4tags',
        'deprecated',
        'htmlroot|r=s',
        'imageroot|i=s',
        'tabwidth|tab-width=s',
        'stylesheet|style-sheet',
        'no-stylesheet|no-style-sheet',
        'stub',
    );
    if ($cli_opts{'help'}) {
        pod2usage(-verbose => 2, -exitval => 0);
    }
    if ($cli_opts{'h'}) {
        pod2usage(-verbose => 0, -exitval => 0);
    }
    if ($cli_opts{'version'}) { # Version info
        print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
        print "License is Modified BSD (aka 3-clause BSD) License\n";
        print "<https://opensource.org/licenses/BSD-3-Clause>\n";
        exit 0;
    }
    if ($cli_opts{'shortversion'}) { # Just the version number string.
        print $VERSION;
        exit 0;
    }
    # $stub: 0 = no wrapper, 1 = XHTML-style wrapper, -1 = HTML4-style wrapper
    my $stub = 0;
    if ($cli_opts{'stub'}) {
        $stub = 1;
    }
    if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
        $options{empty_element_suffix} = ">";
        $stub = -$stub;
    }
    if ($cli_opts{'deprecated'}) { # Allow <dir> and <menu> tags to pass through
        _SetAllowedTag("dir");
        _SetAllowedTag("menu");
    }
    # Test definedness, not truth: "--tabwidth=0" and "--tabwidth=" must be
    # rejected by the checks below rather than silently ignored.
    if (defined($cli_opts{'tabwidth'})) {
        my $tw = $cli_opts{'tabwidth'};
        die "invalid tab width (must be integer)\n" unless looks_like_number $tw;
        die "invalid tab width (must be >= 2 and <= 32)\n" unless $tw >= 2 && $tw <= 32;
        $options{tab_width} = int(0+$tw);
    }
    if ($cli_opts{'htmlroot'}) { # Use URL prefix
        $options{url_prefix} = $cli_opts{'htmlroot'};
    }
    if ($cli_opts{'imageroot'}) { # Use image URL prefix
        $options{img_prefix} = $cli_opts{'imageroot'};
    }
    if ($cli_opts{'stylesheet'}) { # Display the style sheet
        $options{show_styles} = 1;
    }
    if ($cli_opts{'no-stylesheet'}) { # Do not display the style sheet
        $options{show_styles} = 0;
    }
    $options{show_styles} = 1 if $stub && !defined($options{show_styles});
    $options{tab_width} = 8 unless defined($options{tab_width});

    # Emits everything that precedes the converted text.  It is called
    # after the first conversion so that $options{h1}, which Markdown()
    # fills in, can supply the <title>.
    my $hdr = sub {
        if ($stub > 0) {
            print <<'HTML5';
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
HTML5
        } elsif ($stub < 0) {
            print <<'HTML4';
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="content-type" content="text/html; charset=utf-8">
HTML4
        }
        if ($stub && ($options{title} || $options{h1})) {
            my $title = $options{title};
            defined($title) && $title ne "" or $title = $options{h1};
            if (defined($title) && $title ne "") {
                $title =~ s/&/&amp;/g;
                $title =~ s/</&lt;/g;
                print "<title>$title</title>\n";
            }
        }
        if ($options{show_styles}) {
            my $stylesheet = $g_style_sheet;
            $stylesheet =~ s/%\(base\)/$g_style_prefix/g;
            print $stylesheet;
        }
        if ($stub) {
            print "</head>\n<body style=\"text-align:center\">\n",
                "<div style=\"display:inline-block;text-align:left;max-width:42pc\">\n";
        }
    };

    #### Process incoming text: ###########################
    my $didhdr;
    for (;;) {
        local $_;
        {
            local $/;           # Slurp the whole file
            $_ = <>;
        }
        defined($_) or last;
        my $result = Markdown($_, \%options);
        if ($result ne "") {
            if (!$didhdr) {
                &$hdr();
                $didhdr = 1;
            }
            print $result;
        }
    }
    &$hdr() unless $didhdr;
    print "</div>\n</body>\n</html>\n" if $stub;

    exit 0;
}
# Primary function: convert Markdown text to HTML.
#
#   Markdown($text)
#   Markdown($text, \%options)
#   Markdown($text, name => value, ...)
#
# $text may be a character string or bytes; bytes that are not valid
# UTF-8 are decoded with the fallback $encoder.  Options override the
# defaults set up in %opt below.  The return value is UTF-8 encoded.
# When the options were passed as a hash reference and a non-empty
# $opt{h1} is available afterwards, it is stored (UTF-8 encoded) in the
# caller's hash under the key "h1".
#
# The order in which other subs are called here is essential.  Link and
# image substitutions need to happen before _EscapeSpecialChars(), so
# that any *'s or _'s in the <a> and <img> tags get encoded.
sub Markdown {
    my $_text = shift;
    defined $_text or $_text='';

    my $text;
    if (Encode::is_utf8($_text) || utf8::decode($_text)) {
        $text = $_text;
    } else {
        $text = $encoder->decode($_text, Encode::FB_DEFAULT);
    }
    $_text = undef;

    # Any remaining arguments after the first are options; either a single
    # hashref or a list of name, value pairs.
    %opt = (
        # set initial defaults
        style_prefix => $g_style_prefix,
        empty_element_suffix => $g_empty_element_suffix,
        tab_width => $g_tab_width,
        indent_width => $g_indent_width,
        url_prefix => "", # Prefixed to non-absolute URLs
        img_prefix => "", # Prefixed to non-absolute image URLs
    );
    my %args = ();
    if (ref($_[0]) eq "HASH") {
        %args = %{$_[0]};
    } else {
        %args = @_;
    }
    while (my ($k,$v) = each %args) {
        $opt{$k} = $v;
    }

    # Clear the globals.  If we don't clear these, you get conflicts
    # from other articles when generating a page which contains more than
    # one article (e.g. an index page that shows the N most recent
    # articles):
    %g_urls = ();
    %g_titles = ();
    %g_anchors = ();
    %g_block_ids = ();
    %g_html_blocks = ();
    %g_code_blocks = ();
    $g_list_level = 0;

    # Standardize line endings:
    $text =~ s{\r\n}{\n}g;  # DOS to Unix
    $text =~ s{\r}{\n}g;    # Mac to Unix

    # Make sure $text ends with a couple of newlines:
    $text .= "\n\n";

    # Handle backticks-delimited code blocks
    $text = _HashBTCodeBlocks($text);

    # Convert all tabs to spaces.
    $text = _Detab($text);

    # Strip any lines consisting only of spaces.
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like / *\n+/ .
    $text =~ s/^ +$//mg;

    # Turn block-level HTML blocks into hash entries
    $text = _HashHTMLBlocks($text);

    # Strip link definitions, store in hashes.
    $text = _StripLinkDefinitions($text);

    $text = _RunBlockGamut($text, 1);

    # Unhashify code blocks
    $text =~ s/(\005\d+\006)/$g_code_blocks{$1}/g;

    $text = _UnescapeSpecialChars($text);

    $text .= "\n" unless $text eq "";

    utf8::encode($text);
    if (defined($opt{h1}) && $opt{h1} ne "" && ref($_[0]) eq "HASH") {
        utf8::encode($opt{h1});
        ${$_[0]}{h1} = $opt{h1}
    }
    return $text;
}
# Process Markdown backticks (```) delimited code blocks.
#
# Each fenced block is detabbed (8-column stops), trimmed, encoded with
# _EncodeCode(), wrapped in its <div><pre><code> markup and replaced in
# the text by a block id whose content is kept in %g_html_blocks.
# Returns the modified text.
sub _HashBTCodeBlocks {
    my $text = shift;

    $text =~ s{
            (?:(?<=\n)|\A)
                ``(`+)[ \t]*(?:([\w.+-]+)[ \t]*)?\n
             ( # $3 = the code block -- one or more lines, starting with ```
              (?:
                .*\n+
              )+?
            )
            (?:(?:``\1[ \t]*(?:\n|\Z))|\Z) # and ending with ``` or end of document
        }{
            # $2 contains syntax highlighting to use if defined
            my $codeblock = $3;
            $codeblock =~ s/[ \t]+$//mg; # trim trailing spaces on lines
            $codeblock = _Detab($codeblock, 8); # physical tab stops are always 8
            $codeblock =~ s/\A\n+//; # trim leading newlines
            $codeblock =~ s/\s+\z//; # trim trailing whitespace
            $codeblock = _EncodeCode($codeblock); # or run highlighter here
            $codeblock = "<div class=\"$opt{style_prefix}code-bt\"><pre style=\"display:none\"></pre><pre><code>"
                . $codeblock . "\n</code></pre></div>";

            my $key = block_id($codeblock);
            $g_html_blocks{$key} = $codeblock;
            "\n\n" . $key . "\n\n";
        }egmx;

    return $text;
}
# Strips link definitions from text, stores the URLs and titles in
# %g_urls and %g_titles (keyed by the lower-cased, whitespace-normalized
# link id) and returns the text without the definitions.
sub _StripLinkDefinitions {
    my $text = shift;
    my $less_than_indent = $opt{indent_width} - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    # One definition is removed per iteration (no /g) so that $1..$3 can
    # be examined after each successful substitution.
    while ($text =~ s{
                        ^[ ]{0,$less_than_indent}\[(.+)\]: # id = $1
                          [ ]*
                          \n?               # maybe *one* newline
                          [ ]*
                        <?(\S+?)>?          # url = $2
                          [ ]*
                          \n?               # maybe one newline
                          [ ]*
                        (?:
                            (?<=\s)         # lookbehind for whitespace
                            ["(]
                            (.+?)           # title = $3
                            [")]
                            [ ]*
                        )?  # title is optional
                        (?:\n+|\Z)
                    }
                    {}mx) {
        my $id = _strip(lc $1); # Link IDs are case-insensitive
        my $url = $2;
        my $title = _strip($3);
        if ($id ne "") {
            $g_urls{$id} = _EncodeAmpsAndAngles($url);
            if (defined($title) && $title ne "") {
                $g_titles{$id} = $title;
                $g_titles{$id} =~ s/\042/&quot;/g;
            }
        }
    }

    return $text;
}
# Block-level tag names recognized by _HashHTMLBlocks().  The "a" list is
# used for the strict (nested) match and additionally contains ins and
# del; the "b" list is used for the liberal match.
my ($block_tags_a, $block_tags_b);
BEGIN {
    $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o;
    $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o;
}
# Replace block-level HTML in the text with block ids, saving the
# original markup in %g_html_blocks so that later passes leave it alone.
# Returns the modified text.
sub _HashHTMLBlocks {
    my $text = shift;
    my $less_than_indent = $opt{indent_width} - 1;

    # Hashify HTML blocks:
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables.  That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans.  The list of tags we're looking for is
    # hard-coded:

    # First, look for nested blocks, e.g.:
    #   <div>
    #       <div>
    #       tags for inner block must be indented.
    #       </div>
    #   </div>
    #
    # The outermost tags must start at the left margin for this to match, and
    # the inner nested divs must be indented.
    # We need to do this before the next, more liberal match, because the next
    # match will start at the first `<div>` and stop at the first `</div>`.
    $text =~ s{
                (                       # save in $1
                    ^                   # start of line (with /m)
                    <($block_tags_a)    # start tag = $2
                    \b                  # word break
                    (.*\n)*?            # any number of lines, minimally matching
                    </\2>               # the matching end tag
                    [ ]*                # trailing spaces
                    (?=\n+|\Z) # followed by a newline or end of document
                )
            }{
                my $key = block_id($1);
                $g_html_blocks{$key} = $1;
                "\n\n" . $key . "\n\n";
            }egmx;

    #
    # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
    #
    $text =~ s{
                (                       # save in $1
                    ^                   # start of line (with /m)
                    <($block_tags_b)    # start tag = $2
                    \b                  # word break
                    (.*\n)*?            # any number of lines, minimally matching
                    .*</\2>             # the matching end tag
                    [ ]*                # trailing spaces
                    (?=\n+|\Z) # followed by a newline or end of document
                )
            }{
                my $key = block_id($1);
                $g_html_blocks{$key} = $1;
                "\n\n" . $key . "\n\n";
            }egmx;
    # Special case just for <hr />.  It was easier to make a special case than
    # to make the other regex more complicated.
    $text =~ s{
                (?:
                    (?<=\n\n)           # Starting after a blank line
                    |                   # or
                    \A\n?               # the beginning of the doc
                )
                (                       # save in $1
                    [ ]{0,$less_than_indent}
                    <(hr)               # start tag = $2
                    \b                  # word break
                    ([^<>])*?           #
                    /?>                 # the matching end tag
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document
                )
            }{
                my $key = block_id($1);
                $g_html_blocks{$key} = $1;
                "\n\n" . $key . "\n\n";
            }egx;

    # Special case for standalone HTML comments:
    $text =~ s{
                (?:
                    (?<=\n\n)           # Starting after a blank line
                    |                   # or
                    \A\n?               # the beginning of the doc
                )
                (                       # save in $1
                    [ ]{0,$less_than_indent}
                    (?s:
                        <!
                        (--.*?--\s*)+
                        >
                    )
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document
                )
            }{
                my $key = block_id($1);
                $g_html_blocks{$key} = $1;
                "\n\n" . $key . "\n\n";
            }egx;

    return $text;
}
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
#
#   $text     text to transform
#   $anchors  passed through to _DoHeaders()
#
# Returns the transformed text.
sub _RunBlockGamut {
    my ($text, $anchors) = @_;

    $text = _DoHeaders($text, $anchors);

    # Do Horizontal Rules:
    $text =~ s{^ {0,3}\*(?: {0,2}\*){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
    $text =~ s{^ {0,3}\_(?: {0,2}\_){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;
    $text =~ s{^ {0,3}\-(?: {0,2}\-){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm;

    $text = _DoLists($text);

    $text = _DoCodeBlocks($text);

    $text = _DoBlockQuotes($text);

    # We already ran _HashHTMLBlocks() before, in Markdown(), but that
    # was to escape raw HTML in the original Markdown source.  This time,
    # we're escaping the markup we've just created, so that we don't wrap
    # <p> tags around block-level tags.
    $text = _HashHTMLBlocks($text);

    $text = _FormParagraphs($text);

    return $text;
}
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
# Takes the span text and returns it transformed.
sub _RunSpanGamut {
    my $text = shift;

    $text = _DoCodeSpans($text);

    $text = _EscapeSpecialChars($text);

    # Process anchor and image tags.  Images must come first,
    # because ![foo][f] looks like an anchor.
    $text = _DoImages($text);
    $text = _DoAnchors($text);

    # Make links out of things like `<http://example.com/>`
    # Must come after _DoAnchors(), because you can use < and >
    # delimiters in inline links like [this](<url>).
    $text = _DoAutoLinks($text);

    $text = _EncodeAmpsAndAngles($text);

    $text = _DoItalicsAndBoldAndStrike($text);

    # Do hard breaks:
    $text =~ s/ {2,}\n/<br$opt{empty_element_suffix}\n/g;

    return $text;
}
# Protect Markdown special characters that must not be interpreted:
# inside HTML tags the characters * _ ~ are replaced with their escape
# table values; all other text has its backslash escapes encoded.
# Takes the text and returns it rebuilt from its tokens.
sub _EscapeSpecialChars {
    my $text = shift;
    my $tokens = _TokenizeHTML($text);

    $text = '';  # rebuild $text from the tokens
#   my $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
#   my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;

    foreach my $cur_token (@$tokens) {
        if ($cur_token->[0] eq "tag") {
            # Within tags, encode *, _ and ~ so they don't conflict
            # with their use in Markdown for italics and strong.
            # We're replacing each such character with its
            # corresponding block id value; this is likely
            # overkill, but it should prevent us from colliding
            # with the escape values by accident.
            $cur_token->[1] =~ s!([*_~])!$g_escape_table{$1}!g;
            $text .= $cur_token->[1];
        } else {
            my $t = $cur_token->[1];
            $t = _EncodeBackslashEscapes($t);
            $text .= $t;
        }
    }
    return $text;
}
# Rewrite the contents of a [[wiki style]] link into ordinary
# [text](url) Markdown form.
#
#   $link_text  text before the "|" (or the whole content if no "|")
#   $link_loc   text after the "|", or undef if there was none
#
# Only http, https, ftp and ftps URLs are handled.  Returns the rewritten
# link, or undef when the link is of a kind that is not supported.
sub _ProcessWikiLink {
    my ($link_text, $link_loc) = @_;
    if (defined($link_loc) && $link_loc =~ m{^(?:http|ftp)s?://\S+$}i) {
        # Just rewrite it to [...](...) form
        return "[".$link_text."](".$link_loc.")";
    }
    if (defined($link_loc)) {
        # We don't handle any other kind of "bar" links yet
        return undef;
    }
    if ($link_text =~ m{^(?:http|ftp)s?://\S+$}i) {
        # Just rewrite it to [...](...) form
        return "[".$link_text."](".$link_text.")";
    }
    # We don't handle any other wiki-style links yet
    return undef;
}
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Handles, in this order: [[wiki links]], reference links [text] [id],
# inline links [text](url "title") and implicit shortcut links [text].
# Reference ids are looked up in %g_urls first and then in %g_anchors;
# unresolved references are left untouched.  Returns the modified text.
sub _DoAnchors {
    my $text = shift;

    #
    # First, handle wiki-style links: [[wiki style link]]
    #
    $text =~ s{
        (               # wrap whole match in $1
          \[\[
            ($g_nested_brackets)        # link text and id = $2
          \]\]
        )
    }{
        my $result;
        my $whole_match = $1;
        my $link_text = $2;
        my $link_loc = undef;

        if ($link_text =~ /^(.*)\|(.*)$/s) {
            $link_text = $1;
            $link_loc = $2;
        }

        $result = _ProcessWikiLink($link_text, $link_loc);
        defined($result) or $result = $whole_match;
        $result;
    }xsge;

    #
    # Next, handle reference-style links: [link text] [id]
    #
    $text =~ s{
        (               # wrap whole match in $1
          \[
            ($g_nested_brackets)        # link text = $2
          \]

          [ ]?          # one optional space
          (?:\n[ ]*)?   # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]
        )
    }{
        my $result;
        my $whole_match = $1;
        my $link_text = $2;
        my $link_id = _strip(lc $3);

        if ($link_id eq "") {
            $link_id = _strip(lc $link_text); # for shortcut links like [this][].
        }

        if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
            my $url = $g_urls{$link_id};
            $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
            # We've got to encode these to avoid conflicting
            # with italics, bold and strike through.
            $url =~ s!([*_~])!$g_escape_table{$1}!g;
            $result = "<a href=\"$url\"";
            if ( defined $g_titles{$link_id} ) {
                my $title = $g_titles{$link_id};
                $title =~ s!([*_~])!$g_escape_table{$1}!g;
                $result .= " title=\"$title\"";
            }
            # A purely numeric link text keeps its brackets
            $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
            $result .= ">$link_text</a>";
        }
        else {
            $result = $whole_match;
        }
        $result;
    }xsge;

    #
    # Subsequently, inline-style links: [link text](url "optional title")
    #
    $text =~ s{
        (               # wrap whole match in $1
          \[
            ($g_nested_brackets)        # link text = $2
          \]
          \(            # literal paren
            [ ]*
            <?(.*?)>?   # href = $3
            [ ]*
            (           # $4
              (['\042]) # quote char = $5
              (.*?)     # Title = $6
              \5        # matching quote
            )?          # title is optional
          \)
        )
    }{
        my $result;
        my $whole_match = $1;
        my $link_text = $2;
        my $url = $3;
        my $title = _strip($6);

        $url = _PrefixURL($url);
        # We've got to encode these to avoid conflicting
        # with italics, bold and strike through.
        $url =~ s!([*_~])!$g_escape_table{$1}!g;
        $result = "<a href=\"$url\"";

        if (defined $title) {
            $title =~ s/\042/&quot;/g;
            $title =~ s!([*_~])!$g_escape_table{$1}!g;
            $result .= " title=\"$title\"";
        }

        $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
        $result .= ">$link_text</a>";

        $result;
    }xsge;

    #
    # Finally, handle reference-style implicit shortcut links: [link text]
    #
    $text =~ s{
        (               # wrap whole match in $1
          \[
            ($g_nested_brackets)        # link text = $2
          \]
        )
    }{
        my $result;
        my $whole_match = $1;
        my $link_text = $2;
        my $link_id = _strip(lc $2);

        if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
            my $url = $g_urls{$link_id};
            $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
            # We've got to encode these to avoid conflicting
            # with italics, bold and strike through.
            $url =~ s!([*_~])!$g_escape_table{$1}!g;
            $result = "<a href=\"$url\"";
            if ( defined $g_titles{$link_id} ) {
                my $title = $g_titles{$link_id};
                $title =~ s!([*_~])!$g_escape_table{$1}!g;
                $result .= " title=\"$title\"";
            }
            $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
            $result .= ">$link_text</a>";
        }
        else {
            $result = $whole_match;
        }
        $result;
    }xsge;

    return $text;
}
# Turn Markdown image shortcuts into <img> tags.
#
# Handles, in this order: reference images ![alt][id], inline images
# ![alt](url "title") and implicit reference images ![alt].  Reference
# ids are looked up in %g_urls; unresolved references are left
# untouched.  Returns the modified text.
sub _DoImages {
    my $text = shift;

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $text =~ s{
        (               # wrap whole match in $1
          !\[
            (.*?)       # alt text = $2
          \]

          [ ]?          # one optional space
          (?:\n[ ]*)?   # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
    }{
        my $result;
        my $whole_match = $1;
        my $alt_text = _strip($2);
        my $link_id = _strip(lc $3);

        if ($link_id eq "") {
            $link_id = lc $alt_text; # for shortcut links like ![this][].
        }

        $alt_text =~ s/"/&quot;/g;
        if (defined $g_urls{$link_id}) {
            my $url = _PrefixURL($g_urls{$link_id});
            # We've got to encode these to avoid conflicting
            # with italics, bold and strike through.
            $url =~ s!([*_~])!$g_escape_table{$1}!g;
            $result = "<img src=\"$url\" alt=\"$alt_text\"";
            if (defined $g_titles{$link_id}) {
                my $title = $g_titles{$link_id};
                $title =~ s!([*_~])!$g_escape_table{$1}!g;
                $result .= " title=\"$title\"";
            }
            $result .= $opt{empty_element_suffix};
        }
        else {
            # If there's no such link ID, leave intact:
            $result = $whole_match;
        }

        $result;
    }xsge;

    #
    # Next, handle inline images: ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $text =~ s{
        (               # wrap whole match in $1
          !\[
            (.*?)       # alt text = $2
          \]
          \(            # literal paren
            [ ]*
            <?(\S+?)>?  # src url = $3
            [ ]*
            (           # $4
              (['\042]) # quote char = $5
              (.*?)     # title = $6
              \5        # matching quote
              [ ]*
            )?          # title is optional
          \)
        )
    }{
        my $result;
        my $whole_match = $1;
        my $alt_text = _strip($2);
        my $url = $3;
        my $title = '';
        if (defined($6)) {
            $title = _strip($6);
        }

        $url = _PrefixURL($url);
        $alt_text =~ s/"/&quot;/g;
        $title =~ s/"/&quot;/g;
        # We've got to encode these to avoid conflicting
        # with italics, bold and strike through.
        $url =~ s!([*_~])!$g_escape_table{$1}!g;
        $result = "<img src=\"$url\" alt=\"$alt_text\"";
        # NOTE: $title starts out as '' so it is always defined here and a
        # title attribute (possibly empty) is emitted for every inline image.
        if (defined $title) {
            $title =~ s!([*_~])!$g_escape_table{$1}!g;
            $result .= " title=\"$title\"";
        }
        $result .= $opt{empty_element_suffix};

        $result;
    }xsge;

    #
    # Finally, handle reference-style implicitly labeled links: ![alt text]
    #
    $text =~ s{
        (               # wrap whole match in $1
          !\[
            (.*?)       # alt text = $2
          \]
        )
    }{
        my $result;
        my $whole_match = $1;
        my $alt_text = _strip($2);
        my $link_id = lc $alt_text;

        $alt_text =~ s/"/&quot;/g;
        if (defined $g_urls{$link_id}) {
            my $url = _PrefixURL($g_urls{$link_id});
            # We've got to encode these to avoid conflicting
            # with italics, bold and strike through.
            $url =~ s!([*_~])!$g_escape_table{$1}!g;
            $result = "<img src=\"$url\" alt=\"$alt_text\"";
            if (defined $g_titles{$link_id}) {
                my $title = $g_titles{$link_id};
                $title =~ s!([*_~])!$g_escape_table{$1}!g;
                $result .= " title=\"$title\"";
            }
            $result .= $opt{empty_element_suffix};
        }
        else {
            # If there's no such link ID, leave intact:
            $result = $whole_match;
        }

        $result;
    }xsge;

    return $text;
}
# Build an anchor id from link text.
#
# Every run of bytes other than "-", "a"-"z", "0"-"9" and "_" becomes a
# single "_".  A result longer than 64 bytes is replaced by its MD5 hex
# digest.  The id is then wrapped in underscores.  Returns '' for empty
# input.
sub _MakeAnchorId {
    use bytes;
    my $link = shift;
    $link =~ tr/-a-z0-9_/_/cs;
    return '' unless $link ne '';
    $link = md5_hex($link) if length($link) > 64;
    "_".$link."_";
}
# Register an anchor for header text and return its id.
#
# The text is lower-cased and whitespace-normalized to form the key.
# Returns '' when the key is already present in %g_anchors or no id can
# be made from it; otherwise records "#id" in %g_anchors and returns the
# id.
sub _GetNewAnchorId {
    my $link = _strip(lc(shift));
    return '' if defined($g_anchors{$link});
    my $id = _MakeAnchorId($link);
    return '' unless $id;
    $g_anchors{$link} = '#'.$id;
    $id;
}
# Convert Setext-style and atx-style headers to <h1>..<h6> elements.
#
#   $text     text to transform
#   $anchors  when true (and $opt{h1} is not already defined) the text of
#             the first non-empty level 1 header is stored in $opt{h1}
#
# Headers of level 1 to 3 get an id attribute when _GetNewAnchorId()
# supplies one.  Returns the transformed text.
sub _DoHeaders {
    my ($text, $anchors) = @_;
    my $h1;
    # Remembers the first non-empty level 1 header text (whitespace
    # normalized); a no-op when the h1 text is not wanted.
    my $geth1 = $anchors && !defined($opt{h1}) ? sub {
        return unless !defined($h1);
        my $h = shift;
        $h =~ s/^\s+//;
        $h =~ s/\s+$//;
        $h =~ s/\s+/ /g;
        $h1 = $h if $h ne "";
    } : sub {};

    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    #     Header 3
    #     ~~~~~~~~
    #
    $text =~ s{ ^(?:=+[ ]*\n)?[ ]*(.+?)[ ]*\n=+[ ]*\n+ }{
        my $h = $1;
        my $id = _GetNewAnchorId($h);
        &$geth1($h);
        $id = " id=\"$id\"" if $id ne "";
        "<h1$id>" . _RunSpanGamut($h) . "</h1>\n\n";
    }egmx;

    $text =~ s{ ^(?:-+[ ]*\n)?[ ]*(.+?)[ ]*\n-+[ ]*\n+ }{
        my $h = $1;
        my $id = _GetNewAnchorId($h);
        $id = " id=\"$id\"" if $id ne "";
        "<h2$id>" . _RunSpanGamut($h) . "</h2>\n\n";
    }egmx;

    $text =~ s{ ^(?:~+[ ]*\n)?[ ]*(.+?)[ ]*\n~+[ ]*\n+ }{
        my $h = $1;
        my $id = _GetNewAnchorId($h);
        $id = " id=\"$id\"" if $id ne "";
        "<h3$id>" . _RunSpanGamut($h) . "</h3>\n\n";
    }egmx;


    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text =~ s{
            ^(\#{1,6})  # $1 = string of #'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #'s (not counted)
            \n+
        }{
            my $h = $2;
            my $h_level = length($1);
            my $id = $h_level <= 3 ? _GetNewAnchorId($h) : '';
            &$geth1($h) if $h_level == 1;
            $id = " id=\"$id\"" if $id ne "";
            "<h$h_level$id>" . _RunSpanGamut($h) . "</h$h_level>\n\n";
        }egmx;

    $opt{h1} = $h1 if defined($h1) && $h1 ne "";
    return $text;
}
my ($marker_ul, $marker_ol, $marker_any, $roman_numeral, $greek_lower);
BEGIN {
    # Re-usable patterns to match list item bullets and number markers:
    # roman numerals i..xxvii in either case, lower-case Greek letters,
    # unordered bullets, ordered markers (number, single letter, roman
    # numeral or Greek letter followed by "." or ")") and either kind.
    $roman_numeral = qr/(?:
        [IiVvXx]|[Ii]{2,3}|[Ii][VvXx]|[VvXx][Ii]{1,3}|[Xx][Vv][Ii]{0,3}|
        [Xx][Ii][VvXx]|[Xx]{2}[Ii]{0,3}|[Xx]{2}[Ii]?[Vv]|[Xx]{2}[Vv][Ii]{1,2})/ox;
    $greek_lower = qr/(?:[\x{03b1}-\x{03c9}])/o;
    $marker_ul = qr/[*+-]/o;
    $marker_ol = qr/(?:\d+|[A-Za-z]|$roman_numeral|$greek_lower)[.\)]/o;
    $marker_any = qr/(?:$marker_ul|$marker_ol)/o;
}
# Classify an ordered list marker.
#
#   $list_type    list element name; anything but "ol" (any case) gives ""
#   $list_marker  the marker text, e.g. "iv." or "b)"
#   $last_marker  optional previous marker, used to decide whether a
#                 single i/v/x letter continues an alphabetic list
#
# Returns "I", "i", "A", "a" or "1" (the HTML ol type), or "" when the
# list is not ordered or an argument is missing.
sub _GetListMarkerType {
    my ($list_type, $list_marker, $last_marker) = @_;
    return "" unless $list_type && $list_marker && lc($list_type) eq "ol";
    my $last_marker_type = '';
    $last_marker_type = _GetListMarkerType($list_type, $last_marker)
        if defined($last_marker) &&
            # these are roman unless $last_marker type case matches and is 'a' or 'A'
            $list_marker =~ /^[IiVvXx][.\)]?$/;
    return "I" if $list_marker =~ /^[IVX]/ && $last_marker_type ne 'A';
    return "i" if $list_marker =~ /^[ivx]/ && $last_marker_type ne 'a';
    return "A" if $list_marker =~ /^[A-Z]/;
    return "a" if $list_marker =~ /^[a-z]/ || $list_marker =~ /^$greek_lower/o;
    return "1";
}
# Determine the marker type and the style class name for a list item.
#
# Arguments are the same as for _GetListMarkerType().  Returns the list
# ($list_marker_type, $class) where $class is "lower-greek" for Greek
# markers, one of "upper-roman", "lower-roman", "upper-alpha",
# "lower-alpha" or "decimal" for markers ending in ")", and "" otherwise.
sub _GetListItemTypeClass {
    my ($list_type, $list_marker, $last_marker) = @_;
    my $list_marker_type = _GetListMarkerType($list_type, $list_marker, $last_marker);
    # The anonymous sub only exists so that "return" can be used for the
    # early exits.
    my $ans = &{sub{
        return "" unless length($list_marker) >= 2 && $list_marker_type =~ /^[IiAa1]$/;
        return "lower-greek" if $list_marker_type eq "a" && $list_marker =~ /^$greek_lower/o;
        return "" unless $list_marker =~ /\)$/;
        return "upper-roman" if $list_marker_type eq "I";
        return "lower-roman" if $list_marker_type eq "i";
        return "upper-alpha" if $list_marker_type eq "A";
        return "lower-alpha" if $list_marker_type eq "a";
        return "decimal";
    }};
    return ($list_marker_type, $ans);
}
# Lower-case roman numerals i..xxvii mapped to their integer values.
my %_roman_number_table;
BEGIN {
    %_roman_number_table = (
        i       => 1,
        ii      => 2,
        iii     => 3,
        iv      => 4,
        v       => 5,
        vi      => 6,
        vii     => 7,
        viii    => 8,
        ix      => 9,
        x       => 10,
        xi      => 11,
        xii     => 12,
        xiii    => 13,
        xiv     => 14,
        xv      => 15,
        xvi     => 16,
        xvii    => 17,
        xviii   => 18,
        xix     => 19,
        xx      => 20,
        xxi     => 21,
        xxii    => 22,
        xxiii   => 23,
        xxiv    => 24,
        xxv     => 25,
        xxvi    => 26,
        xxvii   => 27
    );
}
# Lower-case greek letter => integer value.  A table is needed (rather
# than code point arithmetic) because final sigma (U+03C2) and sigma
# (U+03C3) share the value 18.  U+00B5 is the "micro" sign, not "mu",
# and is deliberately not a key here.
my %_greek_number_table;
BEGIN {
    my $value = 0;
    for my $code_point (0x03b1 .. 0x03c9) {     # alpha .. omega
        # sigma (U+03C3) re-uses the value just given to final sigma
        ++$value unless $code_point == 0x03c3;
        $_greek_number_table{chr($code_point)} = $value;
    }
}
# Convert a list marker value (with its trailing "." or ")" already
# removed) into the integer it stands for.
#
# Params:
#   $list_marker_type - "1", "a"/"A" or "i"/"I"
#   $marker_val       - the marker text, e.g. "7", "c", "xiv" or a greek letter
#
# Returns the integer value.  Decimal markers may yield 0; every other
# result is at least 1, and anything unrecognised counts as 1.
sub _GetMarkerIntegerNum {
    my ($list_marker_type, $marker_val) = @_;
    my $num;

    if ($list_marker_type eq "1") {
        $num = 0 + $marker_val;
        # zero is only a legal value for decimal markers
        return $num if $num == 0;
    }
    else {
        my $kind = lc($list_marker_type);
        if ($kind eq "a") {
            # greek letters are looked up as-is, latin letters by position
            $num = $_greek_number_table{$marker_val};
            $num = ord(lc($marker_val)) - ord("a") + 1 unless defined($num);
        }
        elsif ($kind eq "i") {
            $num = $_roman_number_table{lc($marker_val)};
            $num = 1 unless defined($num);
        }
        else {
            $num = 1;
        }
    }
    return $num >= 1 ? $num : 1;
}
# Build the run of empty "counter increment" <span> elements needed to
# advance a list counter from $from up to $to.
#
# Params:
#   $from, $to - current and wanted counter values
#   $extra     - optional attribute text inserted (after a space) before
#                the class attribute of every span
#
# Returns the spans, one per line, using the largest steps first
# (10, then 5, then 2, then 1); "" if $from is not below $to.
sub _IncrList {
    my ($from, $to, $extra) = @_;
    my $attr = defined($extra) ? " $extra" : "";
    my @spans;

    for my $step (10, 5, 2) {
        while ($from + $step <= $to) {
            push @spans, "<span$attr class=\"$opt{style_prefix}ol-incr-$step\"></span>\n";
            $from += $step;
        }
    }
    while ($from < $to) {
        push @spans, "<span$attr class=\"$opt{style_prefix}ol-incr\"></span>\n";
        ++$from;
    }

    return join("", @spans);
}
# _DoLists($text)
#
# Replaces every Markdown list found in $text with the corresponding HTML
# <ul> or <ol> element and returns the resulting text.  The items of each
# list are rendered by _ProcessListItems(); the enclosing element and its
# type/start/class attributes are produced here.
#
# The indent width is read from $opt{indent_width}.  Which of the two
# substitutions below runs depends on $g_list_level, i.e. on whether this
# call is already inside a list.
1346 sub _DoLists {
1348 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1350 my $text = shift;
1351 my $indent = $opt{indent_width};
1352 my $less_than_indent = $indent - 1;
1353 my $less_than_double_indent = 2 * $indent - 1;
1355 # Re-usable pattern to match any entire ul or ol list:
1356 my $whole_list = qr{
1357 ( # $1 (or $_[0]) = whole list
1358 ( # $2 (or $_[1])
1359 (?:(?<=\n)|\A)
1360 [ ]{0,$less_than_indent}
1361 (${marker_any}) # $3 (or $_[2]) = first list item marker
1362 [ ]+
1364 (?s:.+?)
1365 ( # $4 (or $_[3])
1368 \n{2,}
1369 (?=\S)
1370 (?! # Negative lookahead for another list item marker
1371 ${marker_any}[ ]
1375 }mx;
# Callback run for each matched list; it is handed the four captures of
# $whole_list and returns the HTML that replaces the matched text.
1377 my $list_item_sub = sub {
1378 my $list = $_[0];
1379 my $list_type = ($_[2] =~ m/$marker_ul/) ? "ul" : "ol";
1380 my $list_att = "";
1381 my $list_class = "";
1382 my $list_incr = "";
1383 # Turn double returns into triple returns, so that we can make a
1384 # paragraph for the last item in a list, if necessary:
1385 $list =~ s/\n\n/\n\n\n/g;
1386 my ($result, $first_marker, $fancy) = _ProcessListItems($list_type, $list);
# $list_marker_type is "" for unordered lists, so the attribute logic
# below only applies to ordered lists.
1387 my $list_marker_type = _GetListMarkerType($list_type, $first_marker);
1388 if ($list_marker_type) {
1389 $first_marker =~ s/[.\)]$//;
1390 my $first_marker_num = _GetMarkerIntegerNum($list_marker_type, $first_marker);
1391 $list_att = $list_marker_type eq "1" ? "" : " type=\"$list_marker_type\"";
1392 if ($fancy) {
# Fancy lists: a first number above 1 is rounded down to 1, 5 or 10 for
# the start attribute, and _IncrList() supplies the increment spans that
# advance from there to the real first number.
1393 $list_class = " class=\"$opt{style_prefix}ol\"";
1394 my $start = $first_marker_num;
1395 $start = 10 if $start > 10;
1396 $start = 5 if $start > 5 && $start < 10;
1397 $start = 1 if $start > 1 && $start < 5;
1398 $list_att .= " start=\"$start\"" unless $start == 1;
1399 $list_incr = _IncrList($start, $first_marker_num);
1400 } else {
1401 $list_class = " class=\"$opt{style_prefix}lc-greek\""
1402 if $list_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
1403 $list_att .= " start=\"$first_marker_num\"" unless $first_marker_num == 1;
1406 $result = "<$list_type$list_att$list_class>\n$list_incr" . $result . "</$list_type>\n";
1407 $result;
1410 # We use a different prefix before nested lists than top-level lists.
1411 # See extended comment in _ProcessListItems().
1413 # Note: (jg) There's a bit of duplication here. My original implementation
1414 # created a scalar regex pattern as the conditional result of the test on
1415 # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1416 # substitution once, using the scalar as the pattern. This worked,
1417 # everywhere except when running under MT on my hosting account at Pair
1418 # Networks. There, this caused all rebuilds to be killed by the reaper (or
1419 # perhaps they crashed, but that seems incredibly unlikely given that the
1420 # same script on the same server ran fine *except* under MT). I've spent
1421 # more time trying to figure out why this is happening than I'd like to
1422 # admit. My only guess, backed up by the fact that this workaround works,
1423 # is that Perl optimizes the substitution when it can figure out that the
1424 # pattern will never change, and when this optimization isn't on, we run
1425 # afoul of the reaper. Thus, the slightly redundant code that uses two
1426 # static s/// patterns rather than one conditional pattern.
1428 # Note: (kjm) With the addition of the two-of-the-same-kind-in-a-row-
1429 # starts-a-list-at-the-top-level rule the two patterns really are somewhat
1430 # different now, but the duplication has pretty much been eliminated via
1431 # use of a separate sub which has the side-effect of making the below
1432 # two cases much easier to grok all at once.
1434 if ($g_list_level) {
1435 $text =~ s{
1437 $whole_list
1439 &$list_item_sub($1, $2, $3, $4);
1440 }egmx;
1442 else {
1443 $text =~ s{
1444 (?: (?<=\n\n) |
1445 \A\n? |
1446 (?:(?<=\n) # two of the same kind of marker lines
1447 (?=[ ]{0,$less_than_indent}$marker_ul[ ].*\n
1448 [ ]{0,$less_than_indent}$marker_ul[ ])) |
1449 (?:(?<=\n) # in a row will start a list
1450 (?=[ ]{0,$less_than_indent}$marker_ol[ ].*\n
1451 [ ]{0,$less_than_indent}$marker_ol[ ])) |
1452 (?:(?<=\n) # or any marker and a sublist marker
1453 (?=[ ]{0,$less_than_indent}$marker_any[ ].*\n
1454 [ ]{$indent,$less_than_double_indent}$marker_any[ ]))
1456 $whole_list
1458 &$list_item_sub($1, $2, $3, $4);
1459 }egmx;
1462 return $text;
# _ProcessListItems($list_type, $list_str)
#
# Process the contents of a single ordered or unordered list, splitting it
# into individual list items.
#
# Params:
#   $list_type - "ul" or "ol"
#   $list_str  - the text of the whole list
#
# Returns a three element list:
#   the HTML for the items (<li> elements plus any counter increment spans),
#   the first list marker encountered (undef if none was found),
#   a true value if any ordered item used a styled ")" marker ("fancy").
sub _ProcessListItems {
    my $list_type = shift;
    my $list_str = shift;

    # The $g_list_level global keeps track of when we're inside a list.
    # Each time we enter a list, we increment it; when we leave a list,
    # we decrement. If it's zero, we're not in a list anymore.
    #
    # We do this because when we're not inside a list, we want to treat
    # something like this:
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # As a single paragraph, despite the fact that the second line starts
    # with a digit-period-space sequence.
    #
    # Whereas when we're inside a list (or sub-list), that line will be
    # treated as the start of a sub-list. What a kludge, huh? This is
    # an aspect of Markdown's syntax that's hard to parse perfectly
    # without resorting to mind-reading. Perhaps the solution is to
    # change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".

    $g_list_level++;
    my $marker_kind = $list_type eq "ul" ? $marker_ul : $marker_ol;
    my $first_marker;
    my $first_marker_type;
    my $first_marker_num;
    my $last_marker;
    my $fancy;
    my $skipped;
    my $typechanged;
    my $next_num = 1;

    # trim trailing blank lines:
    $list_str =~ s/\n{2,}\z/\n/;

    my $result = "";
    my $oldpos = 0;
    pos($list_str) = 0;
    while ($list_str =~ m{\G                # start where we left off
            (\n+)?                          # leading line = $1
            (^[ ]*)                         # leading whitespace = $2
            ($marker_any) [ ] ([ ]*)        # list marker = $3 leading item space = $4
        }cgmx) {
        my $leading_line = $1;
        my $leading_space = $2;
        my $list_marker = $3;
        my $list_marker_len = length($list_marker);
        my $leading_item_space = $4;
        if ($-[0] > $oldpos) {
            $result .= substr($list_str, $oldpos, $-[0] - $oldpos); # Sort-of $`
            $oldpos = $-[0]; # point at start of this entire match
        }
        if (!defined($first_marker)) {
            $first_marker = $list_marker;
            $first_marker_type = _GetListMarkerType($list_type, $first_marker);
            if ($first_marker_type) {
                (my $marker_val = $first_marker) =~ s/[.\)]$//;
                $first_marker_num = _GetMarkerIntegerNum($first_marker_type, $marker_val);
                $next_num = $first_marker_num;
                $skipped = 1 if $next_num != 1;
            }
        } elsif ($list_marker !~ /$marker_kind/) {
            # Wrong marker kind, "fix up" the marker to a correct "lazy" marker
            # But keep the old length in $list_marker_len
            $list_marker = $last_marker;
        }

        # Now grab the rest of this item's data upto but excluding the next
        # list marker at the SAME indent level, but sublists must be INCLUDED

        my $item = "";
        while ($list_str =~ m{\G
                ((?:.+?)(?:\n{1,2}))        # list item text = $1
                (?= \n* (?: \z |            # end of string OR
                    (^[ ]*)                 # leading whitespace = $2
                    ($marker_any)           # next list marker = $3
                    ([ ]+) ))               # one or more spaces after marker = $4
            }cgmxs) {

            # If $3 has a left edge that is at the left edge of the previous
            # marker OR $3 has a right edge that is at the right edge of the
            # previous marker then we stop; otherwise we go on

            $item .= substr($list_str, $-[0], $+[0] - $-[0]); # $&
            last if !defined($4) || length($2) == length($leading_space) ||
                length($2) + length($3) == length($leading_space) + $list_marker_len;
            # move along, you're not the marker droid we're looking for...
            $item .= substr($list_str, $+[0], $+[4] - $+[0]);
            pos($list_str) = $+[4]; # ...move along over the marker droid
        }
        # Remember where we parked
        $oldpos = pos($list_str);

        # Process the $list_marker $item

        my $liatt = '';
        my $checkbox = '';
        my $incr = '';

        if ($list_type eq "ul" && !$leading_item_space && $item =~ /^\[([ xX])\] +(.*)$/s) {
            my $checkmark = lc $1;
            $item = $2;
            my ($checkbox_class, $checkbox_val);
            if ($checkmark eq "x") {
                ($checkbox_class, $checkbox_val) = ("checkbox-on", "x");
            } else {
                ($checkbox_class, $checkbox_val) = ("checkbox-off", "&#160;");
            }
            $liatt = " class=\"$opt{style_prefix}$checkbox_class\"";
            $checkbox = "<span><span></span></span><span></span><span>[<tt>$checkbox_val</tt>]&#160;</span>";
        } else {
            my $list_marker_type;
            ($list_marker_type, $liatt) = _GetListItemTypeClass($list_type, $list_marker, $last_marker);
            if ($list_type eq "ol" && defined($first_marker)) {
                # Declare first, assign second: "my $x = ... if COND" has
                # undefined behaviour according to perlsyn.
                my $styled;
                $styled = $fancy = 1 if $liatt && $list_marker =~ /\)$/;
                my ($sfx, $dash) = ("", "");
                ($sfx, $dash) = ("li", "-") if $styled;
                if ($liatt =~ /lower/) {
                    $sfx .= "${dash}lc";
                } elsif ($liatt =~ /upper/) {
                    $sfx .= "${dash}uc";
                }
                $sfx .= "-greek" if $liatt =~ /greek/;
                $liatt = " class=\"$opt{style_prefix}$sfx\"" if $sfx;
                $typechanged = 1 if $list_marker_type ne $first_marker_type;
                (my $marker_val = $list_marker) =~ s/[.\)]$//;
                my $marker_num = _GetMarkerIntegerNum($list_marker_type, $marker_val);
                # numbers never go backwards; a gap is bridged with
                # increment spans tagged with the current list level
                $marker_num = $next_num if $marker_num < $next_num;
                $skipped = 1 if $next_num < $marker_num;
                $incr = _IncrList($next_num, $marker_num, "incrlevel=$g_list_level");
                $liatt = " value=\"$marker_num\"$liatt" if $fancy || $skipped;
                $liatt = " type=\"$list_marker_type\"$liatt" if $styled || $typechanged;
                $next_num = $marker_num + 1;
            }
        }
        $last_marker = $list_marker;

        if ($leading_line or ($item =~ m/\n{2,}/)) {
            $item = _RunBlockGamut(_Outdent($item));
        }
        else {
            # Recursion for sub-lists:
            $item = _DoLists(_Outdent($item));
            chomp $item;
            $item = _RunSpanGamut($item);
        }

        # Append to $result
        $result .= "$incr<li$liatt>" . $checkbox . $item . "</li>\n";
    }
    if ($fancy) {
        # remove "incrlevel=$g_list_level " parts
        $result =~ s{<span incrlevel=$g_list_level class="$opt{style_prefix}ol-incr((?:-\d{1,2})?)">}
            {<span class="$opt{style_prefix}ol-incr$1">}g;
    } else {
        # remove the $g_list_level incr spans entirely
        $result =~ s{<span incrlevel=$g_list_level class="$opt{style_prefix}ol-incr(?:-\d{1,2})?"></span>\n}{}g;
        # remove the class="$opt{style_prefix}lc-greek" if first_marker is greek
        $result =~ s{(<li[^>]*?) class="$opt{style_prefix}lc-greek">}{$1>}g
            if defined($first_marker_type) && $first_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
    }

    # Anything left over (similar to $') goes into result, but this should always be empty
    $result .= _RunBlockGamut(substr($list_str, pos($list_str)));

    $g_list_level--;
    return ($result, $first_marker, $fancy);
}
# _DoCodeBlocks($text)
#
# Finds indented code blocks in $text (lines starting with
# $opt{indent_width} spaces), renders each one as HTML and replaces it in
# the text with a block id placeholder.  The rendered HTML is kept in
# %g_code_blocks under that id.  Returns the modified text.
1645 sub _DoCodeBlocks {
1647 # Process Markdown `<pre><code>` blocks.
1650 my $text = shift;
1652 $text =~ s{
1653 (?:\n\n|\A\n?)
1654 ( # $1 = the code block -- one or more lines, starting with indent_width spaces
1656 (?:[ ]{$opt{indent_width}}) # Lines must start with indent_width of spaces
1657 .*\n+
1660 ((?=^[ ]{0,$opt{indent_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1662 my $codeblock = $1;
1664 $codeblock =~ s/\n\n\n/\n\n/g; # undo "paragraph for last list item" change
1665 $codeblock = _EncodeCode(_Outdent($codeblock));
1666 $codeblock =~ s/\A\n+//; # trim leading newlines
1667 $codeblock =~ s/\s+\z//; # trim trailing whitespace
1669 my $result = "<div class=\"$opt{style_prefix}code\"><pre style=\"display:none\"></pre><pre><code>"
1670 . $codeblock . "\n</code></pre></div>";
# Store the finished HTML under its block id and leave only the id,
# set off by blank lines, in the text.
1671 my $key = block_id($result);
1672 $g_code_blocks{$key} = $result;
1673 "\n\n" . $key . "\n\n";
1674 }egmx;
1676 return $text;
# _DoCodeSpans($text)
#
# Converts backtick-delimited code spans in $text into <code> elements.
# Spaces at the edges of the span content are removed and the content is
# passed through _EncodeCode().  Returns the modified text.
1680 sub _DoCodeSpans {
1682 # * Backtick quotes are used for <code></code> spans.
1684 # * You can use multiple backticks as the delimiters if you want to
1685 # include literal backticks in the code span. So, this input:
1687 # Just type ``foo `bar` baz`` at the prompt.
1689 # Will translate to:
1691 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1693 # There's no arbitrary limit to the number of backticks you
1694 # can use as delimiters. If you need three consecutive backticks
1695 # in your code, use four for delimiters, etc.
1697 # * You can use spaces to get literal backticks at the edges:
1699 # ... type `` `bar` `` ...
1701 # Turns to:
1703 # ... type <code>`bar`</code> ...
1706 my $text = shift;
1708 $text =~ s@
1709 (`+) # $1 = Opening run of `
1710 (.+?) # $2 = The code block
1711 (?<!`)
1712 \1 # Matching closer
1713 (?!`)
1715 my $c = "$2";
1716 $c =~ s/^[ ]+//g; # leading whitespace
1717 $c =~ s/[ ]+$//g; # trailing whitespace
1718 $c = _EncodeCode($c);
1719 "<code>$c</code>";
1720 @egsx;
1722 return $text;
# _EncodeCode($code)
#
# Escape the characters that must be literal inside a Markdown code run
# and return the escaped string:
#   - every "&" becomes "&amp;" (entities are not entities inside code),
#   - "$" becomes "&#036;", but only under Blosxom ($_haveBX), which
#     interpolates Perl variables in article bodies,
#   - "<" and ">" become "&lt;" and "&gt;",
#   - characters that are magic in Markdown are replaced by their
#     %g_escape_table entries so that later passes ignore them.
sub _EncodeCode {
    my $code = shift;

    $code =~ s/&/&amp;/g;
    $code =~ s/\$/&#036;/g if $_haveBX;
    $code =~ s/</&lt;/g;
    $code =~ s/>/&gt;/g;
    $code =~ s/([*_~{}\[\]\\])/$g_escape_table{$1}/g;

    return $code;
}
# _DoItalicsAndBoldAndStrike($text)
#
# Convert Markdown emphasis markup into HTML and return the result:
#   **text** and __text__  =>  <strong>
#   ~~text~~               =>  <strike>
#   *text* and _text_      =>  <em>
# The underscore forms only apply when the delimiters are not adjacent
# to a word character on their outer side.
sub _DoItalicsAndBoldAndStrike {
    my $text = shift;

    # Order matters: the doubled delimiters (<strong>) must be consumed
    # before the single delimiter (<em>) rules get a chance to see them.
    my @rules = (
        [ strong => qr{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }sx ],
        [ strong => qr{ (?<!\w) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\w) }sx ],
        [ strike => qr{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }sx ],
        [ em     => qr{ \* (?=\S) (.+?) (?<=\S) \* }sx ],
        [ em     => qr{ (?<!\w) _ (?=\S) (.+?) (?<=\S) _ (?!\w) }sx ],
    );

    for my $rule (@rules) {
        my ($tag, $pattern) = @$rule;
        $text =~ s{$pattern}{<$tag>$1</$tag>}g;
    }

    return $text;
}
# _DoBlockQuotes($text)
#
# Wraps each run of ">"-quoted lines in $text in a <blockquote> element.
# One level of quoting is stripped and the remaining content is run back
# through _RunBlockGamut(), so nested quotes and other block constructs
# inside the quote are processed too.  Returns the modified text.
1774 sub _DoBlockQuotes {
1775 my $text = shift;
1777 $text =~ s{
1778 ( # Wrap whole match in $1
1780 ^[ ]*>[ ]? # '>' at the start of a line
1781 .+\n # rest of the first line
1782 (.+\n)* # subsequent consecutive lines
1783 \n* # blanks
1787 my $bq = $1;
1788 $bq =~ s/^[ ]*>[ ]?//gm; # trim one level of quoting
1789 $bq =~ s/^[ ]+$//mg; # trim whitespace-only lines
1790 $bq = _RunBlockGamut($bq); # recurse
# indent every line of the rendered quote
1792 $bq =~ s/^/ /mg;
1793 "<blockquote>\n$bq\n</blockquote>\n\n";
1794 }egmx;
1797 return $text;
# _FormParagraphs($text)
#
# Params:
#   $text - string to process with html <p> tags
#
# Splits $text into chunks at runs of two or more newlines.  A chunk that
# is a key of %g_html_blocks is replaced by the stored HTML block, a chunk
# that is a key of %g_code_blocks is left alone, and every other chunk is
# run through _RunSpanGamut() and wrapped in <p>...</p> (dropping leading
# spaces).  Returns the chunks joined by blank lines.
sub _FormParagraphs {
    my $text = shift;

    # Strip leading and trailing lines:
    $text =~ s/\A\n+//;
    $text =~ s/\n+\z//;

    my @blocks;
    for my $graf (split(/\n{2,}/, $text)) {
        if (defined($g_html_blocks{$graf})) {
            # Unhashify HTML blocks
            push @blocks, $g_html_blocks{$graf};
        }
        elsif (defined($g_code_blocks{$graf})) {
            push @blocks, $graf;
        }
        else {
            # Wrap <p> tags.
            my $para = _RunSpanGamut($graf);
            $para =~ s/^([ ]*)/<p>/;
            push @blocks, $para . "</p>";
        }
    }

    return join "\n\n", @blocks;
}
# $g_possible_tag_name - case-insensitive pattern for something that could
#                        be a tag name: one to ten letters, or h1 .. h6.
# %ok_tag_name         - tag name => flag.  _DoTag() passes a tag through
#                        only when its lower-cased name has a true flag.
#                        Names registered with a 0 flag are not passed
#                        through by default, but because they exist as
#                        keys _SetAllowedTag() is able to enable them.
1838 my $g_possible_tag_name;
1839 my %ok_tag_name;
1840 BEGIN {
1841 # note: length("blockquote") == 10
1842 $g_possible_tag_name = qr/(?i:[a-z]{1,10}|h[1-6])/o;
1843 %ok_tag_name = map({$_ => 1} qw(
1844 a abbr acronym address
1845 b basefont bdo big blockquote br
1846 caption center cite code col colgroup
1847 dd del dfn div dl dt
1849 font
1850 h1 h2 h3 h4 h5 h6 hr
1851 i img ins
1855 p pre
1857 s samp small span strike strong sub sup
1858 table tbody td tfoot th thead tr tt
1859 u ul
1862 $ok_tag_name{$_} = 0 foreach (qw(
1863 dir menu
# _SetAllowedTag($tag, $forbid)
#
# Allow (or, when $forbid is true, disallow) a tag name in %ok_tag_name.
# Only names that are already keys of the table can be changed; an
# undefined or unknown $tag is ignored.
sub _SetAllowedTag {
    my ($tag, $forbid) = @_;
    if (defined($tag) && exists($ok_tag_name{$tag})) {
        $ok_tag_name{$tag} = $forbid ? 0 : 1;
    }
}
# _DoTag($tag)
#
# Encode the leading '<' of any non-tags.
# However, "<?", "<!" and "<$" are passed through (legacy on that "<$" thing)
#
# $tag is returned unchanged when it is a start or end tag whose name
# matches $g_possible_tag_name and is enabled in %ok_tag_name; otherwise
# every "<" in it is replaced by "&lt;".
sub _DoTag {
    my $tag = shift;
    return $tag if $tag =~ /^<[?\$!]/;

    my $name;
    if ($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)}) {
        $name = $1;         # start (or empty-element) tag
    }
    elsif ($tag =~ m{^</($g_possible_tag_name)\s*>}) {
        $name = $1;         # end tag
    }
    return $tag if defined($name) && $ok_tag_name{lc($name)};

    $tag =~ s/</&lt;/g;
    return $tag;
}
# _EncodeAmpsAndAngles($text)
#
# Smart processing for ampersands and angle brackets that need to be
# encoded.  Returns the processed text.
sub _EncodeAmpsAndAngles {
    my $text = shift;

    for ($text) {
        # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
        #   http://bumppo.net/projects/amputator/
        # An "&" that does not start an entity becomes "&amp;".
        s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;

        # Encode naked <'s: one not followed by a character that could
        # start a tag, and one with no ">" after it before the end.
        s{<(?![a-z/?\$!])}{&lt;}gi;
        s{<(?=[^>]*$)}{&lt;}g;

        # Encode <'s that cannot possibly be a start or end tag
        s{(<[^>]*>)}{_DoTag($1)}ige;
    }

    return $text;
}
# _EncodeBackslashEscapes($text)
#
# Parameter:  String.
# Returns:    String after replacing each backslash escape sequence with
#             the %g_escape_table entry for the escaped character.
sub _EncodeBackslashEscapes {
    my $text = shift;

    # Must process escaped backslashes first.
    $text =~ s!\\\\!$g_escape_table{'\\'}!go;
    $text =~ s{\\([`*_~{}\[\]()>#+\-.!`])}{$g_escape_table{$1}}g;

    return $text;
}
# _DoAutoLinks($text)
#
# Turns automatic links in $text into <a> elements and returns the result.
# Handled here: <scheme:...> URLs, <address@domain> email addresses, bare
# URLs and [RFC nnnn] references.
1924 sub _DoAutoLinks {
1925 local $_ = shift;
# Explicit <http:...>, <https:...>, <ftp:...> and <ftps:...> links; the
# angle brackets stay visible (as &lt; and &gt;) in the link text.
1927 s{<((https?|ftps?):[^'\042>\s]+)>}{<a href="$1">&lt;$1&gt;</a>}gi;
1929 # Email addresses: <address@domain.foo>
1932 (?:mailto:)?
1934 [-.\w]+
1936 [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1940 _EncodeEmailAddress(_UnescapeSpecialChars($1), "&#x3c;", "&#62;");
1941 }egix;
1943 # (kjm) I don't do "x" patterns
# Bare http(s)/ftp(s) URLs that are not directly preceded by a quote, an
# angle bracket or an encoded "<".
1944 s{(?<![\042'<>])(?<!&[Ll][Tt];)(?<!&#60;)(?<!&#x3[Cc];)\b((?:https?|ftps?)://(?:[-a-zA-Z0-9./?\&\%=_~!*;:\@+\$,\x23](?:(?<![.,:;])|(?=[^\s])))+)}
1945 {<a href="$1">$1</a>}sog;
# [RFC nnnn] and [RFCnnnn] (one to five digits) link to the RFC's page on
# tools.ietf.org, unless the bracketed text sits next to other brackets.
1946 s{(?<![][])(?<!\] )\[RFC( ?)([0-9]{1,5})\](?![][])(?! \[)}
1947 {[<a href="http://tools.ietf.org/html/rfc$2">RFC$1$2</a>]}sog;
1949 return $_;
# _EncodeEmailAddress($addr, $prefix, $suffix)
#
#   Input:  an email address, e.g. "foo@example.com", plus optional text
#           to show immediately before and after the visible address.
#
#   Output: the email address as a mailto link, with the characters of
#           the address encoded as a mix of decimal entities, hex
#           entities and raw characters, in the hopes of foiling most
#           address harvesting spam bots.
#
#   The random generator is seeded from the MD5 digest of the address, so
#   a given address always produces the same output.
#
#   Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
#   mailing list: <http://tinyurl.com/yu7ue>
sub _EncodeEmailAddress {
    my ($addr, $prefix, $suffix) = @_;
    $prefix = "" if !defined($prefix);
    $suffix = "" if !defined($suffix);

    srand(unpack('N', md5($addr)));

    my $as_decimal = sub { '&#' . ord($_[0]) . ';' };
    my $as_hex     = sub { '&#x' . sprintf("%X", ord($_[0])) . ';' };

    my $obscure = sub {
        my $char = shift;
        if ($char eq '@') {
            # this *must* be encoded. I insist.
            # NOTE: "int rand 1" is always 0, so this always picks the
            # decimal encoder (while still consuming one random number).
            my @choices = ($as_decimal, $as_hex);
            return $choices[int rand 1]->($char);
        }
        # leave ':' alone (to spot mailto: later)
        return $char if $char eq ':';

        # roughly 10% raw, 45% hex, 45% dec
        my $r = rand;
        return $char if $r > .9;
        return $as_hex->($char) if $r < .45;
        return $as_decimal->($char);
    };

    my $href = "mailto:" . $addr;
    $href =~ s{(.)}{$obscure->($1)}ge;

    # strip the mailto: from the visible part
    (my $visible = $href) =~ s/^.+?://;

    return qq{<a href="$href">$prefix$visible$suffix</a>};
}
# _UnescapeSpecialChars($text)
#
# Swap back in all the special characters we've hidden: every value of
# %g_escape_table found in $text is replaced by the character that is
# its key.  Returns the restored text.
sub _UnescapeSpecialChars {
    my $text = shift;

    foreach my $char (keys %g_escape_table) {
        my $hash = $g_escape_table{$char};
        $text =~ s/$hash/$char/g;
    }
    return $text;
}
# _TokenizeHTML($str)
#
#   Parameter:  String containing HTML markup.
#   Returns:    Reference to an array of the tokens comprising the input
#               string. Each token is either a tag (possibly with nested
#               tags contained therein, such as <a href="<MTFoo>">), or a
#               run of text between tags. Each element of the array is a
#               two-element array; the first is either 'tag' or 'text';
#               the second is the actual value.
#
#   Comments and processing instructions count as tags.  Tags may nest
#   up to six levels deep.
#
#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#       <http://www.bradchoate.com/past/mtregex.php>
sub _TokenizeHTML {
    my $str = shift;

    my $depth = 6;
    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
                   (?s: <\? .*? \?> ) |              # processing instruction
                   $nested_tags/iox;                 # nested tags

    my @tokens;
    my $cursor = 0;     # offset just past the previous tag
    while ($str =~ m/($match)/g) {
        my ($tag_start, $tag_end) = ($-[1], $+[1]);
        if ($cursor < $tag_start) {
            push @tokens, ['text', substr($str, $cursor, $tag_start - $cursor)];
        }
        push @tokens, ['tag', substr($str, $tag_start, $tag_end - $tag_start)];
        $cursor = $tag_end;
    }

    # whatever follows the last tag is one final text token
    my $len = length $str;
    push @tokens, ['text', substr($str, $cursor, $len - $cursor)] if $cursor < $len;

    return \@tokens;
}
# _Outdent($text)
#
# Remove one level of indent from every line of $text: up to
# $opt{indent_width} leading spaces are stripped from each line.
# Returns the outdented text.
sub _Outdent {
    my ($text) = @_;
    my $width = $opt{indent_width};

    $text =~ s/^ {1,$width}//gm;
    return $text;
}
# _Detab($text, $ts)
#
# Expand tabs to spaces.  Tab stops are every $ts columns; when $ts is
# omitted (or false) $opt{tab_width} is used.  Returns the expanded text.
sub _Detab {
    my ($text, $ts) = @_;
    $ts ||= $opt{tab_width};

    # From the Perl camel book "Fluent Perl" section (slightly modified):
    # each run of tabs is replaced by enough spaces to reach the tab stop
    # implied by the text that precedes it.
    $text =~ s{(.*?)(\t+)}{
        my ($lead, $tabs) = ($1, $2);
        $lead . ' ' x (length($tabs) * $ts - length($lead) % $ts);
    }ge;

    return $text;
}
# _PrefixURL($url)
#
# Add URL prefix if needed.
#
# Returns $url unchanged when neither $opt{url_prefix} nor
# $opt{img_prefix} is set, or when $url is absolute (starts with "//" or
# with a "scheme:").  Otherwise the applicable prefix is prepended, with
# exactly one "/" between prefix and URL.  $opt{img_prefix} is used in
# preference to $opt{url_prefix} for URLs ending in an image suffix
# (.png, .gif, .jpg, .jpeg, .svg or .svgz; case-insensitive).
sub _PrefixURL {
    my $url = shift;

    return $url unless $opt{url_prefix} ne '' || $opt{img_prefix} ne '';
    return $url if $url =~ m,^//, || $url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/;
    my $ans = $opt{url_prefix};
    # "svgz?" so that plain ".svg" is recognized as well as ".svgz"
    $ans = $opt{img_prefix}
        if $opt{img_prefix} ne '' && $url =~ /\.(?:png|gif|jpe?g|svgz?)$/i;
    return $url unless $ans ne '';
    $ans .= '/' if substr($ans, -1, 1) ne '/';
    $ans .= substr($url, 0, 1) eq '/' ? substr($url, 1) : $url;
    return $ans;
}
2103 BEGIN {
2104 $g_style_sheet = <<'STYLESHEET';
2106 <style type="text/css">
2107 /* <![CDATA[ */
2109 /* Markdown.pl fancy style sheet
2110 ** Copyright (C) 2017 Kyle J. McKay.
2111 ** All rights reserved.
2113 ** Redistribution and use in source and binary forms, with or without
2114 ** modification, are permitted provided that the following conditions are met:
2116 ** 1. Redistributions of source code must retain the above copyright notice,
2117 ** this list of conditions and the following disclaimer.
2119 ** 2. Redistributions in binary form must reproduce the above copyright
2120 ** notice, this list of conditions and the following disclaimer in the
2121 ** documentation and/or other materials provided with the distribution.
2123 ** 3. Neither the name of the copyright holder nor the names of its
2124 ** contributors may be used to endorse or promote products derived from
2125 ** this software without specific prior written permission.
2127 ** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2128 ** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2129 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2130 ** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
2131 ** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2132 ** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2133 ** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2134 ** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2135 ** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2136 ** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2137 ** POSSIBILITY OF SUCH DAMAGE.
2140 div.%(base)code-bt > pre, div.%(base)code > pre {
2141 margin: 0;
2142 padding: 0;
2143 overflow: auto;
2146 div.%(base)code-bt > pre > code, div.%(base)code > pre > code {
2147 display: inline-block;
2148 margin: 0;
2149 padding: 0.5em 0;
2150 border-top: thin dotted;
2151 border-bottom: thin dotted;
2154 ol.%(base)ol {
2155 counter-reset: %(base)item;
2157 ol.%(base)ol[start="0"] {
2158 counter-reset: %(base)item -1;
2160 ol.%(base)ol[start="5"] {
2161 counter-reset: %(base)item 4;
2163 ol.%(base)ol[start="10"] {
2164 counter-reset: %(base)item 9;
2166 ol.%(base)ol > span.%(base)ol-incr {
2167 counter-increment: %(base)item;
2169 ol.%(base)ol > span.%(base)ol-incr-2 {
2170 counter-increment: %(base)item 2;
2172 ol.%(base)ol > span.%(base)ol-incr-5 {
2173 counter-increment: %(base)item 5;
2175 ol.%(base)ol > span.%(base)ol-incr-10 {
2176 counter-increment: %(base)item 10;
2178 ol.%(base)lc-greek, li.%(base)lc-greek {
2179 list-style-type: lower-greek;
2181 ol.%(base)ol > li {
2182 counter-increment: %(base)item;
2184 ol.%(base)ol > li.%(base)li,
2185 ol.%(base)ol > li.%(base)li-lc,
2186 ol.%(base)ol > li.%(base)li-lc-greek,
2187 ol.%(base)ol > li.%(base)li-uc {
2188 list-style-type: none;
2189 display: block;
2191 ol.%(base)ol > li.%(base)li:before,
2192 ol.%(base)ol > li.%(base)li-lc:before,
2193 ol.%(base)ol > li.%(base)li-lc-greek:before,
2194 ol.%(base)ol > li.%(base)li-uc:before {
2195 position: absolute;
2196 text-align: right;
2197 white-space: nowrap;
2198 margin-left: -9ex;
2199 width: 9ex;
2201 ol.%(base)ol > li.%(base)li[type="1"]:before {
2202 content: counter(%(base)item, decimal) ")\A0 \A0 ";
2204 ol.%(base)ol > li.%(base)li-lc[type="i"]:before,
2205 ol.%(base)ol > li.%(base)li-lc[type="I"]:before {
2206 content: counter(%(base)item, lower-roman) ")\A0 \A0 ";
2208 ol.%(base)ol > li.%(base)li-uc[type="I"]:before,
2209 ol.%(base)ol > li.%(base)li-uc[type="i"]:before {
2210 content: counter(%(base)item, upper-roman) ")\A0 \A0 ";
2212 ol.%(base)ol > li.%(base)li-lc[type="a"]:before,
2213 ol.%(base)ol > li.%(base)li-lc[type="A"]:before {
2214 content: counter(%(base)item, lower-alpha) ")\A0 \A0 ";
2216 ol.%(base)ol > li.%(base)li-lc-greek[type="a"]:before,
2217 ol.%(base)ol > li.%(base)li-lc-greek[type="A"]:before {
2218 content: counter(%(base)item, lower-greek) ")\A0 \A0 ";
2220 ol.%(base)ol > li.%(base)li-uc[type="A"]:before,
2221 ol.%(base)ol > li.%(base)li-uc[type="a"]:before {
2222 content: counter(%(base)item, upper-alpha) ")\A0 \A0 ";
2225 li.%(base)checkbox-on,
2226 li.%(base)checkbox-off {
2227 list-style-type: none;
2228 display: block;
2230 li.%(base)checkbox-on > span:first-child + span + span,
2231 li.%(base)checkbox-off > span:first-child + span + span {
2232 position: absolute;
2233 clip: rect(0,0,0,0);
2235 li.%(base)checkbox-on > span:first-child,
2236 li.%(base)checkbox-off > span:first-child,
2237 li.%(base)checkbox-on > span:first-child + span,
2238 li.%(base)checkbox-off > span:first-child + span {
2239 display: block;
2240 position: absolute;
2241 margin-left: -3ex;
2242 width: 1em;
2243 height: 1em;
2245 li.%(base)checkbox-on > span:first-child > span:first-child,
2246 li.%(base)checkbox-off > span:first-child > span:first-child {
2247 display: block;
2248 position: absolute;
2249 left: 0.75pt; top: 0.75pt; right: 0.75pt; bottom: 0.75pt;
2251 li.%(base)checkbox-on > span:first-child > span:first-child:before,
2252 li.%(base)checkbox-off > span:first-child > span:first-child:before {
2253 display: inline-block;
2254 position: relative;
2255 right: 1pt;
2256 width: 100%;
2257 height: 100%;
2258 border: 1pt solid;
2259 content: "";
2261 li.%(base)checkbox-on > span:first-child + span:before {
2262 position: relative;
2263 left: 2pt;
2264 bottom: 1pt;
2265 font-size: 125%;
2266 line-height: 80%;
2267 content: "\2713";
2270 /* ]]> */
2271 </style>
2273 STYLESHEET
2274 $g_style_sheet =~ s/^\s+//g;
2275 $g_style_sheet =~ s/\s+$//g;
2276 $g_style_sheet .= "\n";
2281 __DATA__
2283 =head1 NAME
2285 Markdown.pl - convert Markdown format text files to HTML
2287 =head1 SYNOPSIS
2289 B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
2290 [B<--imageroot>=I<prefix>] [B<--version>] [B<--shortversion>]
2291 [B<--tabwidth>=I<num>] [B<--stylesheet>] [B<--stub>] [--]
2292 [I<file>...]
2294 Options:
2295 -h show short usage help
2296 --help show long detailed help
2297 --html4tags use <br> instead of <br />
2298 --deprecated allow <dir> and <menu> tags
2299 --tabwidth=num expand tabs to num instead of 8
2300 -r prefix | --htmlroot=prefix append relative non-img URLs
2301 to prefix
2302 -i prefix | --imageroot=prefix append relative img URLs to
2303 prefix
2304 -V | --version show version, authors, license
2305 and copyright
2306 -s | --shortversion show just the version number
2307 --stylesheet output the fancy style sheet
2308 --no-stylesheet do not output fancy style sheet
2309 --stub wrap output in stub document
2310 implies --stylesheet
2311 -- end options and treat next
2312 argument as file
2314 =head1 DESCRIPTION
2316 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2317 easy-to-write structured text format into HTML. Markdown's text format
2318 is most similar to that of plain text email, and supports features such
2319 as headers, *emphasis*, code blocks, blockquotes, and links.
2321 Markdown's syntax is designed not as a generic markup language, but
2322 specifically to serve as a front-end to (X)HTML. You can use span-level
2323 HTML tags anywhere in a Markdown document, and you can use block level
2324 HTML tags (like <div> and <table>) as well.
2326 For more information about Markdown's syntax, see the F<basics.md>
2327 and F<syntax.md> files included with F<Markdown.pl>.
2329 Input (auto-detected) may be either ISO-8859-1 or UTF-8. Output is always
2330 converted to the UTF-8 character set.
2333 =head1 OPTIONS
2335 Use "--" to end switch parsing. For example, to open a file named "-z", use:
2337 Markdown.pl -- -z
2339 =over
2342 =item B<--html4tags>
2344 Use HTML 4 style for empty element tags, e.g.:
2346 <br>
2348 instead of Markdown's default XHTML style tags, e.g.:
2350 <br />
2353 =item B<--deprecated>
2355 Both "<dir>" and "<menu>" are normally taken as literal text and the leading
2356 "<" will be automatically escaped.
2358 If this option is used, they are recognized as valid tags and passed through
2359 without being escaped.
2361 When dealing with program argument descriptions, "<dir>" can be particularly
2362 problematic; therefore, use of this option is not recommended.
2364 Other deprecated tags (such as "<font>" and "<center>" for example) continue
2365 to be recognized and passed through even without using this option.
2368 =item B<--tabwidth>=I<num>
2370 Expand tabs to I<num>-character-wide tab stop positions instead of the default
2371 8. Don't use this; physical tabs should always be expanded to 8-character
2372 positions. This option does I<not> affect the number of spaces needed to
2373 start a new "indent level". That will always be 4 no matter what value is
2374 used (or implied by default) with this option. Also note that tabs inside
2375 backticks-delimited code blocks will always be expanded to 8-character tab
2376 stop positions no matter what value is used for this option.
2378 The value must be S<2 <= I<num> <= 32>.
2381 =item B<-r> I<prefix>, B<--htmlroot>=I<prefix>
2383 Any non-absolute URLs have I<prefix> prepended.
2386 =item B<-i> I<prefix>, B<--imageroot>=I<prefix>
2388 Any non-absolute URLs have I<prefix> prepended (overriding the B<-r> prefix
2389 if any) but only if they end in an image suffix.
2392 =item B<-V>, B<--version>
2394 Display Markdown's version number and copyright information.
2397 =item B<-s>, B<--shortversion>
2399 Display the short-form version number.
2402 =item B<--stylesheet>
2404 Include the fancy style sheet at the beginning of the output (or in the
2405 C<head> section with B<--stub>). This style sheet makes fancy checkboxes
2406 and makes a right parenthesis C<)> show instead of a C<.> for ordered lists
2407 that use them. Without it things will still look fine except that the
2408 fancy stuff won't be there.
2410 Use this option with no other arguments and redirect standard input to
2411 /dev/null to get just the style sheet and nothing else.
2414 =item B<--no-stylesheet>
2416 Overrides a previous B<--stylesheet> and disables implicit inclusion
2417 of the style sheet by the B<--stub> option.
2420 =item B<--stub>
2422 Wrap the output in a full document stub (i.e. one that has C<html>, C<head>
2423 and C<body> tags). The style sheet I<will> be included in the C<head> section
2424 unless the B<--no-stylesheet> option is also used.
2427 =item B<-h>, B<--help>
2429 Display Markdown's help. With B<--help> full help is shown; with B<-h> only
2430 the usage and options are shown.
2433 =back
2436 =head1 VERSION HISTORY
2438 Z<> See the F<README> file for detailed release notes for this version.
2440 =over
2442 =item Z<> 1.1.4 - 24 Jun 2017
2444 =item Z<> 1.1.3 - 13 Feb 2017
2446 =item Z<> 1.1.2 - 19 Jan 2017
2448 =item Z<> 1.1.1 - 12 Jan 2017
2450 =item Z<> 1.1.0 - 11 Jan 2017
2452 =item Z<> 1.0.4 - 05 Jun 2016
2454 =item Z<> 1.0.3 - 06 Sep 2015
2456 =item Z<> 1.0.2 - 03 Sep 2015
2458 =item Z<> 1.0.1 - 14 Dec 2004
2460 =item Z<> 1.0.0 - 28 Aug 2004
2462 =back
2464 =head1 AUTHORS
2466 =over
2468 =item John Gruber
2470 =item L<http://daringfireball.net>
2472 =item L<http://daringfireball.net/projects/markdown/>
2474 =item E<160>
2476 =back
2478 =over
2480 =item PHP port and other contributions by Michel Fortin
2482 =item L<http://michelf.com>
2484 =item E<160>
2486 =back
2488 =over
2490 =item Additional enhancements and tweaks by Kyle J. McKay
2492 =item mackyle<at>gmail.com
2494 =back
2496 =head1 COPYRIGHT AND LICENSE
2498 =over
2500 =item Copyright (C) 2003-2004 John Gruber
2502 =item Copyright (C) 2015-2017 Kyle J. McKay
2504 =item All rights reserved.
2506 =back
2508 Redistribution and use in source and binary forms, with or without
2509 modification, are permitted provided that the following conditions are
2510 met:
2512 =over
2514 =item *
2516 Redistributions of source code must retain the above copyright
2517 notice, this list of conditions and the following disclaimer.
2519 =item *
2521 Redistributions in binary form must reproduce the above copyright
2522 notice, this list of conditions and the following disclaimer in the
2523 documentation and/or other materials provided with the distribution.
2525 =item *
2527 Neither the name "Markdown" nor the names of its contributors may
2528 be used to endorse or promote products derived from this software
2529 without specific prior written permission.
2531 =back
2533 This software is provided by the copyright holders and contributors "as
2534 is" and any express or implied warranties, including, but not limited
2535 to, the implied warranties of merchantability and fitness for a
2536 particular purpose are disclaimed. In no event shall the copyright owner
2537 or contributors be liable for any direct, indirect, incidental, special,
2538 exemplary, or consequential damages (including, but not limited to,
2539 procurement of substitute goods or services; loss of use, data, or
2540 profits; or business interruption) however caused and on any theory of
2541 liability, whether in contract, strict liability, or tort (including
2542 negligence or otherwise) arising in any way out of the use of this
2543 software, even if advised of the possibility of such damage.
2545 =cut