4 # Markdown -- A text-to-HTML conversion tool for web writers
6 # Copyright (C) 2004 John Gruber
7 # Copyright (C) 2015,2016,2017 Kyle J. McKay
9 # License is Modified BSD (aka 3-clause BSD) License
10 # See LICENSE file (or <https://opensource.org/licenses/BSD-3-Clause>)
21 use vars
qw($COPYRIGHT $VERSION @ISA @EXPORT_OK);
24 \"Copyright (C) 2004 John Gruber
25 Copyright (C) 2015,2016,2017 Kyle J. McKay
28 *VERSION = \"1.1.5-PRE"
32 use Digest::MD5 qw(md5 md5_hex);
33 use File
::Basename
qw(basename);
34 use Scalar
::Util
qw(refaddr looks_like_number);
37 @EXPORT_OK = qw(Markdown);
38 $INC{__PACKAGE__
.'.pm'} = $INC{basename
(__FILE__
)} unless exists $INC{__PACKAGE__
.'.pm'};
40 close(DATA
) if fileno(DATA
);
41 exit(&_main
(@ARGV)||0) unless caller;
45 $encoder = Encode
::find_encoding
('Windows-1252') ||
46 Encode
::find_encoding
('ISO-8859-1') or
47 die "failed to load ISO-8859-1 encoder\n";
51 # Global default settings:
53 my ($g_style_prefix, $g_empty_element_suffix, $g_indent_width, $g_tab_width);
55 $g_style_prefix = "_markdown-"; # Prefix for markdown css class styles
56 $g_empty_element_suffix = " />"; # Change to ">" for HTML output
57 $g_indent_width = 4; # Number of spaces considered new level
58 $g_tab_width = 4; # Legacy even though it's wrong
66 # Style sheet template
69 # Permanent block id table
72 # Global hashes, used by various utility routines
81 # Return a "block id" to use to identify the block that does not contain
82 # any characters that could be misinterpreted by the rest of the code.
83 # Originally this used md5_hex, but that's unnecessarily slow.
84 # Instead just use the refaddr of the scalar ref of the entry for that
85 # key in either the global or, if the optional second argument is true,
86 # permanent table. To avoid the result being confused with anything
87 # else, it's prefixed with a control character and suffixed with another
88 # both of which are not allowed by the XML standard or Unicode.
91 "\2".refaddr
(\
$g_perm_block_ids{$_[0]})."\3" :
92 "\5".refaddr
(\
$g_block_ids{$_[0]})."\6";
95 # Regex to match balanced [brackets]. See Friedl's
96 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
97 my $g_nested_brackets;
99 $g_nested_brackets = qr{
100 (?
> # Atomic matching
101 [^\
[\
]]+ # Anything other than brackets
104 (??
{ $g_nested_brackets }) # Recursive set of nested brackets
111 # Table of hash values for escaped characters:
114 foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") {
115 $g_escape_table{$char} = block_id
($char,1);
119 # Used to track when we're inside an ordered or unordered list
120 # (see _ProcessListItems() for details):
127 #### Blosxom plug-in interface ##########################################
131 $_haveBX = defined($blosxom::version
);
134 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
135 # which posts Markdown should process, using a "meta-markup: markdown"
136 # header. If it's set to 0 (the default), Markdown will process all
138 my $g_blosxom_use_meta;
140 $g_blosxom_use_meta = 0;
145 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
147 if ((! $g_blosxom_use_meta) or
148 (defined($meta::markup
) and ($meta::markup
=~ /^\s*markdown\s*$/i))
150 $$body_ref = Markdown
($$body_ref);
156 #### Movable Type plug-in interface #####################################
157 my $_haveMT = eval {require MT
; 1;}; # Test to see if we're running in MT
158 my $_haveMT3 = $_haveMT && eval {require MT
::Plugin
; 1;}; # and MT >= MT 3.0.
163 require MT
::Template
::Context
;
164 import MT
::Template
::Context
;
169 my $plugin = new MT
::Plugin
({
171 description
=> "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
172 doc_link
=> 'http://daringfireball.net/projects/markdown/'
174 MT
->add_plugin( $plugin );
177 MT
::Template
::Context
->add_container_tag(MarkdownOptions
=> sub {
180 my $builder = $ctx->stash('builder');
181 my $tokens = $ctx->stash('tokens');
183 if (defined ($args->{'output'}) ) {
184 $ctx->stash('markdown_output', lc $args->{'output'});
187 defined (my $str = $builder->build($ctx, $tokens) )
188 or return $ctx->error($builder->errstr);
192 MT
->add_text_filter('markdown' => {
194 docs
=> 'http://daringfireball.net/projects/markdown/',
200 my $output = $ctx->stash('markdown_output');
201 if (defined $output && $output =~ m/^html/i) {
202 $g_empty_element_suffix = ">";
203 $ctx->stash('markdown_output', '');
205 elsif (defined $output && $output eq 'raw') {
207 $ctx->stash('markdown_output', '');
211 $g_empty_element_suffix = " />";
214 $text = $raw ?
$text : Markdown
($text);
219 # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
224 $smartypants = $MT::Template
::Context
::Global_filters
{'smarty_pants'};
228 MT
->add_text_filter('markdown_with_smartypants' => {
229 label
=> 'Markdown With SmartyPants',
230 docs
=> 'http://daringfireball.net/projects/markdown/',
235 my $output = $ctx->stash('markdown_output');
236 if (defined $output && $output eq 'html') {
237 $g_empty_element_suffix = ">";
240 $g_empty_element_suffix = " />";
243 $text = Markdown
($text);
244 $text = $smartypants->($text, '1');
252 defined($str) or return undef;
259 #### BBEdit/command-line text filter interface ##########################
264 #### Check for command-line switches: #################
268 Getopt
::Long
::Configure
(qw(bundling require_order pass_through));
269 GetOptions
(\
%cli_opts,
272 'shortversion|short-version|s',
277 'tabwidth|tab-width=s',
278 'stylesheet|style-sheet',
279 'no-stylesheet|no-style-sheet',
282 if ($cli_opts{'help'}) {
283 pod2usage
(-verbose
=> 2, -exitval
=> 0);
285 if ($cli_opts{'h'}) {
286 pod2usage
(-verbose
=> 0, -exitval
=> 0);
288 if ($cli_opts{'version'}) { # Version info
289 print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
290 print "License is Modified BSD (aka 3-clause BSD) License\n";
291 print "<https://opensource.org/licenses/BSD-3-Clause>\n";
294 if ($cli_opts{'shortversion'}) { # Just the version number string.
299 if ($cli_opts{'stub'}) {
302 if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
303 $options{empty_element_suffix
} = ">";
306 if ($cli_opts{'deprecated'}) { # Allow <dir> and <menu> tags to pass through
307 _SetAllowedTag
("dir");
308 _SetAllowedTag
("menu");
310 if ($cli_opts{'tabwidth'}) {
311 my $tw = $cli_opts{'tabwidth'};
312 die "invalid tab width (must be integer)\n" unless looks_like_number
$tw;
313 die "invalid tab width (must be >= 2 and <= 32)\n" unless $tw >= 2 && $tw <= 32;
314 $options{tab_width
} = int(0+$tw);
316 if ($cli_opts{'htmlroot'}) { # Use URL prefix
317 $options{url_prefix
} = $cli_opts{'htmlroot'};
319 if ($cli_opts{'imageroot'}) { # Use image URL prefix
320 $options{img_prefix
} = $cli_opts{'imageroot'};
322 if ($cli_opts{'stylesheet'}) { # Display the style sheet
323 $options{show_styles
} = 1;
325 if ($cli_opts{'no-stylesheet'}) { # Do not display the style sheet
326 $options{show_styles
} = 0;
328 $options{show_styles
} = 1 if $stub && !defined($options{show_styles
});
329 $options{tab_width
} = 8 unless defined($options{tab_width
});
335 <html xmlns="http://www.w3.org/1999/xhtml">
337 <meta charset="utf-8" />
338 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
340 } elsif ($stub < 0) {
344 <meta charset="utf-8">
345 <meta http-equiv="content-type" content="text/html; charset=utf-8">
348 if ($stub && ($options{title} || $options{h1})) {
349 my $title = $options{title};
350 defined($title) && $title ne "" or $title = $options{h1};
351 if (defined($title) && $title ne "") {
352 $title =~ s/&/&/g;
353 $title =~ s/</</g;
354 print "<title>$title</title>\n";
357 if ($options{show_styles}) {
358 my $stylesheet = $g_style_sheet;
359 $stylesheet =~ s/%\(base\)/$g_style_prefix/g;
363 print "</head>\n<body style=\"text-align:center\">\n",
364 "<div style=\"display:inline-block;text-align:left;max-width:42pc\">\n";
368 #### Process incoming text: ###########################
373 local $/; # Slurp the whole file
377 my $result = Markdown($_, \%options);
386 &$hdr() unless $didhdr;
387 print "</div>\n</body>\n</html>\n" if $stub;
395 # Primary function. The order in which other subs are called here is
396 # essential. Link and image substitutions need to happen before
397 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
398 # and <img> tags get encoded.
401 defined $_text or $_text='';
404 if (Encode::is_utf8($_text) || utf8::decode($_text)) {
407 $text = $encoder->decode($_text, Encode::FB_DEFAULT);
411 # Any remaining arguments after the first are options; either a single
412 # hashref or a list of name, value pairs.
414 # set initial defaults
415 style_prefix => $g_style_prefix,
416 empty_element_suffix => $g_empty_element_suffix,
417 tab_width => $g_tab_width,
418 indent_width => $g_indent_width,
419 url_prefix => "", # Prefixed to non-absolute URLs
420 img_prefix => "", # Prefixed to non-absolute image URLs
423 if (ref($_[0]) eq "HASH") {
428 while (my ($k,$v) = each %args) {
432 # Clear the globals. If we don't clear these, you get conflicts
433 # from other articles when generating a page which contains more than
434 # one article (e.g. an index page that shows the N most recent
444 # Standardize line endings:
445 $text =~ s{\r\n}{\n}g; # DOS to Unix
446 $text =~ s{\r}{\n}g; # Mac to Unix
448 # Make sure $text ends with a couple of newlines:
451 # Handle backticks-delimited code blocks
452 $text = _HashBTCodeBlocks
($text);
454 # Convert all tabs to spaces.
455 $text = _Detab
($text);
457 # Strip any lines consisting only of spaces.
458 # This makes subsequent regexen easier to write, because we can
459 # match consecutive blank lines with /\n+/ instead of something
460 # contorted like / *\n+/ .
463 # Turn block-level HTML blocks into hash entries
464 $text = _HashHTMLBlocks
($text);
466 # Strip link definitions, store in hashes.
467 $text = _StripLinkDefinitions
($text);
469 $text = _RunBlockGamut
($text, 1);
471 # Unhashify code blocks
472 $text =~ s/(\005\d+\006)/$g_code_blocks{$1}/g;
474 $text = _UnescapeSpecialChars
($text);
476 $text .= "\n" unless $text eq "";
479 if (defined($opt{h1
}) && $opt{h1
} ne "" && ref($_[0]) eq "HASH") {
480 utf8
::encode
($opt{h1
});
481 ${$_[0]}{h1
} = $opt{h1
}
487 sub _HashBTCodeBlocks
{
489 # Process Markdown backticks (```) delimited code blocks
495 ``(`+)[ \t]*(?:([\w.+-]+)[ \t]*)?\n
496 ( # $3 = the code block -- one or more lines, starting with ```
501 (?
:(?
:``\
1[ \t]*(?
:\n|\Z
))|\Z
) # and ending with ``` or end of document
503 # $2 contains syntax highlighting to use if defined
505 $codeblock =~ s/[ \t]+$//mg; # trim trailing spaces on lines
506 $codeblock = _Detab
($codeblock, 8); # physical tab stops are always 8
507 $codeblock =~ s/\A\n+//; # trim leading newlines
508 $codeblock =~ s/\s+\z//; # trim trailing whitespace
509 $codeblock = _EncodeCode
($codeblock); # or run highlighter here
510 $codeblock = "<div class=\"$opt{style_prefix}code-bt\"><pre style=\"display:none\"></pre><pre><code>"
511 . $codeblock . "\n</code></pre></div>";
513 my $key = block_id
($codeblock);
514 $g_html_blocks{$key} = $codeblock;
515 "\n\n" . $key . "\n\n";
522 sub _StripLinkDefinitions
{
524 # Strips link definitions from text, stores the URLs and titles in
528 my $less_than_indent = $opt{indent_width
} - 1;
530 # Link defs are in the form: ^[id]: url "optional title"
532 ^[ ]{0,$less_than_indent}\
[(.+)\
]: # id = $1
534 \n?
# maybe *one* newline
536 <?
(\S
+?
)>?
# url = $2
538 \n?
# maybe one newline
541 (?
<=\s
) # lookbehind for whitespace
546 )?
# title is optional
550 my $id = _strip
(lc $1); # Link IDs are case-insensitive
552 my $title = _strip
($3);
554 $g_urls{$id} = _EncodeAmpsAndAngles
($url);
555 if (defined($title) && $title ne "") {
556 $g_titles{$id} = $title;
557 $g_titles{$id} =~ s/\042/"/g;
565 my ($block_tags_a, $block_tags_b);
567 $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o;
568 $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o;
571 sub _HashHTMLBlocks
{
573 my $less_than_indent = $opt{indent_width
} - 1;
575 # Hashify HTML blocks:
576 # We only want to do this for block-level HTML tags, such as headers,
577 # lists, and tables. That's because we still want to wrap <p>s around
578 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
579 # phrase emphasis, and spans. The list of tags we're looking for is
582 # First, look for nested blocks, e.g.:
585 # tags for inner block must be indented.
589 # The outermost tags must start at the left margin for this to match, and
590 # the inner nested divs must be indented.
591 # We need to do this before the next, more liberal match, because the next
592 # match will start at the first `<div>` and stop at the first `</div>`.
595 ^ # start of line (with /m)
596 <($block_tags_a) # start tag = $2
598 (.*\n)*?
# any number of lines, minimally matching
599 </\
2> # the matching end tag
600 [ ]* # trailing spaces
601 (?
=\n+|\Z
) # followed by a newline or end of document
604 my $key = block_id
($1);
605 $g_html_blocks{$key} = $1;
606 "\n\n" . $key . "\n\n";
611 # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
615 ^ # start of line (with /m)
616 <($block_tags_b) # start tag = $2
618 (.*\n)*?
# any number of lines, minimally matching
619 .*</\
2> # the matching end tag
620 [ ]* # trailing spaces
621 (?
=\n+|\Z
) # followed by a newline or end of document
624 my $key = block_id
($1);
625 $g_html_blocks{$key} = $1;
626 "\n\n" . $key . "\n\n";
628 # Special case just for <hr />. It was easier to make a special case than
629 # to make the other regex more complicated.
632 (?
<=\n\n) # Starting after a blank line
634 \A
\n?
# the beginning of the doc
637 [ ]{0,$less_than_indent}
638 <(hr
) # start tag = $2
641 /?
> # the matching end tag
643 (?
=\n{2,}|\Z
) # followed by a blank line or end of document
646 my $key = block_id
($1);
647 $g_html_blocks{$key} = $1;
648 "\n\n" . $key . "\n\n";
651 # Special case for standalone HTML comments:
654 (?
<=\n\n) # Starting after a blank line
656 \A
\n?
# the beginning of the doc
659 [ ]{0,$less_than_indent}
666 (?
=\n{2,}|\Z
) # followed by a blank line or end of document
669 my $key = block_id
($1);
670 $g_html_blocks{$key} = $1;
671 "\n\n" . $key . "\n\n";
681 # These are all the transformations that form block-level
682 # tags like paragraphs, headers, and list items.
684 my ($text, $anchors) = @_;
686 $text = _DoHeaders
($text, $anchors);
688 # Do Horizontal Rules:
689 $text =~ s{^ {0,3}\*(?: {0,2}\*){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm
;
690 $text =~ s{^ {0,3}\_(?: {0,2}\_){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm
;
691 $text =~ s{^ {0,3}\-(?: {0,2}\-){2,}[ ]*$}{\n<hr$opt{empty_element_suffix}\n}gm
;
693 $text = _DoLists
($text);
695 $text = _DoCodeBlocks
($text);
697 $text = _DoBlockQuotes
($text);
699 # We already ran _HashHTMLBlocks() before, in Markdown(), but that
700 # was to escape raw HTML in the original Markdown source. This time,
701 # we're escaping the markup we've just created, so that we don't wrap
702 # <p> tags around block-level tags.
703 $text = _HashHTMLBlocks
($text);
705 $text = _FormParagraphs
($text);
713 # These are all the transformations that occur *within* block-level
714 # tags like paragraphs, headers, and list items.
718 $text = _DoCodeSpans
($text);
720 $text = _EscapeSpecialChars
($text);
722 # Process anchor and image tags. Images must come first,
723 # because ![foo][f] looks like an anchor.
724 $text = _DoImages
($text);
725 $text = _DoAnchors
($text);
727 # Make links out of things like `<http://example.com/>`
728 # Must come after _DoAnchors(), because you can use < and >
729 # delimiters in inline links like [this](<url>).
730 $text = _DoAutoLinks
($text);
732 $text = _EncodeAmpsAndAngles
($text);
734 $text = _DoItalicsAndBoldAndStrike
($text);
737 $text =~ s/ {2,}\n/<br$opt{empty_element_suffix}\n/g;
743 sub _EscapeSpecialChars
{
745 my $tokens ||= _TokenizeHTML
($text);
747 $text = ''; # rebuild $text from the tokens
748 # my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
749 # my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
751 foreach my $cur_token (@
$tokens) {
752 if ($cur_token->[0] eq "tag") {
753 # Within tags, encode *, _ and ~ so they don't conflict
754 # with their use in Markdown for italics and strong.
755 # We're replacing each such character with its
756 # corresponding block id value; this is likely
757 # overkill, but it should prevent us from colliding
758 # with the escape values by accident.
759 $cur_token->[1] =~ s!([*_~])!$g_escape_table{$1}!g;
760 $text .= $cur_token->[1];
762 my $t = $cur_token->[1];
763 $t = _EncodeBackslashEscapes
($t);
771 sub _ProcessWikiLink
{
772 my ($link_text, $link_loc) = @_;
773 if (defined($link_loc) && $link_loc =~ m{^(?:http|ftp)s?://\S+$}i) {
774 # Just rewrite it to [...](...) form
775 return "[".$link_text."](".$link_loc.")";
777 if (defined($link_loc)) {
778 # We don't handle any other kind of "bar" links yet
781 if ($link_text =~ m{^(?:http|ftp)s?://\S+$}i) {
782 # Just rewrite it to [...](...) form
783 return "[".$link_text."](".$link_text.")";
785 # We don't handle any other wiki-style links yet
792 # Turn Markdown link shortcuts into XHTML <a> tags.
797 # First, handle wiki-style links: [[wiki style link]]
800 ( # wrap whole match in $1
802 ($g_nested_brackets) # link text and id = $2
807 my $whole_match = $1;
809 my $link_loc = undef;
811 if ($link_text =~ /^(.*)\|(.*)$/s) {
816 $result = _ProcessWikiLink
($link_text, $link_loc);
817 defined($result) or $result = $whole_match;
822 # Next, handle reference-style links: [link text] [id]
825 ( # wrap whole match in $1
827 ($g_nested_brackets) # link text = $2
830 [ ]?
# one optional space
831 (?
:\n[ ]*)?
# one optional newline followed by spaces
839 my $whole_match = $1;
841 my $link_id = _strip
(lc $3);
843 if ($link_id eq "") {
844 $link_id = _strip
(lc $link_text); # for shortcut links like [this][].
847 if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
848 my $url = $g_urls{$link_id};
849 $url = defined($url) ? _PrefixURL
($url) : $g_anchors{$link_id};
850 # We've got to encode these to avoid conflicting
851 # with italics, bold and strike through.
852 $url =~ s!([*_~])!$g_escape_table{$1}!g;
853 $result = "<a href=\"$url\"";
854 if ( defined $g_titles{$link_id} ) {
855 my $title = $g_titles{$link_id};
856 $title =~ s!([*_~])!$g_escape_table{$1}!g;
857 $result .= " title=\"$title\"";
859 $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
860 $result .= ">$link_text</a>";
863 $result = $whole_match;
869 # Subsequently, inline-style links: [link text](url "optional title")
872 ( # wrap whole match in $1
874 ($g_nested_brackets) # link text = $2
878 <?
(.*?
)>?
# href = $3
881 (['\042]) # quote char = $5
884 )? # title is optional
889 my $whole_match = $1;
892 my $title = _strip($6);
894 $url = _PrefixURL($url);
895 # We've got to encode these to avoid conflicting
896 # with italics, bold and strike through.
897 $url =~ s!([*_~])!$g_escape_table{$1}!g;
898 $result = "<a href=\"$url\"";
900 if (defined $title) {
901 $title =~ s/\042/"/g;
902 $title =~ s!([*_~])!$g_escape_table{$1}!g;
903 $result .= " title=\"$title\"";
906 $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
907 $result .= ">$link_text</a>";
913 # Finally, handle reference-style implicit shortcut links: [link text]
916 ( # wrap whole match in $1
918 ($g_nested_brackets) # link text = $2
923 my $whole_match = $1;
925 my $link_id = _strip
(lc $2);
927 if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
928 my $url = $g_urls{$link_id};
929 $url = defined($url) ? _PrefixURL
($url) : $g_anchors{$link_id};
930 # We've got to encode these to avoid conflicting
931 # with italics, bold and strike through.
932 $url =~ s!([*_~])!$g_escape_table{$1}!g;
933 $result = "<a href=\"$url\"";
934 if ( defined $g_titles{$link_id} ) {
935 my $title = $g_titles{$link_id};
936 $title =~ s!([*_~])!$g_escape_table{$1}!g;
937 $result .= " title=\"$title\"";
939 $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
940 $result .= ">$link_text</a>";
943 $result = $whole_match;
954 # Turn Markdown image shortcuts into <img> tags.
959 # First, handle reference-style labeled images: ![alt text][id]
962 ( # wrap whole match in $1
964 (.*?
) # alt text = $2
967 [ ]?
# one optional space
968 (?
:\n[ ]*)?
# one optional newline followed by spaces
977 my $whole_match = $1;
978 my $alt_text = _strip
($2);
979 my $link_id = _strip
(lc $3);
981 if ($link_id eq "") {
982 $link_id = lc $alt_text; # for shortcut links like ![this][].
985 $alt_text =~ s/"/"/g;
986 if (defined $g_urls{$link_id}) {
987 my $url = _PrefixURL
($g_urls{$link_id});
988 # We've got to encode these to avoid conflicting
989 # with italics, bold and strike through.
990 $url =~ s!([*_~])!$g_escape_table{$1}!g;
991 $result = "<img src=\"$url\" alt=\"$alt_text\"";
992 if (defined $g_titles{$link_id}) {
993 my $title = $g_titles{$link_id};
994 $title =~ s!([*_~])!$g_escape_table{$1}!g;
995 $result .= " title=\"$title\"";
997 $result .= $opt{empty_element_suffix
};
1000 # If there's no such link ID, leave intact:
1001 $result = $whole_match;
1008 # Next, handle inline images: ![alt text](url "optional title")
1009 # Don't forget: encode * and _
1012 ( # wrap whole match in $1
1014 (.*?
) # alt text = $2
1018 <?
(\S
+?
)>?
# src url = $3
1021 (['\042]) # quote char = $5
1025 )? # title is optional
1030 my $whole_match = $1;
1031 my $alt_text = _strip($2);
1035 $title = _strip($6);
1038 $url = _PrefixURL($url);
1039 $alt_text =~ s/"/"/g;
1040 $title =~ s/"/"/g;
1041 # We've got to encode these to avoid conflicting
1042 # with italics, bold and strike through.
1043 $url =~ s!([*_~])!$g_escape_table{$1}!g;
1044 $result = "<img src=\"$url\" alt=\"$alt_text\"";
1045 if (defined $title) {
1046 $title =~ s!([*_~])!$g_escape_table{$1}!g;
1047 $result .= " title=\"$title\"";
1049 $result .= $opt{empty_element_suffix
};
1055 # Finally, handle reference-style implicitly labeled images: ![alt text]
1058 ( # wrap whole match in $1
1060 (.*?
) # alt text = $2
1065 my $whole_match = $1;
1066 my $alt_text = _strip
($2);
1067 my $link_id = lc $alt_text;
1069 $alt_text =~ s/"/"/g;
1070 if (defined $g_urls{$link_id}) {
1071 my $url = _PrefixURL
($g_urls{$link_id});
1072 # We've got to encode these to avoid conflicting
1073 # with italics, bold and strike through.
1074 $url =~ s!([*_~])!$g_escape_table{$1}!g;
1075 $result = "<img src=\"$url\" alt=\"$alt_text\"";
1076 if (defined $g_titles{$link_id}) {
1077 my $title = $g_titles{$link_id};
1078 $title =~ s!([*_~])!$g_escape_table{$1}!g;
1079 $result .= " title=\"$title\"";
1081 $result .= $opt{empty_element_suffix
};
1084 # If there's no such link ID, leave intact:
1085 $result = $whole_match;
1098 $link =~ tr/-a-z0-9_/_/cs;
1099 return '' unless $link ne '';
1100 $link = md5_hex
($link) if length($link) > 64;
1105 sub _GetNewAnchorId
{
1106 my $link = _strip
(lc(shift));
1107 return '' if defined($g_anchors{$link});
1108 my $id = _MakeAnchorId
($link);
1109 return '' unless $id;
1110 $g_anchors{$link} = '#'.$id;
1116 my ($text, $anchors) = @_;
1118 my $geth1 = $anchors && !defined($opt{h1
}) ?
sub {
1119 return unless !defined($h1);
1124 $h1 = $h if $h ne "";
1127 # Setext-style headers:
1137 $text =~ s
{ ^(?
:=+[ ]*\n)?
[ ]*(.+?
)[ ]*\n=+[ ]*\n+ }{
1139 my $id = _GetNewAnchorId
($h);
1141 $id = " id=\"$id\"" if $id ne "";
1142 "<h1$id>" . _RunSpanGamut
($h) . "</h1>\n\n";
1145 $text =~ s
{ ^(?
:-+[ ]*\n)?
[ ]*(.+?
)[ ]*\n-+[ ]*\n+ }{
1147 my $id = _GetNewAnchorId
($h);
1148 $id = " id=\"$id\"" if $id ne "";
1149 "<h2$id>" . _RunSpanGamut
($h) . "</h2>\n\n";
1152 $text =~ s
{ ^(?
:~+[ ]*\n)?
[ ]*(.+?
)[ ]*\n~+[ ]*\n+ }{
1154 my $id = _GetNewAnchorId
($h);
1155 $id = " id=\"$id\"" if $id ne "";
1156 "<h3$id>" . _RunSpanGamut
($h) . "</h3>\n\n";
1160 # atx-style headers:
1163 # ## Header 2 with closing hashes ##
1168 ^(\#
{1,6}) # $1 = string of #'s
1170 (.+?
) # $2 = Header text
1172 \#
* # optional closing #'s (not counted)
1176 my $h_level = length($1);
1177 my $id = $h_level <= 3 ? _GetNewAnchorId
($h) : '';
1178 &$geth1($h) if $h_level == 1;
1179 $id = " id=\"$id\"" if $id ne "";
1180 "<h$h_level$id>" . _RunSpanGamut
($h) . "</h$h_level>\n\n";
1183 $opt{h1
} = $h1 if defined($h1) && $h1 ne "";
1188 my ($marker_ul, $marker_ol, $marker_any, $roman_numeral, $greek_lower);
1190 # Re-usable patterns to match list item bullets and number markers:
1191 $roman_numeral = qr
/(?
:
1192 [IiVvXx
]|[Ii
]{2,3}|[Ii
][VvXx
]|[VvXx
][Ii
]{1,3}|[Xx
][Vv
][Ii
]{0,3}|
1193 [Xx
][Ii
][VvXx
]|[Xx
]{2}[Ii
]{0,3}|[Xx
]{2}[Ii
]?
[Vv
]|[Xx
]{2}[Vv
][Ii
]{1,2})/ox
;
1194 $greek_lower = qr/(?:[\x{03b1}-\x{03c9}])/o;
1195 $marker_ul = qr/[*+-]/o;
1196 $marker_ol = qr/(?:\d+|[A-Za-z]|$roman_numeral|$greek_lower)[.\)]/o;
1197 $marker_any = qr/(?:$marker_ul|$marker_ol)/o;
1201 sub _GetListMarkerType
{
1202 my ($list_type, $list_marker, $last_marker) = @_;
1203 return "" unless $list_type && $list_marker && lc($list_type) eq "ol";
1204 my $last_marker_type = '';
1205 $last_marker_type = _GetListMarkerType
($list_type, $last_marker)
1206 if defined($last_marker) &&
1207 # these are roman unless $last_marker type case matches and is 'a' or 'A'
1208 $list_marker =~ /^[IiVvXx][.\)]?$/;
1209 return "I" if $list_marker =~ /^[IVX]/ && $last_marker_type ne 'A';
1210 return "i" if $list_marker =~ /^[ivx]/ && $last_marker_type ne 'a';
1211 return "A" if $list_marker =~ /^[A-Z]/;
1212 return "a" if $list_marker =~ /^[a-z]/ || $list_marker =~ /^$greek_lower/o;
1217 sub _GetListItemTypeClass
{
1218 my ($list_type, $list_marker, $last_marker) = @_;
1219 my $list_marker_type = _GetListMarkerType
($list_type, $list_marker, $last_marker);
1221 return "" unless length($list_marker) >= 2 && $list_marker_type =~ /^[IiAa1]$/;
1222 return "lower-greek" if $list_marker_type eq "a" && $list_marker =~ /^$greek_lower/o;
1223 return "" unless $list_marker =~ /\)$/;
1224 return "upper-roman" if $list_marker_type eq "I";
1225 return "lower-roman" if $list_marker_type eq "i";
1226 return "upper-alpha" if $list_marker_type eq "A";
1227 return "lower-alpha" if $list_marker_type eq "a";
1230 return ($list_marker_type, $ans);
1234 my %_roman_number_table;
1236 %_roman_number_table = (
1268 # Necessary because ς and σ are the same value grrr
1269 my %_greek_number_table;
1271 %_greek_number_table = (
1272 "\x{03b1}" => 1, # α
1273 "\x{03b2}" => 2, # β
1274 "\x{03b3}" => 3, # γ
1275 "\x{03b4}" => 4, # δ
1276 "\x{03b5}" => 5, # ε
1277 "\x{03b6}" => 6, # ζ
1278 "\x{03b7}" => 7, # η
1279 "\x{03b8}" => 8, # θ
1280 "\x{03b9}" => 9, # ι
1281 "\x{03ba}" => 10, # κ
1282 "\x{03bb}" => 11, # λ
1283 #"\x{00b5}"=> 12, # µ is "micro" not "mu"
1284 "\x{03bc}" => 12, # μ
1285 "\x{03bd}" => 13, # ν
1286 "\x{03be}" => 14, # ξ
1287 "\x{03bf}" => 15, # ο
1288 "\x{03c0}" => 16, # π
1289 "\x{03c1}" => 17, # ρ
1290 "\x{03c2}" => 18, # ς
1291 "\x{03c3}" => 18, # σ
1292 "\x{03c4}" => 19, # τ
1293 "\x{03c5}" => 20, # υ
1294 "\x{03c6}" => 21, # φ
1295 "\x{03c7}" => 22, # χ
1296 "\x{03c8}" => 23, # ψ
1297 "\x{03c9}" => 24 # ω
1302 sub _GetMarkerIntegerNum
{
1303 my ($list_marker_type, $marker_val) = @_;
1305 return 0 + $marker_val if $list_marker_type eq "1";
1306 $list_marker_type = lc($list_marker_type);
1307 return $_greek_number_table{$marker_val}
1308 if $list_marker_type eq "a" &&
1309 defined($_greek_number_table{$marker_val});
1310 $marker_val = lc($marker_val);
1311 return ord($marker_val) - ord("a") + 1 if $list_marker_type eq "a";
1312 return 1 unless $list_marker_type eq "i";
1313 defined($_roman_number_table{$marker_val}) and
1314 return $_roman_number_table{$marker_val};
1317 return $ans if $ans == 0 && $list_marker_type eq "1";
1318 return $ans >= 1 ?
$ans : 1;
1323 my ($from, $to, $extra) = @_;
1324 $extra = defined($extra) ?
" $extra" : "";
1326 while ($from + 10 <= $to) {
1327 $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-10\"></span>\n";
1330 while ($from + 5 <= $to) {
1331 $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-5\"></span>\n";
1334 while ($from + 2 <= $to) {
1335 $result .= "<span$extra class=\"$opt{style_prefix}ol-incr-2\"></span>\n";
1338 while ($from < $to) {
1339 $result .= "<span$extra class=\"$opt{style_prefix}ol-incr\"></span>\n";
1348 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1351 my $indent = $opt{indent_width
};
1352 my $less_than_indent = $indent - 1;
1353 my $less_than_double_indent = 2 * $indent - 1;
1355 # Re-usable pattern to match any entire ul or ol list:
1356 my $whole_list = qr{
1357 ( # $1 (or $_[0]) = whole list
1360 [ ]{0,$less_than_indent}
1361 (${marker_any
}) # $3 (or $_[2]) = first list item marker
1370 (?
! # Negative lookahead for another list item marker
1377 my $list_item_sub = sub {
1379 my $list_type = ($_[2] =~ m/$marker_ul/) ?
"ul" : "ol";
1381 my $list_class = "";
1383 # Turn double returns into triple returns, so that we can make a
1384 # paragraph for the last item in a list, if necessary:
1385 $list =~ s/\n\n/\n\n\n/g;
1386 my ($result, $first_marker, $fancy) = _ProcessListItems
($list_type, $list);
1387 my $list_marker_type = _GetListMarkerType
($list_type, $first_marker);
1388 if ($list_marker_type) {
1389 $first_marker =~ s/[.\)]$//;
1390 my $first_marker_num = _GetMarkerIntegerNum
($list_marker_type, $first_marker);
1391 $list_att = $list_marker_type eq "1" ?
"" : " type=\"$list_marker_type\"";
1393 $list_class = " class=\"$opt{style_prefix}ol\"";
1394 my $start = $first_marker_num;
1395 $start = 10 if $start > 10;
1396 $start = 5 if $start > 5 && $start < 10;
1397 $start = 1 if $start > 1 && $start < 5;
1398 $list_att .= " start=\"$start\"" unless $start == 1;
1399 $list_incr = _IncrList
($start, $first_marker_num);
1401 $list_class = " class=\"$opt{style_prefix}lc-greek\""
1402 if $list_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
1403 $list_att .= " start=\"$first_marker_num\"" unless $first_marker_num == 1;
1406 $result = "<$list_type$list_att$list_class>\n$list_incr" . $result . "</$list_type>\n";
1410 # We use a different prefix before nested lists than top-level lists.
1411 # See extended comment in _ProcessListItems().
1413 # Note: (jg) There's a bit of duplication here. My original implementation
1414 # created a scalar regex pattern as the conditional result of the test on
1415 # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1416 # substitution once, using the scalar as the pattern. This worked,
1417 # everywhere except when running under MT on my hosting account at Pair
1418 # Networks. There, this caused all rebuilds to be killed by the reaper (or
1419 # perhaps they crashed, but that seems incredibly unlikely given that the
1420 # same script on the same server ran fine *except* under MT). I've spent
1421 # more time trying to figure out why this is happening than I'd like to
1422 # admit. My only guess, backed up by the fact that this workaround works,
1423 # is that Perl optimizes the substitution when it can figure out that the
1424 # pattern will never change, and when this optimization isn't on, we run
1425 # afoul of the reaper. Thus, the slightly redundant code that uses two
1426 # static s/// patterns rather than one conditional pattern.
1428 # Note: (kjm) With the addition of the two-of-the-same-kind-in-a-row-
1429 # starts-a-list-at-the-top-level rule the two patterns really are somewhat
1430 # different now, but the duplication has pretty much been eliminated via
1431 # use of a separate sub which has the side-effect of making the below
1432 # two cases much easier to grok all at once.
1434 if ($g_list_level) {
1439 &$list_item_sub($1, $2, $3, $4);
1446 (?
:(?
<=\n) # two of the same kind of marker lines
1447 (?
=[ ]{0,$less_than_indent}$marker_ul[ ].*\n
1448 [ ]{0,$less_than_indent}$marker_ul[ ])) |
1449 (?
:(?
<=\n) # in a row will start a list
1450 (?
=[ ]{0,$less_than_indent}$marker_ol[ ].*\n
1451 [ ]{0,$less_than_indent}$marker_ol[ ])) |
1452 (?
:(?
<=\n) # or any marker and a sublist marker
1453 (?
=[ ]{0,$less_than_indent}$marker_any[ ].*\n
1454 [ ]{$indent,$less_than_double_indent}$marker_any[ ]))
1458 &$list_item_sub($1, $2, $3, $4);
1466 sub _ProcessListItems
{
1468 # Process the contents of a single ordered or unordered list, splitting it
1469 # into individual list items.
1472 my $list_type = shift;
1473 my $list_str = shift;
1475 # The $g_list_level global keeps track of when we're inside a list.
1476 # Each time we enter a list, we increment it; when we leave a list,
1477 # we decrement. If it's zero, we're not in a list anymore.
1479 # We do this because when we're not inside a list, we want to treat
1480 # something like this:
1482 # I recommend upgrading to version
1483 # 8. Oops, now this line is treated
1486 # As a single paragraph, despite the fact that the second line starts
1487 # with a digit-period-space sequence.
1489 # Whereas when we're inside a list (or sub-list), that line will be
1490 # treated as the start of a sub-list. What a kludge, huh? This is
1491 # an aspect of Markdown's syntax that's hard to parse perfectly
1492 # without resorting to mind-reading. Perhaps the solution is to
1493 # change the syntax rules such that sub-lists must start with a
1494 # starting cardinal number; e.g. "1." or "a.".
1497 my $marker_kind = $list_type eq "ul" ?
$marker_ul : $marker_ol;
1499 my $first_marker_type;
1500 my $first_marker_num;
1507 # trim trailing blank lines:
1508 $list_str =~ s/\n{2,}\z/\n/;
1513 while ($list_str =~ m
{\G
# start where we left off
1514 (\n+)?
# leading line = $1
1515 (^[ ]*) # leading whitespace = $2
1516 ($marker_any) [ ] ([ ]*) # list marker = $3 leading item space = $4
1518 my $leading_line = $1;
1519 my $leading_space = $2;
1520 my $list_marker = $3;
1521 my $list_marker_len = length($list_marker);
1522 my $leading_item_space = $4;
1523 if ($-[0] > $oldpos) {
1524 $result .= substr($list_str, $oldpos, $-[0] - $oldpos); # Sort-of $`
1525 $oldpos = $-[0]; # point at start of this entire match
1527 if (!defined($first_marker)) {
1528 $first_marker = $list_marker;
1529 $first_marker_type = _GetListMarkerType
($list_type, $first_marker);
1530 if ($first_marker_type) {
1531 (my $marker_val = $first_marker) =~ s/[.\)]$//;
1532 $first_marker_num = _GetMarkerIntegerNum
($first_marker_type, $marker_val);
1533 $next_num = $first_marker_num;
1534 $skipped = 1 if $next_num != 1;
1536 } elsif ($list_marker !~ /$marker_kind/) {
1537 # Wrong marker kind, "fix up" the marker to a correct "lazy" marker
1538 # But keep the old length in $list_marker_len
1539 $list_marker = $last_marker;
1542 # Now grab the rest of this item's data up to but excluding the next
1543 # list marker at the SAME indent level, but sublists must be INCLUDED
1546 while ($list_str =~ m
{\G
1547 ((?
:.+?
)(?
:\n{1,2})) # list item text = $1
1548 (?
= \n* (?
: \z
| # end of string OR
1549 (^[ ]*) # leading whitespace = $2
1550 ($marker_any) # next list marker = $3
1551 ([ ]+) )) # one or more spaces after marker = $4
1554 # If $3 has a left edge that is at the left edge of the previous
1555 # marker OR $3 has a right edge that is at the right edge of the
1556 # previous marker then we stop; otherwise we go on
1558 $item .= substr($list_str, $-[0], $+[0] - $-[0]); # $&
1559 last if !defined($4) || length($2) == length($leading_space) ||
1560 length($2) + length($3) == length($leading_space) + $list_marker_len;
1561 # move along, you're not the marker droid we're looking for...
1562 $item .= substr($list_str, $+[0], $+[4] - $+[0]);
1563 pos($list_str) = $+[4]; # ...move along over the marker droid
1565 # Remember where we parked
1566 $oldpos = pos($list_str);
1568 # Process the $list_marker $item
1574 if ($list_type eq "ul" && !$leading_item_space && $item =~ /^\[([ xX])\] +(.*)$/s) {
1575 my $checkmark = lc $1;
1577 my ($checkbox_class, $checkbox_val);
1578 if ($checkmark eq "x") {
1579 ($checkbox_class, $checkbox_val) = ("checkbox-on", "x");
1581 ($checkbox_class, $checkbox_val) = ("checkbox-off", " ");
1583 $liatt = " class=\"$opt{style_prefix}$checkbox_class\"";
1584 $checkbox = "<span><span></span></span><span></span><span>[<tt>$checkbox_val</tt>] </span>";
1586 my $list_marker_type;
1587 ($list_marker_type, $liatt) = _GetListItemTypeClass
($list_type, $list_marker, $last_marker);
1588 if ($list_type eq "ol" && defined($first_marker)) {
# Declare $styled unconditionally: "my $x = ... if COND" is documented as
# undefined behaviour, so the declaration is separated from the conditional
# assignment. $fancy is still only set when the condition holds.
1589 my $styled; $styled = $fancy = 1 if $liatt && $list_marker =~ /\)$/;
1590 my ($sfx, $dash) = ("", "");
1591 ($sfx, $dash) = ("li", "-") if $styled;
1592 if ($liatt =~ /lower/) {
1593 $sfx .= "${dash}lc";
1594 } elsif ($liatt =~ /upper/) {
1595 $sfx .= "${dash}uc";
1597 $sfx .= "-greek" if $liatt =~ /greek/;
1598 $liatt = " class=\"$opt{style_prefix}$sfx\"" if $sfx;
1599 $typechanged = 1 if $list_marker_type ne $first_marker_type;
1600 (my $marker_val = $list_marker) =~ s/[.\)]$//;
1601 my $marker_num = _GetMarkerIntegerNum
($list_marker_type, $marker_val);
1602 $marker_num = $next_num if $marker_num < $next_num;
1603 $skipped = 1 if $next_num < $marker_num;
1604 $incr = _IncrList
($next_num, $marker_num, "incrlevel=$g_list_level");
1605 $liatt = " value=\"$marker_num\"$liatt" if $fancy || $skipped;
1606 $liatt = " type=\"$list_marker_type\"$liatt" if $styled || $typechanged;
1607 $next_num = $marker_num + 1;
1610 $last_marker = $list_marker;
1612 if ($leading_line or ($item =~ m/\n{2,}/)) {
1613 $item = _RunBlockGamut
(_Outdent
($item));
1616 # Recursion for sub-lists:
1617 $item = _DoLists
(_Outdent
($item));
1619 $item = _RunSpanGamut
($item);
1623 $result .= "$incr<li$liatt>" . $checkbox . $item . "</li>\n";
1626 # remove "incrlevel=$g_list_level " parts
1627 $result =~ s
{<span incrlevel
=$g_list_level class="$opt{style_prefix}ol-incr((?:-\d{1,2})?)">}
1628 {<span
class="$opt{style_prefix}ol-incr$1">}g
;
1630 # remove the $g_list_level incr spans entirely
1631 $result =~ s{<span incrlevel=$g_list_level class="$opt{style_prefix}ol-incr(?:-\d{1,2})?"></span>\n}{}g;
1632 # remove the class="$opt{style_prefix}lc-greek" if first_marker is greek
1633 $result =~ s{(<li[^>]*?) class="$opt{style_prefix}lc-greek">}{$1>}g
1634 if defined($first_marker_type) && $first_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
1637 # Anything left over (similar to $') goes into result, but this should always be empty
1638 $result .= _RunBlockGamut
(substr($list_str, pos($list_str)));
1641 return ($result, $first_marker, $fancy);
1647 # Process Markdown `<pre><code>` blocks.
1654 ( # $1 = the code block -- one or more lines, starting with indent_width spaces
1656 (?
:[ ]{$opt{indent_width
}}) # Lines must start with indent_width of spaces
1660 ((?
=^[ ]{0,$opt{indent_width
}}\S
)|\Z
) # Lookahead for non-space at line-start, or end of doc
1664 $codeblock =~ s/\n\n\n/\n\n/g; # undo "paragraph for last list item" change
1665 $codeblock = _EncodeCode
(_Outdent
($codeblock));
1666 $codeblock =~ s/\A\n+//; # trim leading newlines
1667 $codeblock =~ s/\s+\z//; # trim trailing whitespace
1669 my $result = "<div class=\"$opt{style_prefix}code\"><pre style=\"display:none\"></pre><pre><code>"
1670 . $codeblock . "\n</code></pre></div>";
1671 my $key = block_id
($result);
1672 $g_code_blocks{$key} = $result;
1673 "\n\n" . $key . "\n\n";
1682 # * Backtick quotes are used for <code></code> spans.
1684 # * You can use multiple backticks as the delimiters if you want to
1685 # include literal backticks in the code span. So, this input:
1687 # Just type ``foo `bar` baz`` at the prompt.
1689 # Will translate to:
1691 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1693 # There's no arbitrary limit to the number of backticks you
1694 # can use as delimiters. If you need three consecutive backticks
1695 # in your code, use four for delimiters, etc.
1697 # * You can use spaces to get literal backticks at the edges:
1699 # ... type `` `bar` `` ...
1703 # ... type <code>`bar`</code> ...
1709 (`+) # $1 = Opening run of `
1710 (.+?
) # $2 = The code block
1712 \1 # Matching closer
1716 $c =~ s/^[ ]+//g; # leading whitespace
1717 $c =~ s/[ ]+$//g; # trailing whitespace
1718 $c = _EncodeCode
($c);
1728 # Encode/escape certain characters inside Markdown code runs.
1729 # The point is that in code, these characters are literals,
1730 # and lose their special Markdown meanings.
1734 # Encode all ampersands; HTML entities are not
1735 # entities within a Markdown code span.
1738 # Encode $'s, but only if we're running under Blosxom.
1739 # (Blosxom interpolates Perl variables in article bodies.)
# Replace each literal "$" with its numeric character reference so that
# Blosxom has no "$" left to interpolate.
1740 s/\$/&#036;/g if $_haveBX;
1742 # Do the angle bracket song and dance:
1746 # Now, escape characters that are magic in Markdown:
1747 s!([*_~{}\[\]\\])!$g_escape_table{$1}!g;
1753 sub _DoItalicsAndBoldAndStrike
{
1756 # <strong> must go first:
1757 $text =~ s
{ \
*\
* (?
=\S
) (.+?
[*_
]*) (?
<=\S
) \
*\
* }
1758 {<strong
>$1</strong
>}gsx
;
1759 $text =~ s
{ (?
<!\w
) __
(?
=\S
) (.+?
[*_
]*) (?
<=\S
) __
(?
!\w
) }
1760 {<strong
>$1</strong
>}gsx
;
1762 $text =~ s
{ ~~ (?
=\S
) (.+?
[*_
]*) (?
<=\S
) ~~ }
1763 {<strike
>$1</strike
>}gsx
;
1765 $text =~ s
{ \
* (?
=\S
) (.+?
) (?
<=\S
) \
* }
1767 $text =~ s
{ (?
<!\w
) _
(?
=\S
) (.+?
) (?
<=\S
) _
(?
!\w
) }
1774 sub _DoBlockQuotes
{
1778 ( # Wrap whole match in $1
1780 ^[ ]*>[ ]?
# '>' at the start of a line
1781 .+\n # rest of the first line
1782 (.+\n)* # subsequent consecutive lines
1788 $bq =~ s/^[ ]*>[ ]?//gm; # trim one level of quoting
1789 $bq =~ s/^[ ]+$//mg; # trim whitespace-only lines
1790 $bq = _RunBlockGamut
($bq); # recurse
1793 "<blockquote>\n$bq\n</blockquote>\n\n";
1801 sub _FormParagraphs
{
1804 # $text - string to process with html <p> tags
1808 # Strip leading and trailing lines:
1812 my @grafs = split(/\n{2,}/, $text);
1818 unless (defined($g_html_blocks{$_}) || defined($g_code_blocks{$_})) {
1819 $_ = _RunSpanGamut
($_);
1826 # Unhashify HTML blocks
1829 if (defined( $g_html_blocks{$_} )) {
1830 $_ = $g_html_blocks{$_};
1834 return join "\n\n", @grafs;
1838 my $g_possible_tag_name;
1841 # note: length("blockquote") == 10
1842 $g_possible_tag_name = qr/(?i:[a-z]{1,10}|h[1-6])/o;
1843 %ok_tag_name = map({$_ => 1} qw(
1844 a abbr acronym address
1845 b basefont bdo big blockquote br
1846 caption center cite code col colgroup
1847 dd del dfn div dl dt
1850 h1 h2 h3 h4 h5 h6 hr
1857 s samp small span strike strong sub sup
1858 table tbody td tfoot th thead tr tt
1862 $ok_tag_name{$_} = 0 foreach (qw(
1868 sub _SetAllowedTag
{
# Allow or forbid a tag name that is already a key of %ok_tag_name.
#   $tag    - tag name; nothing happens unless it is defined and already
#             exists as a key of %ok_tag_name (unknown names are never added)
#   $forbid - true stores 0 for the tag, false (or omitted) stores 1
# NOTE(review): presumably a true value means the tag is passed through
# unescaped by the tag check that tests $ok_tag_name{lc($1)} -- confirm there.
1869 my ($tag, $forbid) = @_;
1870 $ok_tag_name{$tag} = $forbid ?
0 : 1
1871 if defined($tag) && exists($ok_tag_name{$tag});
1875 # Encode leading '<' of any non-tags
1876 # However, "<?", "<!" and "<$" are passed through (legacy on that "<$" thing)
1879 return $tag if $tag =~ /^<[?\$!]/;
1880 if (($tag =~ m{^<($g_possible_tag_name)(?:[\s>]|/>$)} || $tag =~ m{^</($g_possible_tag_name)\s*>}) &&
1881 $ok_tag_name{lc($1)}) {
1890 sub _EncodeAmpsAndAngles
{
1891 # Smart processing for ampersands and angle brackets that need to be encoded.
1895 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1896 # http://bumppo.net/projects/amputator/
# An "&" that does not begin something shaped like an entity or character
# reference ("&name;", "&#123;", "&#x1F;") is replaced with "&amp;".
1897 $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
# A "<" is replaced with "&lt;" when the next character cannot begin a tag,
# or when no ">" follows it before the end of the text.
1900 $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
1901 $text =~ s{<(?=[^>]*$)}{&lt;}g;
1903 # Encode <'s that cannot possibly be a start or end tag
1904 $text =~ s{(<[^>]*>)}{_DoTag($1)}ige;
1910 sub _EncodeBackslashEscapes
{
1912 # Parameter: String.
1913 # Returns: String after processing the following backslash escape sequences.
1917 s!\\\\!$g_escape_table{'\\'}!go; # Must process escaped backslashes first.
1918 s{\\([`*_~{}\[\]()>#+\-.!`])}{$g_escape_table{$1}}g
;
# <http://...> style autolinks: the URL becomes the href, and the visible
# link text keeps its angle brackets as "&lt;"/"&gt;" so they are not
# emitted as raw markup inside the anchor.
1927 s{<((https?|ftps?):[^'\042>\s]+)>}{<a href="$1">&lt;$1&gt;</a>}gi;
1929 # Email addresses: <address@domain.foo>
1936 [-a
-z0
-9]+(\
.[-a
-z0
-9]+)*\
.[a
-z
]+
# The prefix and suffix are placed around the visible address by
# _EncodeEmailAddress, so they are passed as entities, not raw brackets.
1940 _EncodeEmailAddress(_UnescapeSpecialChars($1), "&lt;", "&gt;");
1943 # (kjm) I don't do "x" patterns
1944 s
{(?
<![\042'<>])(?<!&[Ll][Tt];)(?<!<)(?<![Cc];)\b((?:https?|ftps?)://(?:[-a-zA-Z0-9./?\&\%=_~!*;:\@+\$,\x23](?:(?<![.,:;])|(?=[^\s])))+)}
1945 {<a href="$1">$1</a>}sog;
1946 s{(?<![][])(?<!\] )\[RFC( ?)([0-9]{1,5})\](?![][])(?! \[)}
1947 {[<a href="http://tools.ietf.org/html/rfc$2">RFC$1$2</a>]}sog;
1953 sub _EncodeEmailAddress {
1955 # Input: an email address, e.g. "foo@example.com"
1957 # Output: the email address as a mailto link, with each character
1958 # of the address encoded as either a decimal or hex entity, in
1959 # the hopes of foiling most address harvesting spam bots. E.g.:
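1961 # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1962 # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1963 # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>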
1965 # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1966 # mailing list: <http://tinyurl.com/yu7ue>
1969 my ($addr, $prefix, $suffix) = @_;
1970 $prefix = "" unless defined($prefix);
1971 $suffix = "" unless defined($suffix);
1973 srand(unpack('N
',md5($addr)));
1975 sub { '&#' . ord(shift) . ';' },
1976 sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
1980 $addr = "mailto:" . $addr;
1984 if ( $char eq '@' ) {
1985 # this *must* be encoded. I insist.
1986 $char = $encode[int rand 1]->($char);
1987 } elsif ( $char ne ':' ) {
1988 # leave ':' alone (to spot mailto: later)
1990 # roughly 10% raw, 45% hex, 45% dec
1992 $r > .9 ?
$encode[2]->($char) :
1993 $r < .45 ?
$encode[1]->($char) :
2000 # strip the mailto: from the visible part
2001 (my $bareaddr = $addr) =~ s/^.+?://;
2002 $addr = qq{<a href
="$addr">$prefix$bareaddr$suffix</a
>};
2008 sub _UnescapeSpecialChars
{
2010 # Swap back in all the special characters we've hidden.
# Walks every (character, placeholder) pair in %g_escape_table and replaces
# each occurrence of the placeholder in $text with the original character.
# NOTE(review): the placeholder is interpolated as a regex pattern, so this
# assumes it contains no regex metacharacters -- confirm where the table
# is built.
2014 while( my($char, $hash) = each(%g_escape_table) ) {
2015 $text =~ s/$hash/$char/g;
2023 # Parameter: String containing HTML markup.
2024 # Returns: Reference to an array of the tokens comprising the input
2025 # string. Each token is either a tag (possibly with nested
2026 # tags contained therein, such as <a href="<MTFoo>">) or a
2027 # run of text between tags. Each element of the array is a
2028 # two-element array; the first is either 'tag' or 'text';
2029 # the second is the actual value.
2032 # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
2033 # <http://www.bradchoate.com/past/mtregex.php>
2038 my $len = length $str;
2042 my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x
$depth) . (')*>)' x
$depth);
2043 my $match = qr
/(?s
: <! ( -- .*?
-- \s
* )+ > ) | # comment
2044 (?s
: <\? .*?
\?> ) | # processing instruction
2045 $nested_tags/iox
; # nested tags
2047 while ($str =~ m/($match)/g) {
2049 my $sec_start = pos $str;
2050 my $tag_start = $sec_start - length $whole_tag;
2051 if ($pos < $tag_start) {
2052 push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
2054 push @tokens, ['tag', $whole_tag];
2057 push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
2064 # Remove one level of line-leading indent_width of spaces
2068 $text =~ s/^ {1,$opt{indent_width}}//gm;
2075 # Expand tabs to spaces using $opt{tab_width} if no second argument
2078 my $ts = shift || $opt{tab_width
};
2079 # From the Perl camel book "Fluent Perl" section (slightly modified)
2080 $text =~ s/(.*?)(\t+)/$1 . ' ' x (length($2) * $ts - length($1) % $ts)/ge;
2087 # Add URL prefix if needed
2091 return $url unless $opt{url_prefix
} ne '' || $opt{img_prefix
} ne '';
2092 return $url if $url =~ m
,^//, || $url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/;
2093 my $ans = $opt{url_prefix
};
# Image URLs use img_prefix instead of url_prefix. The suffix test is
# "svgz?" so that both ".svg" and ".svgz" qualify; the previous "svg?z"
# matched ".svz"/".svgz" and never plain ".svg".
2094 $ans = $opt{img_prefix}
2095 if $opt{img_prefix} ne '' && $url =~ /\.(?:png|gif|jpe?g|svgz?)$/i;
2096 return $url unless $ans ne '';
2097 $ans .= '/' if substr($ans, -1, 1) ne '/';
2098 $ans .= substr($url, 0, 1) eq '/' ?
substr($url, 1) : $url;
2104 $g_style_sheet = <<'STYLESHEET';
2106 <style type="text/css">
2109 /* Markdown.pl fancy style sheet
2110 ** Copyright (C) 2017 Kyle J. McKay.
2111 ** All rights reserved.
2113 ** Redistribution and use in source and binary forms, with or without
2114 ** modification, are permitted provided that the following conditions are met:
2116 ** 1. Redistributions of source code must retain the above copyright notice,
2117 ** this list of conditions and the following disclaimer.
2119 ** 2. Redistributions in binary form must reproduce the above copyright
2120 ** notice, this list of conditions and the following disclaimer in the
2121 ** documentation and/or other materials provided with the distribution.
2123 ** 3. Neither the name of the copyright holder nor the names of its
2124 ** contributors may be used to endorse or promote products derived from
2125 ** this software without specific prior written permission.
2127 ** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2128 ** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2129 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2130 ** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
2131 ** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2132 ** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2133 ** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2134 ** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2135 ** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2136 ** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2137 ** POSSIBILITY OF SUCH DAMAGE.
2140 div.%(base)code-bt > pre, div.%(base)code > pre {
2146 div.%(base)code-bt > pre > code, div.%(base)code > pre > code {
2147 display: inline-block;
2150 border-top: thin dotted;
2151 border-bottom: thin dotted;
2155 counter-reset: %(base)item;
2157 ol.%(base)ol[start="0"] {
2158 counter-reset: %(base)item -1;
2160 ol.%(base)ol[start="5"] {
2161 counter-reset: %(base)item 4;
2163 ol.%(base)ol[start="10"] {
2164 counter-reset: %(base)item 9;
2166 ol.%(base)ol > span.%(base)ol-incr {
2167 counter-increment: %(base)item;
2169 ol.%(base)ol > span.%(base)ol-incr-2 {
2170 counter-increment: %(base)item 2;
2172 ol.%(base)ol > span.%(base)ol-incr-5 {
2173 counter-increment: %(base)item 5;
2175 ol.%(base)ol > span.%(base)ol-incr-10 {
2176 counter-increment: %(base)item 10;
2178 ol.%(base)lc-greek, li.%(base)lc-greek {
2179 list-style-type: lower-greek;
2182 counter-increment: %(base)item;
2184 ol.%(base)ol > li.%(base)li,
2185 ol.%(base)ol > li.%(base)li-lc,
2186 ol.%(base)ol > li.%(base)li-lc-greek,
2187 ol.%(base)ol > li.%(base)li-uc {
2188 list-style-type: none;
2191 ol.%(base)ol > li.%(base)li:before,
2192 ol.%(base)ol > li.%(base)li-lc:before,
2193 ol.%(base)ol > li.%(base)li-lc-greek:before,
2194 ol.%(base)ol > li.%(base)li-uc:before {
2197 white-space: nowrap;
2201 ol.%(base)ol > li.%(base)li[type="1"]:before {
2202 content: counter(%(base)item, decimal) ")\A0 \A0 ";
2204 ol.%(base)ol > li.%(base)li-lc[type="i"]:before,
2205 ol.%(base)ol > li.%(base)li-lc[type="I"]:before {
2206 content: counter(%(base)item, lower-roman) ")\A0 \A0 ";
2208 ol.%(base)ol > li.%(base)li-uc[type="I"]:before,
2209 ol.%(base)ol > li.%(base)li-uc[type="i"]:before {
2210 content: counter(%(base)item, upper-roman) ")\A0 \A0 ";
2212 ol.%(base)ol > li.%(base)li-lc[type="a"]:before,
2213 ol.%(base)ol > li.%(base)li-lc[type="A"]:before {
2214 content: counter(%(base)item, lower-alpha) ")\A0 \A0 ";
2216 ol.%(base)ol > li.%(base)li-lc-greek[type="a"]:before,
2217 ol.%(base)ol > li.%(base)li-lc-greek[type="A"]:before {
2218 content: counter(%(base)item, lower-greek) ")\A0 \A0 ";
2220 ol.%(base)ol > li.%(base)li-uc[type="A"]:before,
2221 ol.%(base)ol > li.%(base)li-uc[type="a"]:before {
2222 content: counter(%(base)item, upper-alpha) ")\A0 \A0 ";
2225 li.%(base)checkbox-on,
2226 li.%(base)checkbox-off {
2227 list-style-type: none;
2230 li.%(base)checkbox-on > span:first-child + span + span,
2231 li.%(base)checkbox-off > span:first-child + span + span {
2233 clip: rect(0,0,0,0);
2235 li.%(base)checkbox-on > span:first-child,
2236 li.%(base)checkbox-off > span:first-child,
2237 li.%(base)checkbox-on > span:first-child + span,
2238 li.%(base)checkbox-off > span:first-child + span {
2245 li.%(base)checkbox-on > span:first-child > span:first-child,
2246 li.%(base)checkbox-off > span:first-child > span:first-child {
2249 left: 0.75pt; top: 0.75pt; right: 0.75pt; bottom: 0.75pt;
2251 li.%(base)checkbox-on > span:first-child > span:first-child:before,
2252 li.%(base)checkbox-off > span:first-child > span:first-child:before {
2253 display: inline-block;
2261 li.%(base)checkbox-on > span:first-child + span:before {
2274 $g_style_sheet =~ s/^\s+//g;
2275 $g_style_sheet =~ s/\s+$//g;
2276 $g_style_sheet .= "\n";
2285 Markdown.pl - convert Markdown format text files to HTML
2289 B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
2290 [B<--imageroot>=I<prefix>] [B<--version>] [B<--shortversion>]
2291 [B<--tabwidth>=I<num>] [B<--stylesheet>] [B<--stub>] [--]
2295 -h show short usage help
2296 --help show long detailed help
2297 --html4tags use <br> instead of <br />
2298 --deprecated allow <dir> and <menu> tags
2299 --tabwidth=num expand tabs to num instead of 8
2300 -r prefix | --htmlroot=prefix append relative non-img URLs
2302 -i prefix | --imageroot=prefix append relative img URLs to
2304 -V | --version show version, authors, license
2306 -s | --shortversion show just the version number
2307 --stylesheet output the fancy style sheet
2308 --no-stylesheet do not output fancy style sheet
2309 --stub wrap output in stub document
2310 implies --stylesheet
2311 -- end options and treat next
2316 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2317 easy-to-write structured text format into HTML. Markdown's text format
2318 is most similar to that of plain text email, and supports features such
2319 as headers, *emphasis*, code blocks, blockquotes, and links.
2321 Markdown's syntax is designed not as a generic markup language, but
2322 specifically to serve as a front-end to (X)HTML. You can use span-level
2323 HTML tags anywhere in a Markdown document, and you can use block level
2324 HTML tags (like <div> and <table>) as well.
2326 For more information about Markdown's syntax, see the F<basics.md>
2327 and F<syntax.md> files included with F<Markdown.pl>.
2329 Input (auto-detected) may be either ISO-8859-1 or UTF-8. Output is always
2330 converted to the UTF-8 character set.
2335 Use "--" to end switch parsing. For example, to open a file named "-z", use:
2342 =item B<--html4tags>
2344 Use HTML 4 style for empty element tags, e.g.:
2348 instead of Markdown's default XHTML style tags, e.g.:
2353 =item B<--deprecated>
2355 Both "<dir>" and "<menu>" are normally taken as literal text and the leading
2356 "<" will be automatically escaped.
2358 If this option is used, they are recognized as valid tags and passed through
2359 without being escaped.
2361 When dealing with program argument descriptions, "<dir>" can be particularly
2362 problematic; therefore, use of this option is not recommended.
2364 Other deprecated tags (such as "<font>" and "<center>" for example) continue
2365 to be recognized and passed through even without using this option.
2368 =item B<--tabwidth>=I<num>
2370 Expand tabs to I<num> character wide tab stop positions instead of the default
2371 8. Don't use this; physical tabs should always be expanded to 8-character
2372 positions. This option does I<not> affect the number of spaces needed to
2373 start a new "indent level". That will always be 4 no matter what value is
2374 used (or implied by default) with this option. Also note that tabs inside
2375 backticks-delimited code blocks will always be expanded to 8-character tab
2376 stop positions no matter what value is used for this option.
2378 The value must be S<2 <= I<num> <= 32>.
2381 =item B<-r> I<prefix>, B<--htmlroot>=I<prefix>
2383 Any non-absolute URLs have I<prefix> prepended.
2386 =item B<-i> I<prefix>, B<--imageroot>=I<prefix>
2388 Any non-absolute URLs have I<prefix> prepended (overriding the B<-r> prefix
2389 if any) but only if they end in an image suffix.
2392 =item B<-V>, B<--version>
2394 Display Markdown's version number and copyright information.
2397 =item B<-s>, B<--shortversion>
2399 Display the short-form version number.
2402 =item B<--stylesheet>
2404 Include the fancy style sheet at the beginning of the output (or in the
2405 C<head> section with B<--stub>). This style sheet makes fancy checkboxes
2406 and makes a right parenthesis C<)> show instead of a C<.> for ordered lists
2407 that use them. Without it things will still look fine except that the
2408 fancy stuff won't be there.
2410 Use this option with no other arguments and redirect standard input to
2411 /dev/null to get just the style sheet and nothing else.
2414 =item B<--no-stylesheet>
2416 Overrides a previous B<--stylesheet> and disables implicit inclusion
2417 of the style sheet by the B<--stub> option.
2422 Wrap the output in a full document stub (i.e. has C<html>, C<head> and C<body>
2423 tags). The style sheet I<will> be included in the C<head> section unless the
2424 B<--no-stylesheet> option is also used.
2427 =item B<-h>, B<--help>
2429 Display Markdown's help. With B<--help> full help is shown, with B<-h> only
2430 the usage and options are shown.
2436 =head1 VERSION HISTORY
2438 Z<> See the F<README> file for detailed release notes for this version.
2442 =item Z<> 1.1.4 - 24 Jun 2017
2444 =item Z<> 1.1.3 - 13 Feb 2017
2446 =item Z<> 1.1.2 - 19 Jan 2017
2448 =item Z<> 1.1.1 - 12 Jan 2017
2450 =item Z<> 1.1.0 - 11 Jan 2017
2452 =item Z<> 1.0.4 - 05 Jun 2016
2454 =item Z<> 1.0.3 - 06 Sep 2015
2456 =item Z<> 1.0.2 - 03 Sep 2015
2458 =item Z<> 1.0.1 - 14 Dec 2004
2460 =item Z<> 1.0.0 - 28 Aug 2004
2470 =item L<http://daringfireball.net>
2472 =item L<http://daringfireball.net/projects/markdown/>
2480 =item PHP port and other contributions by Michel Fortin
2482 =item L<http://michelf.com>
2490 =item Additional enhancements and tweaks by Kyle J. McKay
2492 =item mackyle<at>gmail.com
2496 =head1 COPYRIGHT AND LICENSE
2500 =item Copyright (C) 2003-2004 John Gruber
2502 =item Copyright (C) 2015-2017 Kyle J. McKay
2504 =item All rights reserved.
2508 Redistribution and use in source and binary forms, with or without
2509 modification, are permitted provided that the following conditions are
2516 Redistributions of source code must retain the above copyright
2517 notice, this list of conditions and the following disclaimer.
2521 Redistributions in binary form must reproduce the above copyright
2522 notice, this list of conditions and the following disclaimer in the
2523 documentation and/or other materials provided with the distribution.
2527 Neither the name "Markdown" nor the names of its contributors may
2528 be used to endorse or promote products derived from this software
2529 without specific prior written permission.
2533 This software is provided by the copyright holders and contributors "as
2534 is" and any express or implied warranties, including, but not limited
2535 to, the implied warranties of merchantability and fitness for a
2536 particular purpose are disclaimed. In no event shall the copyright owner
2537 or contributors be liable for any direct, indirect, incidental, special,
2538 exemplary, or consequential damages (including, but not limited to,
2539 procurement of substitute goods or services; loss of use, data, or
2540 profits; or business interruption) however caused and on any theory of
2541 liability, whether in contract, strict liability, or tort (including
2542 negligence or otherwise) arising in any way out of the use of this
2543 software, even if advised of the possibility of such damage.