adds my patches to Markdown.pm
[semece.git] / lib / Semece / Markdown.pm
blobc76236bc27eae7de91e43ad2e9de790743202c54
1 # my patched markdown version, it basically adds a ' &nbsp' after each
2 # '<p>'
3 package Text::Markdown;
4 require 5.008_000;
5 use strict;
6 use warnings;
7 use re 'eval';
9 use Digest::MD5 qw(md5_hex);
10 use Encode qw();
11 use Carp qw(croak);
12 use base 'Exporter';
14 our $VERSION = '1.0.24';
15 our @EXPORT_OK = qw(markdown);
17 =head1 NAME
19 Text::Markdown - Convert Markdown syntax to (X)HTML
21 =head1 SYNOPSIS
23 use Text::Markdown 'markdown';
24 my $html = markdown($text);
26 use Text::Markdown 'markdown';
27 my $html = markdown( $text, {
28 empty_element_suffix => '>',
29 tab_width => 2,
30 } );
32 use Text::Markdown;
33 my $m = Text::Markdown->new;
34 my $html = $m->markdown($text);
36 use Text::Markdown;
37 my $m = Text::MultiMarkdown->new(
38 empty_element_suffix => '>',
39 tab_width => 2,
41 my $html = $m->markdown( $text );
43 =head1 DESCRIPTION
45 Markdown is a text-to-HTML filter; it translates an easy-to-read /
46 easy-to-write structured text format into HTML. Markdown's text format
47 is most similar to that of plain text email, and supports features such
48 as headers, *emphasis*, code blocks, blockquotes, and links.
50 Markdown's syntax is designed not as a generic markup language, but
51 specifically to serve as a front-end to (X)HTML. You can use span-level
52 HTML tags anywhere in a Markdown document, and you can use block level
53 HTML tags (like <div> and <table> as well).
55 =head1 SYNTAX
57 This module implements the 'original' Markdown markdown syntax from:
59 http://daringfireball.net/projects/markdown/
61 =head1 OPTIONS
63 Text::Markdown supports a number of options to it's processor which control the behaviour of the output document.
65 These options can be supplied to the constructor, on in a hash with the individual calls to the markdown method.
66 See the synopsis for examples of both of the above styles.
68 The options for the processor are:
70 =over
72 =item empty_element_suffix
74 This option can be used to generate normal HTML output. By default, it is ' />', which is xHTML, change to '>' for normal HTML.
76 =item tab_width
78 Controls indent width in the generated markup, defaults to 4
80 =item markdown_in_html_blocks
82 Controls if Markdown is processed when inside HTML blocks. Defaults to 0.
84 =item trust_list_start_value
86 If true, ordered lists will use the first number as the starting point for
87 numbering. This will let you pick up where you left off by writing:
89 1. foo
90 2. bar
92 some paragraph
94 3. baz
95 6. quux
97 (Note that in the above, quux will be numbered 4.)
99 =back
101 =head1 METHODS
103 =cut
105 # Regex to match balanced [brackets]. See Friedl's
106 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
107 our ($g_nested_brackets, $g_nested_parens);
108 $g_nested_brackets = qr{
109 (?> # Atomic matching
110 [^\[\]]+ # Anything other than brackets
113 (??{ $g_nested_brackets }) # Recursive set of nested brackets
117 # Doesn't allow for whitespace, because we're using it to match URLs:
118 $g_nested_parens = qr{
119 (?> # Atomic matching
120 [^()\s]+ # Anything other than parens or whitespace
123 (??{ $g_nested_parens }) # Recursive set of nested brackets
128 # Table of hash values for escaped characters:
129 our %g_escape_table;
130 foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
131 $g_escape_table{$char} = md5_hex($char);
134 =head1 METHODS
136 =head2 new
138 A simple constructor, see the SYNTAX and OPTIONS sections for more information.
140 =cut
142 sub new {
143 my ($class, %p) = @_;
145 $p{base_url} ||= ''; # This is the base url to be used for WikiLinks
147 $p{tab_width} = 4 unless (defined $p{tab_width} and $p{tab_width} =~ m/^\d+$/);
149 $p{empty_element_suffix} ||= ' />'; # Change to ">" for HTML output
151 # Is markdown processed in HTML blocks? See t/15inlinehtmldonotturnoffmarkdown.t
152 $p{markdown_in_html_blocks} = $p{markdown_in_html_blocks} ? 1 : 0;
154 $p{trust_list_start_value} = $p{trust_list_start_value} ? 1 : 0;
156 my $self = { params => \%p };
157 bless $self, ref($class) || $class;
158 return $self;
161 =head2 markdown
163 The main function as far as the outside world is concerned. See the SYNOPSIS
164 for details on use.
166 =cut
168 sub markdown {
169 my ( $self, $text, $options ) = @_;
171 # Detect functional mode, and create an instance for this run..
172 unless (ref $self) {
173 if ( $self ne __PACKAGE__ ) {
174 my $ob = __PACKAGE__->new();
175 # $self is text, $text is options
176 return $ob->markdown($self, $text);
178 else {
179 croak('Calling ' . $self . '->markdown (as a class method) is not supported.');
183 $options ||= {};
185 %$self = (%{ $self->{params} }, %$options, params => $self->{params});
187 $self->_CleanUpRunData($options);
189 return $self->_Markdown($text);
192 sub _CleanUpRunData {
193 my ($self, $options) = @_;
194 # Clear the global hashes. If we don't clear these, you get conflicts
195 # from other articles when generating a page which contains more than
196 # one article (e.g. an index page that shows the N most recent
197 # articles):
198 $self->{_urls} = $options->{urls} ? $options->{urls} : {}; # FIXME - document passing this option (tested in 05options.t).
199 $self->{_titles} = {};
200 $self->{_html_blocks} = {};
201 # Used to track when we're inside an ordered or unordered list
202 # (see _ProcessListItems() for details)
203 $self->{_list_level} = 0;
207 sub _Markdown {
209 # Main function. The order in which other subs are called here is
210 # essential. Link and image substitutions need to happen before
211 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
212 # and <img> tags get encoded.
214 my ($self, $text, $options) = @_;
216 $text = $self->_CleanUpDoc($text);
218 # Turn block-level HTML blocks into hash entries
219 $text = $self->_HashHTMLBlocks($text) unless $self->{markdown_in_html_blocks};
221 $text = $self->_StripLinkDefinitions($text);
223 $text = $self->_RunBlockGamut($text);
225 $text = $self->_UnescapeSpecialChars($text);
227 $text = $self->_ConvertCopyright($text);
229 return $text . "\n";
232 =head2 urls
234 Returns a reference to a hash with the key being the markdown reference and the value being the URL.
236 Useful for building scripts which preprocess a list of links before the main content. See t/05options.t
237 for an example of this hashref being passed back into the markdown method to create links.
239 =cut
241 sub urls {
242 my ( $self ) = @_;
244 return $self->{_urls};
247 sub _CleanUpDoc {
248 my ($self, $text) = @_;
250 # Standardize line endings:
251 $text =~ s{\r\n}{\n}g; # DOS to Unix
252 $text =~ s{\r}{\n}g; # Mac to Unix
254 # Make sure $text ends with a couple of newlines:
255 $text .= "\n\n";
257 # Convert all tabs to spaces.
258 $text = $self->_Detab($text);
260 # Strip any lines consisting only of spaces and tabs.
261 # This makes subsequent regexen easier to write, because we can
262 # match consecutive blank lines with /\n+/ instead of something
263 # contorted like /[ \t]*\n+/ .
264 $text =~ s/^[ \t]+$//mg;
266 return $text;
269 sub _StripLinkDefinitions {
271 # Strips link definitions from text, stores the URLs and titles in
272 # hash references.
274 my ($self, $text) = @_;
275 my $less_than_tab = $self->{tab_width} - 1;
277 # Link defs are in the form: ^[id]: url "optional title"
278 while ($text =~ s{
279 ^[ ]{0,$less_than_tab}\[(.+)\]: # id = \$1
280 [ \t]*
281 \n? # maybe *one* newline
282 [ \t]*
283 <?(\S+?)>? # url = \$2
284 [ \t]*
285 \n? # maybe one newline
286 [ \t]*
288 (?<=\s) # lookbehind for whitespace
289 ["(]
290 (.+?) # title = \$3
291 [")]
292 [ \t]*
293 )? # title is optional
294 (?:\n+|\Z)
295 }{}omx) {
296 $self->{_urls}{lc $1} = $self->_EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
297 if ($3) {
298 $self->{_titles}{lc $1} = $3;
299 $self->{_titles}{lc $1} =~ s/"/&quot;/g;
304 return $text;
307 sub _md5_utf8 {
308 # Internal function used to safely MD5sum chunks of the input, which might be Unicode in Perl's internal representation.
309 my $input = shift;
310 return unless defined $input;
311 if (Encode::is_utf8 $input) {
312 return md5_hex(Encode::encode('utf8', $input));
314 else {
315 return md5_hex($input);
319 sub _HashHTMLBlocks {
320 my ($self, $text) = @_;
321 my $less_than_tab = $self->{tab_width} - 1;
323 # Hashify HTML blocks:
324 # We only want to do this for block-level HTML tags, such as headers,
325 # lists, and tables. That's because we still want to wrap <p>s around
326 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
327 # phrase emphasis, and spans. The list of tags we're looking for is
328 # hard-coded:
329 my $block_tags = qr{
331 p | div | h[1-6] | blockquote | pre | table |
332 dl | ol | ul | script | noscript | form |
333 fieldset | iframe | math | ins | del
337 my $tag_attrs = qr{
338 (?: # Match one attr name/value pair
339 \s+ # There needs to be at least some whitespace
340 # before each attribute name.
341 [\w.:_-]+ # Attribute name
342 \s*=\s*
344 ".+?" # "Attribute value"
346 '.+?' # 'Attribute value'
348 )* # Zero or more
351 my $empty_tag = qr{< \w+ $tag_attrs \s* />}oxms;
352 my $open_tag = qr{< $block_tags $tag_attrs \s* >}oxms;
353 my $close_tag = undef; # let Text::Balanced handle this
355 use Text::Balanced qw(gen_extract_tagged);
356 my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] });
358 my @chunks;
359 while ($text =~ s{^(([ ]{0,$less_than_tab}<)?.*\n)}{}m) {
360 my $cur_line = $1;
361 if (defined $2) {
362 # current line could be start of code block
364 my ($tag, $remainder) = $extract_block->($cur_line . $text);
365 if ($tag) {
366 my $key = _md5_utf8($tag);
367 $self->{_html_blocks}{$key} = $tag;
368 push @chunks, "\n\n" . $key . "\n\n";
369 $text = $remainder;
371 else {
372 # No tag match, so toss $cur_line into @chunks
373 push @chunks, $cur_line;
376 else {
377 # current line could NOT be start of code block
378 push @chunks, $cur_line;
382 push @chunks, $text; # Whatever is left.
384 $text = join '', @chunks;
386 # Special case just for <hr />. It was easier to make a special case than
387 # to make the other regex more complicated.
388 $text = $self->_HashHR($text);
390 $text = $self->_HashHTMLComments($text);
392 $text = $self->_HashPHPASPBlocks($text);
394 return $text;
397 sub _HashHR {
398 my ($self, $text) = @_;
399 my $less_than_tab = $self->{tab_width} - 1;
401 $text =~ s{
403 (?<=\n\n) # Starting after a blank line
404 | # or
405 \A\n? # the beginning of the doc
407 ( # save in $1
408 [ ]{0,$less_than_tab}
409 <(hr) # start tag = $2
410 \b # word break
411 ([^<>])*? #
412 /?> # the matching end tag
413 [ \t]*
414 (?=\n{2,}|\Z) # followed by a blank line or end of document
417 my $key = _md5_utf8($1);
418 $self->{_html_blocks}{$key} = $1;
419 "\n\n" . $key . "\n\n";
420 }egx;
422 return $text;
425 sub _HashHTMLComments {
426 my ($self, $text) = @_;
427 my $less_than_tab = $self->{tab_width} - 1;
429 # Special case for standalone HTML comments:
430 $text =~ s{
432 (?<=\n\n) # Starting after a blank line
433 | # or
434 \A\n? # the beginning of the doc
436 ( # save in $1
437 [ ]{0,$less_than_tab}
438 (?s:
440 (--.*?--\s*)+
443 [ \t]*
444 (?=\n{2,}|\Z) # followed by a blank line or end of document
447 my $key = _md5_utf8($1);
448 $self->{_html_blocks}{$key} = $1;
449 "\n\n" . $key . "\n\n";
450 }egx;
452 return $text;
455 sub _HashPHPASPBlocks {
456 my ($self, $text) = @_;
457 my $less_than_tab = $self->{tab_width} - 1;
459 # PHP and ASP-style processor instructions (<?…?> and <%…%>)
460 $text =~ s{
462 (?<=\n\n) # Starting after a blank line
463 | # or
464 \A\n? # the beginning of the doc
466 ( # save in $1
467 [ ]{0,$less_than_tab}
468 (?s:
469 <([?%]) # $2
473 [ \t]*
474 (?=\n{2,}|\Z) # followed by a blank line or end of document
477 my $key = _md5_utf8($1);
478 $self->{_html_blocks}{$key} = $1;
479 "\n\n" . $key . "\n\n";
480 }egx;
481 return $text;
484 sub _RunBlockGamut {
486 # These are all the transformations that form block-level
487 # tags like paragraphs, headers, and list items.
489 my ($self, $text) = @_;
491 # Do headers first, as these populate cross-refs
492 $text = $self->_DoHeaders($text);
494 # And now, protect our tables
495 $text = $self->_HashHTMLBlocks($text) unless $self->{markdown_in_html_blocks};
497 # Do Horizontal Rules:
498 my $less_than_tab = $self->{tab_width} - 1;
499 $text =~ s{^[ ]{0,$less_than_tab}(\*[ ]?){3,}[ \t]*$}{\n<hr$self->{empty_element_suffix}\n}gmx;
500 $text =~ s{^[ ]{0,$less_than_tab}(-[ ]?){3,}[ \t]*$}{\n<hr$self->{empty_element_suffix}\n}gmx;
501 $text =~ s{^[ ]{0,$less_than_tab}(_[ ]?){3,}[ \t]*$}{\n<hr$self->{empty_element_suffix}\n}gmx;
503 $text = $self->_DoLists($text);
505 $text = $self->_DoCodeBlocks($text);
507 $text = $self->_DoBlockQuotes($text);
509 # We already ran _HashHTMLBlocks() before, in Markdown(), but that
510 # was to escape raw HTML in the original Markdown source. This time,
511 # we're escaping the markup we've just created, so that we don't wrap
512 # <p> tags around block-level tags.
513 $text = $self->_HashHTMLBlocks($text);
515 $text = $self->_FormParagraphs($text);
517 return $text;
520 sub _RunSpanGamut {
522 # These are all the transformations that occur *within* block-level
523 # tags like paragraphs, headers, and list items.
525 my ($self, $text) = @_;
527 $text = $self->_DoCodeSpans($text);
528 $text = $self->_EscapeSpecialCharsWithinTagAttributes($text);
529 $text = $self->_EscapeSpecialChars($text);
531 # Process anchor and image tags. Images must come first,
532 # because ![foo][f] looks like an anchor.
533 $text = $self->_DoImages($text);
534 $text = $self->_DoAnchors($text);
536 # Make links out of things like `<http://example.com/>`
537 # Must come after _DoAnchors(), because you can use < and >
538 # delimiters in inline links like [this](<url>).
539 $text = $self->_DoAutoLinks($text);
541 $text = $self->_EncodeAmpsAndAngles($text);
543 $text = $self->_DoItalicsAndBold($text);
545 # FIXME - Is hard coding space here sane, or does this want to be related to tab width?
546 # Do hard breaks:
547 $text =~ s/ {2,}\n/ <br$self->{empty_element_suffix}\n/g;
549 return $text;
552 sub _EscapeSpecialChars {
553 my ($self, $text) = @_;
554 my $tokens ||= $self->_TokenizeHTML($text);
556 $text = ''; # rebuild $text from the tokens
557 # my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
558 # my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
560 foreach my $cur_token (@$tokens) {
561 if ($cur_token->[0] eq "tag") {
562 # Within tags, encode * and _ so they don't conflict
563 # with their use in Markdown for italics and strong.
564 # We're replacing each such character with its
565 # corresponding MD5 checksum value; this is likely
566 # overkill, but it should prevent us from colliding
567 # with the escape values by accident.
568 $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!ogx;
569 $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!ogx;
570 $text .= $cur_token->[1];
571 } else {
572 my $t = $cur_token->[1];
573 $t = $self->_EncodeBackslashEscapes($t);
574 $text .= $t;
577 return $text;
580 sub _EscapeSpecialCharsWithinTagAttributes {
582 # Within tags -- meaning between < and > -- encode [\ ` * _] so they
583 # don't conflict with their use in Markdown for code, italics and strong.
584 # We're replacing each such character with its corresponding MD5 checksum
585 # value; this is likely overkill, but it should prevent us from colliding
586 # with the escape values by accident.
588 my ($self, $text) = @_;
589 my $tokens ||= $self->_TokenizeHTML($text);
590 $text = ''; # rebuild $text from the tokens
592 foreach my $cur_token (@$tokens) {
593 if ($cur_token->[0] eq "tag") {
594 $cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gox;
595 $cur_token->[1] =~ s{ (?<=.)</?code>(?=.) }{$g_escape_table{'`'}}gox;
596 $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gox;
597 $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gox;
599 $text .= $cur_token->[1];
601 return $text;
604 sub _DoAnchors {
606 # Turn Markdown link shortcuts into XHTML <a> tags.
608 my ($self, $text) = @_;
611 # First, handle reference-style links: [link text] [id]
613 $text =~ s{
614 ( # wrap whole match in $1
616 ($g_nested_brackets) # link text = $2
619 [ ]? # one optional space
620 (?:\n[ ]*)? # one optional newline followed by spaces
623 (.*?) # id = $3
627 my $whole_match = $1;
628 my $link_text = $2;
629 my $link_id = lc $3;
631 if ($link_id eq "") {
632 $link_id = lc $link_text; # for shortcut links like [this][].
635 $link_id =~ s{[ ]*\n}{ }g; # turn embedded newlines into spaces
637 $self->_GenerateAnchor($whole_match, $link_text, $link_id);
638 }xsge;
641 # Next, inline-style links: [link text](url "optional title")
643 $text =~ s{
644 ( # wrap whole match in $1
646 ($g_nested_brackets) # link text = $2
648 \( # literal paren
649 [ \t]*
650 ($g_nested_parens) # href = $3
651 [ \t]*
652 ( # $4
653 (['"]) # quote char = $5
654 (.*?) # Title = $6
655 \5 # matching quote
656 [ \t]* # ignore any spaces/tabs between closing quote and )
657 )? # title is optional
661 my $result;
662 my $whole_match = $1;
663 my $link_text = $2;
664 my $url = $3;
665 my $title = $6;
667 $self->_GenerateAnchor($whole_match, $link_text, undef, $url, $title);
668 }xsge;
671 # Last, handle reference-style shortcuts: [link text]
672 # These must come last in case you've also got [link test][1]
673 # or [link test](/foo)
675 $text =~ s{
676 ( # wrap whole match in $1
678 ([^\[\]]+) # link text = $2; can't contain '[' or ']'
682 my $result;
683 my $whole_match = $1;
684 my $link_text = $2;
685 (my $link_id = lc $2) =~ s{[ ]*\n}{ }g; # lower-case and turn embedded newlines into spaces
687 $self->_GenerateAnchor($whole_match, $link_text, $link_id);
688 }xsge;
690 return $text;
693 sub _GenerateAnchor {
694 # FIXME - Fugly, change to named params?
695 my ($self, $whole_match, $link_text, $link_id, $url, $title, $attributes) = @_;
697 my $result;
699 $attributes = '' unless defined $attributes;
701 if ( !defined $url && defined $self->{_urls}{$link_id}) {
702 $url = $self->{_urls}{$link_id};
705 if (!defined $url) {
706 return $whole_match;
709 $url =~ s! \* !$g_escape_table{'*'}!gox; # We've got to encode these to avoid
710 $url =~ s! _ !$g_escape_table{'_'}!gox; # conflicting with italics/bold.
711 $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
713 $result = qq{<a href="$url"};
715 if ( !defined $title && defined $link_id && defined $self->{_titles}{$link_id} ) {
716 $title = $self->{_titles}{$link_id};
719 if ( defined $title ) {
720 $title =~ s/"/&quot;/g;
721 $title =~ s! \* !$g_escape_table{'*'}!gox;
722 $title =~ s! _ !$g_escape_table{'_'}!gox;
723 $result .= qq{ title="$title"};
726 $result .= "$attributes>$link_text</a>";
728 return $result;
731 sub _DoImages {
733 # Turn Markdown image shortcuts into <img> tags.
735 my ($self, $text) = @_;
738 # First, handle reference-style labeled images: ![alt text][id]
740 $text =~ s{
741 ( # wrap whole match in $1
743 (.*?) # alt text = $2
746 [ ]? # one optional space
747 (?:\n[ ]*)? # one optional newline followed by spaces
750 (.*?) # id = $3
755 my $result;
756 my $whole_match = $1;
757 my $alt_text = $2;
758 my $link_id = lc $3;
760 if ($link_id eq '') {
761 $link_id = lc $alt_text; # for shortcut links like ![this][].
764 $self->_GenerateImage($whole_match, $alt_text, $link_id);
765 }xsge;
768 # Next, handle inline images: ![alt text](url "optional title")
769 # Don't forget: encode * and _
771 $text =~ s{
772 ( # wrap whole match in $1
774 (.*?) # alt text = $2
776 \( # literal paren
777 [ \t]*
778 ($g_nested_parens) # src url - href = $3
779 [ \t]*
780 ( # $4
781 (['"]) # quote char = $5
782 (.*?) # title = $6
783 \5 # matching quote
784 [ \t]*
785 )? # title is optional
789 my $result;
790 my $whole_match = $1;
791 my $alt_text = $2;
792 my $url = $3;
793 my $title = '';
794 if (defined($6)) {
795 $title = $6;
798 $self->_GenerateImage($whole_match, $alt_text, undef, $url, $title);
799 }xsge;
801 return $text;
804 sub _GenerateImage {
805 # FIXME - Fugly, change to named params?
806 my ($self, $whole_match, $alt_text, $link_id, $url, $title, $attributes) = @_;
808 my $result;
810 $attributes = '' unless defined $attributes;
812 $alt_text ||= '';
813 $alt_text =~ s/"/&quot;/g;
814 # FIXME - how about >
816 if ( !defined $url && defined $self->{_urls}{$link_id}) {
817 $url = $self->{_urls}{$link_id};
820 # If there's no such link ID, leave intact:
821 return $whole_match unless defined $url;
823 $url =~ s! \* !$g_escape_table{'*'}!ogx; # We've got to encode these to avoid
824 $url =~ s! _ !$g_escape_table{'_'}!ogx; # conflicting with italics/bold.
825 $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
827 if (!defined $title && length $link_id && defined $self->{_titles}{$link_id} && length $self->{_titles}{$link_id}) {
828 $title = $self->{_titles}{$link_id};
831 $result = qq{<img src="$url" alt="$alt_text"};
832 if (defined $title && length $title) {
833 $title =~ s! \* !$g_escape_table{'*'}!ogx;
834 $title =~ s! _ !$g_escape_table{'_'}!ogx;
835 $title =~ s/"/&quot;/g;
836 $result .= qq{ title="$title"};
838 $result .= $attributes . $self->{empty_element_suffix};
840 return $result;
843 sub _DoHeaders {
844 my ($self, $text) = @_;
846 # Setext-style headers:
847 # Header 1
848 # ========
850 # Header 2
851 # --------
853 $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
854 $self->_GenerateHeader('1', $1);
855 }egmx;
857 $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
858 $self->_GenerateHeader('2', $1);
859 }egmx;
862 # atx-style headers:
863 # # Header 1
864 # ## Header 2
865 # ## Header 2 with closing hashes ##
866 # ...
867 # ###### Header 6
869 my $l;
870 $text =~ s{
871 ^(\#{1,6}) # $1 = string of #'s
872 [ \t]*
873 (.+?) # $2 = Header text
874 [ \t]*
875 \#* # optional closing #'s (not counted)
878 my $h_level = length($1);
879 $self->_GenerateHeader($h_level, $2);
880 }egmx;
882 return $text;
885 sub _GenerateHeader {
886 my ($self, $level, $id) = @_;
888 return "<h$level>" . $self->_RunSpanGamut($id) . "</h$level>\n\n";
891 sub _DoLists {
893 # Form HTML ordered (numbered) and unordered (bulleted) lists.
895 my ($self, $text) = @_;
896 my $less_than_tab = $self->{tab_width} - 1;
898 # Re-usable patterns to match list item bullets and number markers:
899 my $marker_ul = qr/[*+-]/;
900 my $marker_ol = qr/\d+[.]/;
901 my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
903 # Re-usable pattern to match any entirel ul or ol list:
904 my $whole_list = qr{
905 ( # $1 = whole list
906 ( # $2
907 [ ]{0,$less_than_tab}
908 (${marker_any}) # $3 = first list item marker
909 [ \t]+
911 (?s:.+?)
912 ( # $4
915 \n{2,}
916 (?=\S)
917 (?! # Negative lookahead for another list item marker
918 [ \t]*
919 ${marker_any}[ \t]+
923 }mx;
925 # We use a different prefix before nested lists than top-level lists.
926 # See extended comment in _ProcessListItems().
928 # Note: There's a bit of duplication here. My original implementation
929 # created a scalar regex pattern as the conditional result of the test on
930 # $self->{_list_level}, and then only ran the $text =~ s{...}{...}egmx
931 # substitution once, using the scalar as the pattern. This worked,
932 # everywhere except when running under MT on my hosting account at Pair
933 # Networks. There, this caused all rebuilds to be killed by the reaper (or
934 # perhaps they crashed, but that seems incredibly unlikely given that the
935 # same script on the same server ran fine *except* under MT. I've spent
936 # more time trying to figure out why this is happening than I'd like to
937 # admit. My only guess, backed up by the fact that this workaround works,
938 # is that Perl optimizes the substition when it can figure out that the
939 # pattern will never change, and when this optimization isn't on, we run
940 # afoul of the reaper. Thus, the slightly redundant code to that uses two
941 # static s/// patterns rather than one conditional pattern.
943 if ($self->{_list_level}) {
944 $text =~ s{
946 $whole_list
948 my $list = $1;
949 my $marker = $3;
950 my $list_type = ($marker =~ m/$marker_ul/) ? "ul" : "ol";
951 # Turn double returns into triple returns, so that we can make a
952 # paragraph for the last item in a list, if necessary:
953 $list =~ s/\n{2,}/\n\n\n/g;
954 my $result = ( $list_type eq 'ul' ) ?
955 $self->_ProcessListItemsUL($list, $marker_ul)
956 : $self->_ProcessListItemsOL($list, $marker_ol);
958 $result = $self->_MakeList($list_type, $result, $marker);
959 $result;
960 }egmx;
962 else {
963 $text =~ s{
964 (?:(?<=\n\n)|\A\n?)
965 $whole_list
967 my $list = $1;
968 my $marker = $3;
969 my $list_type = ($marker =~ m/$marker_ul/) ? "ul" : "ol";
970 # Turn double returns into triple returns, so that we can make a
971 # paragraph for the last item in a list, if necessary:
972 $list =~ s/\n{2,}/\n\n\n/g;
973 my $result = ( $list_type eq 'ul' ) ?
974 $self->_ProcessListItemsUL($list, $marker_ul)
975 : $self->_ProcessListItemsOL($list, $marker_ol);
976 $result = $self->_MakeList($list_type, $result, $marker);
977 $result;
978 }egmx;
982 return $text;
985 sub _MakeList {
986 my ($self, $list_type, $content, $marker) = @_;
988 if ($list_type eq 'ol' and $self->{trust_list_start_value}) {
989 my ($num) = $marker =~ /^(\d+)[.]/;
990 return "<ol start='$num'>\n" . $content . "</ol>\n";
993 return "<$list_type>\n" . $content . "</$list_type>\n";
996 sub _ProcessListItemsOL {
998 # Process the contents of a single ordered list, splitting it
999 # into individual list items.
1002 my ($self, $list_str, $marker_any) = @_;
1005 # The $self->{_list_level} global keeps track of when we're inside a list.
1006 # Each time we enter a list, we increment it; when we leave a list,
1007 # we decrement. If it's zero, we're not in a list anymore.
1009 # We do this because when we're not inside a list, we want to treat
1010 # something like this:
1012 # I recommend upgrading to version
1013 # 8. Oops, now this line is treated
1014 # as a sub-list.
1016 # As a single paragraph, despite the fact that the second line starts
1017 # with a digit-period-space sequence.
1019 # Whereas when we're inside a list (or sub-list), that line will be
1020 # treated as the start of a sub-list. What a kludge, huh? This is
1021 # an aspect of Markdown's syntax that's hard to parse perfectly
1022 # without resorting to mind-reading. Perhaps the solution is to
1023 # change the syntax rules such that sub-lists must start with a
1024 # starting cardinal number; e.g. "1." or "a.".
1026 $self->{_list_level}++;
1028 # trim trailing blank lines:
1029 $list_str =~ s/\n{2,}\z/\n/;
1032 $list_str =~ s{
1033 (\n)? # leading line = $1
1034 (^[ \t]*) # leading whitespace = $2
1035 ($marker_any) [ \t]+ # list marker = $3
1036 ((?s:.+?) # list item text = $4
1037 (\n{1,2}))
1038 (?= \n* (\z | \2 ($marker_any) [ \t]+))
1040 my $item = $4;
1041 my $leading_line = $1;
1042 my $leading_space = $2;
1044 if ($leading_line or ($item =~ m/\n{2,}/)) {
1045 $item = $self->_RunBlockGamut($self->_Outdent($item));
1047 else {
1048 # Recursion for sub-lists:
1049 $item = $self->_DoLists($self->_Outdent($item));
1050 chomp $item;
1051 $item = $self->_RunSpanGamut($item);
1054 "<li>" . $item . "</li>\n";
1055 }egmxo;
1057 $self->{_list_level}--;
1058 return $list_str;
1061 sub _ProcessListItemsUL {
1063 # Process the contents of a single unordered list, splitting it
1064 # into individual list items.
1067 my ($self, $list_str, $marker_any) = @_;
1070 # The $self->{_list_level} global keeps track of when we're inside a list.
1071 # Each time we enter a list, we increment it; when we leave a list,
1072 # we decrement. If it's zero, we're not in a list anymore.
1074 # We do this because when we're not inside a list, we want to treat
1075 # something like this:
1077 # I recommend upgrading to version
1078 # 8. Oops, now this line is treated
1079 # as a sub-list.
1081 # As a single paragraph, despite the fact that the second line starts
1082 # with a digit-period-space sequence.
1084 # Whereas when we're inside a list (or sub-list), that line will be
1085 # treated as the start of a sub-list. What a kludge, huh? This is
1086 # an aspect of Markdown's syntax that's hard to parse perfectly
1087 # without resorting to mind-reading. Perhaps the solution is to
1088 # change the syntax rules such that sub-lists must start with a
1089 # starting cardinal number; e.g. "1." or "a.".
1091 $self->{_list_level}++;
1093 # trim trailing blank lines:
1094 $list_str =~ s/\n{2,}\z/\n/;
1097 $list_str =~ s{
1098 (\n)? # leading line = $1
1099 (^[ \t]*) # leading whitespace = $2
1100 ($marker_any) [ \t]+ # list marker = $3
1101 ((?s:.+?) # list item text = $4
1102 (\n{1,2}))
1103 (?= \n* (\z | \2 ($marker_any) [ \t]+))
1105 my $item = $4;
1106 my $leading_line = $1;
1107 my $leading_space = $2;
1109 if ($leading_line or ($item =~ m/\n{2,}/)) {
1110 $item = $self->_RunBlockGamut($self->_Outdent($item));
1112 else {
1113 # Recursion for sub-lists:
1114 $item = $self->_DoLists($self->_Outdent($item));
1115 chomp $item;
1116 $item = $self->_RunSpanGamut($item);
1119 "<li>" . $item . "</li>\n";
1120 }egmxo;
1122 $self->{_list_level}--;
1123 return $list_str;
1126 sub _DoCodeBlocks {
1128 # Process Markdown `<pre><code>` blocks.
1131 my ($self, $text) = @_;
1133 $text =~ s{
1134 (?:\n\n|\A)
1135 ( # $1 = the code block -- one or more lines, starting with a space/tab
1137 (?:[ ]{$self->{tab_width}} | \t) # Lines must start with a tab or a tab-width of spaces
1138 .*\n+
1141 ((?=^[ ]{0,$self->{tab_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1143 my $codeblock = $1;
1144 my $result; # return value
1146 $codeblock = $self->_EncodeCode($self->_Outdent($codeblock));
1147 $codeblock = $self->_Detab($codeblock);
1148 $codeblock =~ s/\A\n+//; # trim leading newlines
1149 $codeblock =~ s/\n+\z//; # trim trailing newlines
1151 $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
1153 $result;
1154 }egmx;
1156 return $text;
1159 sub _DoCodeSpans {
1161 # * Backtick quotes are used for <code></code> spans.
1163 # * You can use multiple backticks as the delimiters if you want to
1164 # include literal backticks in the code span. So, this input:
1166 # Just type ``foo `bar` baz`` at the prompt.
1168 # Will translate to:
1170 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1172 # There's no arbitrary limit to the number of backticks you
1173 # can use as delimters. If you need three consecutive backticks
1174 # in your code, use four for delimiters, etc.
1176 # * You can use spaces to get literal backticks at the edges:
1178 # ... type `` `bar` `` ...
1180 # Turns to:
1182 # ... type <code>`bar`</code> ...
1185 my ($self, $text) = @_;
1187 $text =~ s@
1188 (?<!\\) # Character before opening ` can't be a backslash
1189 (`+) # $1 = Opening run of `
1190 (.+?) # $2 = The code block
1191 (?<!`)
1192 \1 # Matching closer
1193 (?!`)
1195 my $c = "$2";
1196 $c =~ s/^[ \t]*//g; # leading whitespace
1197 $c =~ s/[ \t]*$//g; # trailing whitespace
1198 $c = $self->_EncodeCode($c);
1199 "<code>$c</code>";
1200 @egsx;
1202 return $text;
1205 sub _EncodeCode {
1207 # Encode/escape certain characters inside Markdown code runs.
1208 # The point is that in code, these characters are literals,
1209 # and lose their special Markdown meanings.
1211 my $self = shift;
1212 local $_ = shift;
1214 # Encode all ampersands; HTML entities are not
1215 # entities within a Markdown code span.
1216 s/&/&amp;/g;
1218 # Encode $'s, but only if we're running under Blosxom.
1219 # (Blosxom interpolates Perl variables in article bodies.)
1221 no warnings 'once';
1222 if (defined($blosxom::version)) {
1223 s/\$/&#036;/g;
1228 # Do the angle bracket song and dance:
1229 s! < !&lt;!gx;
1230 s! > !&gt;!gx;
1232 # Now, escape characters that are magic in Markdown:
1233 s! \* !$g_escape_table{'*'}!ogx;
1234 s! _ !$g_escape_table{'_'}!ogx;
1235 s! { !$g_escape_table{'{'}!ogx;
1236 s! } !$g_escape_table{'}'}!ogx;
1237 s! \[ !$g_escape_table{'['}!ogx;
1238 s! \] !$g_escape_table{']'}!ogx;
1239 s! \\ !$g_escape_table{'\\'}!ogx;
1241 return $_;
1244 sub _DoItalicsAndBold {
1245 my ($self, $text) = @_;
1247 # Handle at beginning of lines:
1248 $text =~ s{ ^(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1249 {<strong>$2</strong>}gsx;
1251 $text =~ s{ ^(\*|_) (?=\S) (.+?) (?<=\S) \1 }
1252 {<em>$2</em>}gsx;
1254 # <strong> must go first:
1255 $text =~ s{ (?<=\W) (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1256 {<strong>$2</strong>}gsx;
1258 $text =~ s{ (?<=\W) (\*|_) (?=\S) (.+?) (?<=\S) \1 }
1259 {<em>$2</em>}gsx;
1261 # And now, a second pass to catch nested strong and emphasis special cases
1262 $text =~ s{ (?<=\W) (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1263 {<strong>$2</strong>}gsx;
1265 $text =~ s{ (?<=\W) (\*|_) (?=\S) (.+?) (?<=\S) \1 }
1266 {<em>$2</em>}gsx;
1268 return $text;
1271 sub _DoBlockQuotes {
1272 my ($self, $text) = @_;
1274 $text =~ s{
1275 ( # Wrap whole match in $1
1277 ^[ \t]*>[ \t]? # '>' at the start of a line
1278 .+\n # rest of the first line
1279 (.+\n)* # subsequent consecutive lines
1280 \n* # blanks
1284 my $bq = $1;
1285 $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
1286 $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
1287 $bq = $self->_RunBlockGamut($bq); # recurse
1289 $bq =~ s/^/ /mg;
1290 # These leading spaces screw with <pre> content, so we need to fix that:
1291 $bq =~ s{
1292 (\s*<pre>.+?</pre>)
1294 my $pre = $1;
1295 $pre =~ s/^ //mg;
1296 $pre;
1297 }egsx;
1299 "<blockquote>\n$bq\n</blockquote>\n\n";
1300 }egmx;
1303 return $text;
1306 sub _FormParagraphs {
1308 # Params:
1309 # $text - string to process with html <p> tags
1311 my ($self, $text) = @_;
1313 # Strip leading and trailing lines:
1314 $text =~ s/\A\n+//;
1315 $text =~ s/\n+\z//;
1317 my @grafs = split(/\n{2,}/, $text);
1320 # Wrap <p> tags.
1322 foreach (@grafs) {
1323 unless (defined( $self->{_html_blocks}{$_} )) {
1324 $_ = $self->_RunSpanGamut($_);
1325 s/^([ \t]*)/<p>&nbsp;&nbsp;/;
1326 $_ .= "</p>";
1331 # Unhashify HTML blocks
1333 foreach (@grafs) {
1334 if (defined( $self->{_html_blocks}{$_} )) {
1335 $_ = $self->{_html_blocks}{$_};
1339 return join "\n\n", @grafs;
1342 sub _EncodeAmpsAndAngles {
1343 # Smart processing for ampersands and angle brackets that need to be encoded.
1345 my ($self, $text) = @_;
1346 return '' if (!defined $text or !length $text);
1348 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1349 # http://bumppo.net/projects/amputator/
1350 $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
1352 # Encode naked <'s
1353 $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
1355 # And >'s - added by Fletcher Penney
1356 # $text =~ s{>(?![a-z/?\$!])}{&gt;}gi;
1357 # Causes problems...
1359 # Remove encoding inside comments
1360 $text =~ s{
1361 (?<=<!--) # Begin comment
1362 (.*?) # Anything inside
1363 (?=-->) # End comments
1365 my $t = $1;
1366 $t =~ s/&amp;/&/g;
1367 $t =~ s/&lt;/</g;
1369 }egsx;
1371 return $text;
1374 sub _EncodeBackslashEscapes {
1376 # Parameter: String.
1377 # Returns: The string, with after processing the following backslash
1378 # escape sequences.
1380 my $self = shift;
1381 local $_ = shift;
1383 s! \\\\ !$g_escape_table{'\\'}!ogx; # Must process escaped backslashes first.
1384 s! \\` !$g_escape_table{'`'}!ogx;
1385 s! \\\* !$g_escape_table{'*'}!ogx;
1386 s! \\_ !$g_escape_table{'_'}!ogx;
1387 s! \\\{ !$g_escape_table{'{'}!ogx;
1388 s! \\\} !$g_escape_table{'}'}!ogx;
1389 s! \\\[ !$g_escape_table{'['}!ogx;
1390 s! \\\] !$g_escape_table{']'}!ogx;
1391 s! \\\( !$g_escape_table{'('}!ogx;
1392 s! \\\) !$g_escape_table{')'}!ogx;
1393 s! \\> !$g_escape_table{'>'}!ogx;
1394 s! \\\# !$g_escape_table{'#'}!ogx;
1395 s! \\\+ !$g_escape_table{'+'}!ogx;
1396 s! \\\- !$g_escape_table{'-'}!ogx;
1397 s! \\\. !$g_escape_table{'.'}!ogx;
1398 s{ \\! }{$g_escape_table{'!'}}ogx;
1400 return $_;
1403 sub _DoAutoLinks {
1404 my ($self, $text) = @_;
1406 $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
1408 # Email addresses: <address@domain.foo>
1409 $text =~ s{
1411 (?:mailto:)?
1413 [-.\w\+]+
1415 [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1419 $self->_EncodeEmailAddress( $self->_UnescapeSpecialChars($1) );
1420 }egix;
1422 return $text;
1425 sub _EncodeEmailAddress {
1427 # Input: an email address, e.g. "foo@example.com"
1429 # Output: the email address as a mailto link, with each character
1430 # of the address encoded as either a decimal or hex entity, in
1431 # the hopes of foiling most address harvesting spam bots. E.g.:
1433 # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1434 # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1435 # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
1437 # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1438 # mailing list: <http://tinyurl.com/yu7ue>
1441 my ($self, $addr) = @_;
1443 my @encode = (
1444 sub { '&#' . ord(shift) . ';' },
1445 sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
1446 sub { shift },
1449 $addr = "mailto:" . $addr;
1451 $addr =~ s{(.)}{
1452 my $char = $1;
1453 if ( $char eq '@' ) {
1454 # this *must* be encoded. I insist.
1455 $char = $encode[int rand 1]->($char);
1457 elsif ( $char ne ':' ) {
1458 # leave ':' alone (to spot mailto: later)
1459 my $r = rand;
1460 # roughly 10% raw, 45% hex, 45% dec
1461 $char = (
1462 $r > .9 ? $encode[2]->($char) :
1463 $r < .45 ? $encode[1]->($char) :
1464 $encode[0]->($char)
1467 $char;
1468 }gex;
1470 $addr = qq{<a href="$addr">$addr</a>};
1471 $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
1473 return $addr;
1476 sub _UnescapeSpecialChars {
1478 # Swap back in all the special characters we've hidden.
1480 my ($self, $text) = @_;
1482 while( my($char, $hash) = each(%g_escape_table) ) {
1483 $text =~ s/$hash/$char/g;
1485 return $text;
1488 sub _TokenizeHTML {
1490 # Parameter: String containing HTML markup.
1491 # Returns: Reference to an array of the tokens comprising the input
1492 # string. Each token is either a tag (possibly with nested,
1493 # tags contained therein, such as <a href="<MTFoo>">, or a
1494 # run of text between tags. Each element of the array is a
1495 # two-element array; the first is either 'tag' or 'text';
1496 # the second is the actual value.
1499 # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
1500 # <http://www.bradchoate.com/past/mtregex.php>
1503 my ($self, $str) = @_;
1504 my $pos = 0;
1505 my $len = length $str;
1506 my @tokens;
1508 my $depth = 6;
1509 my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
1510 my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
1511 (?s: <\? .*? \?> ) | # processing instruction
1512 $nested_tags/iox; # nested tags
1514 while ($str =~ m/($match)/og) {
1515 my $whole_tag = $1;
1516 my $sec_start = pos $str;
1517 my $tag_start = $sec_start - length $whole_tag;
1518 if ($pos < $tag_start) {
1519 push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
1521 push @tokens, ['tag', $whole_tag];
1522 $pos = pos $str;
1524 push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
1525 \@tokens;
1528 sub _Outdent {
1530 # Remove one level of line-leading tabs or spaces
1532 my ($self, $text) = @_;
1534 $text =~ s/^(\t|[ ]{1,$self->{tab_width}})//gm;
1535 return $text;
1538 sub _Detab {
1540 # Cribbed from a post by Bart Lateur:
1541 # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
1543 my ($self, $text) = @_;
1545 # FIXME - Better anchor/regex would be quicker.
1547 # Original:
1548 #$text =~ s{(.*?)\t}{$1.(' ' x ($self->{tab_width} - length($1) % $self->{tab_width}))}ge;
1550 # Much swifter, but pretty hateful:
1551 do {} while ($text =~ s{^(.*?)\t}{$1.(' ' x ($self->{tab_width} - length($1) % $self->{tab_width}))}mge);
1552 return $text;
1555 sub _ConvertCopyright {
1556 my ($self, $text) = @_;
1557 # Convert to an XML compatible form of copyright symbol
1559 $text =~ s/&copy;/&#xA9;/gi;
1561 return $text;
1566 __END__
1568 =head1 OTHER IMPLEMENTATIONS
1570 Markdown has been re-implemented in a number of languages, and with a number of additions.
1572 Those that I have found are listed below:
1574 =over
1576 =item C - <http://www.pell.portland.or.us/~orc/Code/discount>
1578 Discount - Original Markdown, but in C. Fastest implementation available, and passes MDTest.
1579 Adds it's own set of custom features.
1581 =item python - <http://www.freewisdom.org/projects/python-markdown/>
1583 Python Markdown which is mostly compatible with the original, with an interesting extension API.
1585 =item ruby (maruku) - <http://maruku.rubyforge.org/>
1587 One of the nicest implementations out there. Builds a parse tree internally so very flexible.
1589 =item php - <http://michelf.com/projects/php-markdown/>
1591 A direct port of Markdown.pl, also has a separately maintained 'extra' version,
1592 which adds a number of features that were borrowed by MultiMarkdown.
1594 =item lua - <http://www.frykholm.se/files/markdown.lua>
1596 Port to lua. Simple and lightweight (as lua is).
1598 =item haskell - <http://johnmacfarlane.net/pandoc/>
1600 Pandoc is a more general library, supporting Markdown, reStructuredText, LaTeX and more.
1602 =item javascript - <http://www.attacklab.net/showdown-gui.html>
1604 Direct(ish) port of Markdown.pl to JavaScript
1606 =back
1608 =head1 BUGS
1610 To file bug reports or feature requests please send email to:
1612 bug-Text-Markdown@rt.cpan.org
1614 Please include with your report: (1) the example input; (2) the output
1615 you expected; (3) the output Markdown actually produced.
1617 =head1 VERSION HISTORY
1619 See the Changes file for detailed release notes for this version.
1621 =head1 AUTHOR
1623 John Gruber
1624 http://daringfireball.net/
1626 PHP port and other contributions by Michel Fortin
1627 http://michelf.com/
1629 MultiMarkdown changes by Fletcher Penney
1630 http://fletcher.freeshell.org/
1632 CPAN Module Text::MultiMarkdown (based on Text::Markdown by Sebastian
1633 Riedel) originally by Darren Kulp (http://kulp.ch/)
1635 This module is maintained by: Tomas Doran http://www.bobtfish.net/
1637 =head1 THIS DISTRIBUTION
1639 Please note that this distribution is a fork of John Gruber's original Markdown project,
1640 and it *is not* in any way blessed by him.
1642 Whilst this code aims to be compatible with the original Markdown.pl (and incorporates
1643 and passes the Markdown test suite) whilst fixing a number of bugs in the original -
1644 there may be differences between the behaviour of this module and Markdown.pl. If you find
1645 any differences where you believe Text::Markdown behaves contrary to the Markdown spec,
1646 please report them as bugs.
1648 Text::Markdown *does not* extend the markdown dialect in any way from that which is documented at
1649 daringfireball. If you want additional features, you should look at L<Text::MultiMarkdown>.
1651 =head1 COPYRIGHT AND LICENSE
1653 Original Code Copyright (c) 2003-2004 John Gruber
1654 <http://daringfireball.net/>
1655 All rights reserved.
1657 MultiMarkdown changes Copyright (c) 2005-2006 Fletcher T. Penney
1658 <http://fletcher.freeshell.org/>
1659 All rights reserved.
1661 Text::MultiMarkdown changes Copyright (c) 2006-2008 Darren Kulp
1662 <http://kulp.ch> and Tomas Doran <http://www.bobtfish.net>
1664 Redistribution and use in source and binary forms, with or without
1665 modification, are permitted provided that the following conditions are
1666 met:
1668 * Redistributions of source code must retain the above copyright notice,
1669 this list of conditions and the following disclaimer.
1671 * Redistributions in binary form must reproduce the above copyright
1672 notice, this list of conditions and the following disclaimer in the
1673 documentation and/or other materials provided with the distribution.
1675 * Neither the name "Markdown" nor the names of its contributors may
1676 be used to endorse or promote products derived from this software
1677 without specific prior written permission.
1679 This software is provided by the copyright holders and contributors "as
1680 is" and any express or implied warranties, including, but not limited
1681 to, the implied warranties of merchantability and fitness for a
1682 particular purpose are disclaimed. In no event shall the copyright owner
1683 or contributors be liable for any direct, indirect, incidental, special,
1684 exemplary, or consequential damages (including, but not limited to,
1685 procurement of substitute goods or services; loss of use, data, or
1686 profits; or business interruption) however caused and on any theory of
1687 liability, whether in contract, strict liability, or tort (including
1688 negligence or otherwise) arising in any way out of the use of this
1689 software, even if advised of the possibility of such damage.
1691 =cut