Track /etc/gitconfig
[msysgit/mtrensch.git] / lib / perl5 / 5.8.8 / Pod / PlainText.pm
blob3f5ce90d2ba1054083079cb89e79fdd0eb05d7d7
1 # Pod::PlainText -- Convert POD data to formatted ASCII text.
2 # $Id: Text.pm,v 2.1 1999/09/20 11:53:33 eagle Exp $
4 # Copyright 1999-2000 by Russ Allbery <rra@stanford.edu>
6 # This program is free software; you can redistribute it and/or modify it
7 # under the same terms as Perl itself.
9 # This module is intended to be a replacement for Pod::Text, and attempts to
10 # match its output except for some specific circumstances where other
11 # decisions seemed to produce better output. It uses Pod::Parser and is
12 # designed to be very easy to subclass.
14 ############################################################################
15 # Modules and declarations
16 ############################################################################
18 package Pod::PlainText;
20 require 5.005;
22 use Carp qw(carp croak);
23 use Pod::Select ();
25 use strict;
26 use vars qw(@ISA %ESCAPES $VERSION);
28 # We inherit from Pod::Select instead of Pod::Parser so that we can be used
29 # by Pod::Usage.
30 @ISA = qw(Pod::Select);
32 $VERSION = '2.02';
35 ############################################################################
36 # Table of supported E<> escapes
37 ############################################################################
# Table mapping the names accepted inside E<> to the character they stand
# for.  The accented letters are given as ISO 8859-1 (Latin-1) code points.
# This table is taken near verbatim from Pod::PlainText in Pod::Parser,
# which got it near verbatim from the original Pod::Text.  It is therefore
# credited to Tom Christiansen, and I'm glad I didn't have to write it.  :)
#
# Igrave/igrave are \xCC/\xEC; they used to duplicate the Iacute/iacute
# values (\xCD/\xED), so E<Igrave> printed an acute accent.
%ESCAPES = (
    'amp'       =>    '&',      # ampersand
    'lt'        =>    '<',      # left chevron, less-than
    'gt'        =>    '>',      # right chevron, greater-than
    'quot'      =>    '"',      # double quote

    "Aacute"    =>    "\xC1",   # capital A, acute accent
    "aacute"    =>    "\xE1",   # small a, acute accent
    "Acirc"     =>    "\xC2",   # capital A, circumflex accent
    "acirc"     =>    "\xE2",   # small a, circumflex accent
    "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
    "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
    "Agrave"    =>    "\xC0",   # capital A, grave accent
    "agrave"    =>    "\xE0",   # small a, grave accent
    "Aring"     =>    "\xC5",   # capital A, ring
    "aring"     =>    "\xE5",   # small a, ring
    "Atilde"    =>    "\xC3",   # capital A, tilde
    "atilde"    =>    "\xE3",   # small a, tilde
    "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
    "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
    "Ccedil"    =>    "\xC7",   # capital C, cedilla
    "ccedil"    =>    "\xE7",   # small c, cedilla
    "Eacute"    =>    "\xC9",   # capital E, acute accent
    "eacute"    =>    "\xE9",   # small e, acute accent
    "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
    "ecirc"     =>    "\xEA",   # small e, circumflex accent
    "Egrave"    =>    "\xC8",   # capital E, grave accent
    "egrave"    =>    "\xE8",   # small e, grave accent
    "ETH"       =>    "\xD0",   # capital Eth, Icelandic
    "eth"       =>    "\xF0",   # small eth, Icelandic
    "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
    "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
    "Iacute"    =>    "\xCD",   # capital I, acute accent
    "iacute"    =>    "\xED",   # small i, acute accent
    "Icirc"     =>    "\xCE",   # capital I, circumflex accent
    "icirc"     =>    "\xEE",   # small i, circumflex accent
    "Igrave"    =>    "\xCC",   # capital I, grave accent
    "igrave"    =>    "\xEC",   # small i, grave accent
    "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
    "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
    "Ntilde"    =>    "\xD1",   # capital N, tilde
    "ntilde"    =>    "\xF1",   # small n, tilde
    "Oacute"    =>    "\xD3",   # capital O, acute accent
    "oacute"    =>    "\xF3",   # small o, acute accent
    "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
    "ocirc"     =>    "\xF4",   # small o, circumflex accent
    "Ograve"    =>    "\xD2",   # capital O, grave accent
    "ograve"    =>    "\xF2",   # small o, grave accent
    "Oslash"    =>    "\xD8",   # capital O, slash
    "oslash"    =>    "\xF8",   # small o, slash
    "Otilde"    =>    "\xD5",   # capital O, tilde
    "otilde"    =>    "\xF5",   # small o, tilde
    "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
    "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
    "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
    "THORN"     =>    "\xDE",   # capital THORN, Icelandic
    "thorn"     =>    "\xFE",   # small thorn, Icelandic
    "Uacute"    =>    "\xDA",   # capital U, acute accent
    "uacute"    =>    "\xFA",   # small u, acute accent
    "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
    "ucirc"     =>    "\xFB",   # small u, circumflex accent
    "Ugrave"    =>    "\xD9",   # capital U, grave accent
    "ugrave"    =>    "\xF9",   # small u, grave accent
    "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
    "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
    "Yacute"    =>    "\xDD",   # capital Y, acute accent
    "yacute"    =>    "\xFD",   # small y, acute accent
    "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark

    "lchevron"  =>    "\xAB",   # left chevron (double less than)
    "rchevron"  =>    "\xBB",   # right chevron (double greater than)
);
116 ############################################################################
117 # Initialization
118 ############################################################################
# Set up a freshly created parser object.  Every user-visible option that
# was not supplied gets its default value, the per-document formatting state
# is reset, and then the parent class initializer is run (it must always be
# called).
sub initialize {
    my ($self) = @_;

    # Option name => value used when the caller did not set the option.
    my %default_for = (
        alt      => 0,
        indent   => 4,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    for my $option (keys %default_for) {
        next if defined $self->{$option};
        $self->{$option} = $default_for{$option};
    }

    $self->{INDENTS} = [];                  # Stack of indentations.
    $self->{MARGIN}  = $self->{indent};     # Current left margin in spaces.

    $self->SUPER::initialize;
}
137 ############################################################################
138 # Core overrides
139 ############################################################################
# Called for each command paragraph.  Receives the command name followed by
# the paragraph text, the line number, and a Pod::Paragraph object, and
# forwards everything after the name to the method called cmd_<command>.
# =pod is ignored, and while an excluded =begin block is open only =end is
# let through.  A pending =item tag is flushed before dispatching.  No check
# is made that the cmd_<command> method exists.  =cut is handled internally
# by Pod::Parser.
sub command {
    my $self = shift;
    my $name = shift;
    return if $name eq 'pod' || ($$self{EXCLUDE} && $name ne 'end');
    if (defined $$self{ITEM}) {
        $self->item ("\n");
    }
    my $handler = 'cmd_' . $name;
    return $self->$handler (@_);
}
# Called for a verbatim paragraph.  Gets the paragraph, the line number, and
# a Pod::Paragraph object.  The paragraph is printed as is, except that each
# line with visible content is indented by the current left margin.  Nothing
# is printed inside an excluded =begin block or for a paragraph that is all
# whitespace.  A pending =item tag is flushed first.
sub verbatim {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->item if defined $$self{ITEM};
    local $_ = shift;
    return if /^\s*$/;

    # Only horizontal whitespace may precede the first visible character of
    # a line.  With a plain \s* the match could start on a whitespace-only
    # line and run across its newline, which put the margin on that line and
    # left the following line of text unindented.
    my $margin = ' ' x $$self{MARGIN};
    s/^([^\S\n]*\S+)/$margin$1/gm;
    $self->output ($_);
}
# Called for a regular text block.  Gets the paragraph, the line number, and
# a Pod::Paragraph object.  Inside =begin text the paragraph is printed
# untouched; inside any other =begin block it is dropped.  Otherwise the
# interior sequences are expanded and the result is either attached to the
# pending =item tag or reformatted and printed.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->output ($_[0]), return if $$self{VERBATIM};
    local $_ = shift;
    my $line = shift;

    # Perform a little magic to collapse multiple L<> references.  This is
    # here mostly for backwards-compatibility.  We'll just rewrite the whole
    # thing into actual text at this part, bypassing the whole internal
    # sequence parsing thing.
    s{
        (
          L<                    # A link of the form L</something>.
              /
              (
                  [:\w]+        # The item has to be a simple word...
                  (\(\))?       # ...or simple function.
              )
          >
          (
              ,?\s+(and\s+)?    # Allow lots of them, conjuncted.
              L<
                  /
                  (
                      [:\w]+
                      (\(\))?
                  )
              >
          )+
        )
    } {
        # Copy the matched run of links first; the substitution below
        # resets $1.
        my $links = $1;
        $links =~ s%L</([^>]+)>%$1%g;
        my @items = split /(?:,?\s+(?:and\s+)?)/, $links;
        my $string = "the ";
        for my $i (0 .. $#items) {
            $string .= $items[$i];
            next if $i == $#items;

            # Exactly one separator goes between two names: "a and b" for
            # two items, "a, b, and c" for more.  Appending ", " and then
            # " and " used to leave a double space before "and".
            $string .= (@items > 2) ? ", " : " ";
            $string .= "and " if $i == $#items - 1;
        }
        $string .= " entries elsewhere in this document";
        $string;
    }gex;

    # Now actually interpolate and output the paragraph.
    $_ = $self->interpolate ($_, $line);
    s/\s+$/\n/;
    if (defined $$self{ITEM}) {
        $self->item ($_ . "\n");
    } else {
        $self->output ($self->reformat ($_ . "\n"));
    }
}
# Called for an interior sequence.  Gets the command, argument, and a
# Pod::InteriorSequence object and is expected to return the resulting text.
# Calls seq_b, seq_c, seq_f, seq_i, and seq_l to handle those types of
# sequences, and handles S<>, E<>, X<>, and Z<> directly.  An unrecognized
# escape or sequence is reported with carp and returned in its source form.
sub interior_sequence {
    my $self = shift;
    my $command = shift;
    local $_ = shift;
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, carping if invalid.
    if ($command eq 'E') {
        return $ESCAPES{$_} if defined $ESCAPES{$_};
        carp "Unknown escape: E<$_>";
        return "E<$_>";
    }

    # For all the other sequences, empty content produces no output.
    return if $_ eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # When we output the text, we'll map this back.
    if ($command eq 'S') {
        s/\s{2,}/ /g;
        tr/ /\01/;
        return $_;
    }

    # Anything else needs to get dispatched to another method.
    if    ($command eq 'B') { return $self->seq_b ($_) }
    elsif ($command eq 'C') { return $self->seq_c ($_) }
    elsif ($command eq 'F') { return $self->seq_f ($_) }
    elsif ($command eq 'I') { return $self->seq_i ($_) }
    elsif ($command eq 'L') { return $self->seq_l ($_) }

    # Unknown sequence: warn and hand the source text back, as is done for
    # an unknown escape.  Falling off the end here used to return whatever
    # carp returned, and that value ended up in the formatted text.
    carp "Unknown sequence $command<$_>";
    return "$command<$_>";
}
# Called for each paragraph that's actually part of the POD.  We take
# advantage of this opportunity to untabify the input: every run of tabs is
# replaced by the number of spaces that reaches the next multiple of eight
# columns.  Returns the converted paragraph, which is the text the parser
# goes on to process.
sub preprocess_paragraph {
    my ($self, $text) = @_;

    # Expand one run of tabs per pass.  $1 is everything before the run on
    # the same line, so its length gives the current column.
    1 while $text =~
        s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;

    # The paragraph must be handed back; without an explicit return the
    # result of the loop statement above would be returned instead.
    return $text;
}
273 ############################################################################
274 # Command paragraphs
275 ############################################################################
277 # All command paragraphs take the paragraph and the line number.
# First level heading.  Takes the heading text and the line number.  The
# heading is interpolated and printed at the left edge; in alt mode it is
# framed with "====" and surrounded by blank lines, otherwise it is followed
# by a blank line only when the loose option is set.
sub cmd_head1 {
    my ($self, $heading, $line) = @_;
    $heading =~ s/\s+$//;
    $heading = $self->interpolate ($heading, $line);
    if ($$self{alt}) {
        return $self->output ("\n==== $heading ====\n\n");
    }
    $heading .= "\n" if $$self{loose};
    $self->output ($heading . "\n");
}
# Second level heading.  Takes the heading text and the line number.  The
# heading is interpolated and printed followed by a blank line: framed with
# "==" in alt mode, otherwise indented by half of the regular indent.
sub cmd_head2 {
    my ($self, $heading, $line) = @_;
    $heading =~ s/\s+$//;
    $heading = $self->interpolate ($heading, $line);
    my $rendered = $$self{alt}
        ? "\n== $heading ==\n\n"
        : ' ' x ($$self{indent} / 2) . $heading . "\n\n";
    $self->output ($rendered);
}
# Start a list.  Takes the text following =over, which should be the number
# of columns to indent by; anything else (including nothing at all) selects
# the default indent.  The current margin is pushed on the INDENTS stack and
# then increased by the chosen amount.
sub cmd_over {
    my ($self, $amount) = @_;

    # Trailing whitespace is optional, so a bare "4" with no newline after it
    # is honoured as well.
    unless (defined $amount && $amount =~ /^[-+]?\d+\s*$/) {
        $amount = $$self{indent};
    }
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($amount + 0);
}
# End a list.  Restores the left margin that was saved by the matching
# =over.  If there is nothing saved, warns about the unmatched =back and
# falls back to the regular indent.
sub cmd_back {
    my ($self) = @_;
    my $restored = pop @{ $$self{INDENTS} };
    unless (defined $restored) {
        carp "Unmatched =back";
        $restored = $$self{indent};
    }
    $$self{MARGIN} = $restored;
}
# An individual list item.  Takes the tag text and the line number.  Any tag
# still pending from a previous =item is flushed first; the new tag is then
# interpolated and stored in ITEM until its paragraph (or the next command)
# arrives.
sub cmd_item {
    my ($self, $tag, $line) = @_;
    $self->item if defined $$self{ITEM};
    $tag =~ s/\s+$//;

    # Hand the line number on, as the heading and text block handlers do.
    $$self{ITEM} = $self->interpolate ($tag, $line);
}
# Begin a block for a particular translator.  The first word of the argument
# names the format: "text" sets VERBATIM, which triggers special handling in
# textblock(); any other format sets EXCLUDE.  An argument with no leading
# word changes nothing.
sub cmd_begin {
    my ($self, $argument) = @_;
    return unless $argument =~ /^(\S+)/;
    my $flag = ($1 eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $$self{$flag} = 1;
}
# End a block for a particular translator.  Both block flags are cleared no
# matter which one was set; we assume that all =begin/=end pairs are
# properly closed.
sub cmd_end {
    my ($self) = @_;
    $self->{VERBATIM} = 0;
    return $self->{EXCLUDE} = 0;
}
# One paragraph for a particular translator.  Takes the paragraph and the
# line number.  The paragraph is ignored unless it starts with the word
# "text"; in that case the word (and the rest of a line holding nothing
# else) is stripped and the remainder is treated as a verbatim block.
sub cmd_for {
    my ($self, $paragraph, $line) = @_;
    return unless $paragraph =~ s/^text\b[ \t]*\n?//;
    $self->verbatim ($paragraph, $line);
}
366 ############################################################################
367 # Interior sequences
368 ############################################################################
# The simple formatting ones.  These are here mostly so that subclasses can
# override them and do more complicated things.

# B<> text: unchanged normally, wrapped in ``...'' in alt mode.
sub seq_b {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return "``" . $text . "''";
}
# C<> text: wrapped in `...' normally and in ``...'' in alt mode.
sub seq_c {
    my ($self, $text) = @_;
    my ($open, $close) = $self->{alt} ? ("``", "''") : ("`", "'");
    return $open . $text . $close;
}
# F<> text: unchanged normally, wrapped in double quotes in alt mode.
sub seq_f {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return '"' . $text . '"';
}
# I<> text: always surrounded by asterisks.
sub seq_i {
    my $text = $_[1];
    return "*$text*";
}
# The complicated one.  Handle links.  Since this is plain text, we can't
# actually make any real links, so this is all to figure out what text we
# print out.  Takes the content of the L<> sequence and returns the text to
# print in its place.
sub seq_l {
    my ($self, $link) = @_;

    # Smash whitespace in case we were split across multiple lines.
    $link =~ s/\s+/ /g;

    # Explicit link text (everything before the first "|") wins outright.
    return $1 if $link =~ /^([^|]+)\|/;

    # Leading and trailing whitespace isn't important from here on.
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # URLs are printed as they are.
    return $link if $link =~ /^(?:https?|ftp|news):/;

    # By default the whole content names a section of this document.  A
    # quoted string is a section too; a single word, optionally followed by
    # a parenthesized section as in manpage(section), names a manual page;
    # otherwise a slash separates the manual page from the section, so that
    # L<manpage/> forces a manpage interpretation.
    my ($manpage, $section) = ('', $link);
    if ($link =~ /^"\s*(.*?)\s*"$/) {
        $section = qq("$1");
    } elsif ($link =~ m/^[-:.\w]+(?:\(\S+\))?$/) {
        ($manpage, $section) = ($link, '');
    } elsif ($link =~ m%/%) {
        ($manpage, $section) = split (/\s*\/\s*/, $link, 2);
    }

    # Now build the actual output text.
    my $in_manpage = length ($manpage) ? " in the $manpage manpage" : '';
    unless (length ($section)) {
        return length ($manpage) ? "the $manpage manpage" : '';
    }
    if ($section =~ /^[:\w]+(?:\(\))?/) {
        return "the $section entry"
            . ($in_manpage || " elsewhere in this document");
    }
    $section =~ s/^\"\s*//;
    $section =~ s/\s*\"$//;
    return qq(the section on "$section") . $in_manpage;
}
428 ############################################################################
429 # List handling
430 ############################################################################
# This method is called whenever an =item command is complete (in other
# words, we've seen its associated paragraph or know for certain that it
# doesn't have one).  It gets the paragraph associated with the item as an
# argument.  When the paragraph is missing or blank, or the tag plus one
# space does not fit between the enclosing indent and the current margin,
# the tag is printed on a line of its own at the enclosing indent, followed
# by the paragraph (if it has any visible text) at the current margin.
# Otherwise the tag is written into the leading margin of the paragraph's
# first line.  In alt mode the first column is turned into a colon.
sub item {
    my ($self, $text) = @_;
    my $tag = $$self{ITEM};
    unless (defined $tag) {
        carp "item called without tag";
        return;
    }
    undef $$self{ITEM};

    # The tag sits at the indentation in force before the enclosing =over.
    my $indent = $$self{INDENTS}[-1];
    $indent = $$self{indent} unless defined $indent;
    my $space = ' ' x $indent;
    $space =~ s/^ /:/ if $$self{alt};

    if ($text && $text !~ /^\s+$/
        && $$self{MARGIN} - $indent >= length ($tag) + 1) {
        # Room in the margin: overwrite the blanks after the indent on the
        # first line of the formatted paragraph with the tag.
        my $body = $self->reformat ($text);
        $body =~ s/^ /:/ if ($$self{alt} && $indent > 0);
        my $tagspace = ' ' x length $tag;
        $body =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
        $self->output ($body);
    } else {
        # Tag on its own line, formatted with the margin temporarily pulled
        # back to the enclosing indent.
        my $saved_margin = $$self{MARGIN};
        $$self{MARGIN} = $indent;
        my $heading = $self->reformat ($tag);
        $heading =~ s/\n*$/\n/;
        $self->output ($heading);
        $$self{MARGIN} = $saved_margin;
        $self->output ($self->reformat ($text)) if $text =~ /\S/;
    }
}
470 ############################################################################
471 # Output formatting
472 ############################################################################
# Wrap a line, indenting by the current left margin.  We can't use
# Text::Wrap because it plays games with tabs.  We can't use formline, even
# though we'd really like to, because it screws up non-printing characters.
# So we have to do the wrapping ourselves.  Takes the text to wrap and
# returns it broken into lines no wider than the width option, each prefixed
# with the margin; trailing whitespace is replaced by a blank line.
sub wrap {
    my $self = shift;
    local $_ = shift;
    my $output = '';
    my $spaces = ' ' x $$self{MARGIN};
    my $width = $$self{width} - $$self{MARGIN};

    # Only wrap when there is at least one usable column.  With a width of
    # zero the second substitution below succeeds without consuming
    # anything, so the loop would never terminate.  With no usable column
    # the text is emitted unwrapped.
    while ($width > 0 && length ($_) > $width) {
        # Prefer breaking at whitespace; failing that, cut a word that is
        # longer than the line at the line width.
        if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
            $output .= $spaces . $1 . "\n";
        } else {
            last;
        }
    }
    $output .= $spaces . $_;
    $output =~ s/\s+$/\n\n/;
    $output;
}
# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text.
sub reformat {
    my $self = shift;
    local $_ = shift;

    # If we're trying to preserve two spaces after sentences, do some
    # munging to support that.  Otherwise, smash all repeated whitespace.
    if ($$self{sentence}) {
        my $sentence_gap = ' ' x 2;
        s/ +$//mg;              # Drop trailing blanks on every line.
        s/\.\n/. \n/g;          # A period at end of line ends a sentence...
        s/\n/ /g;               # ...so joining the lines leaves two spaces.

        # Cut only runs of three or more spaces, and cut them to two.
        # Collapsing every run to a single space here would throw away the
        # sentence spacing that was just set up.
        s/ {3,}/$sentence_gap/g;
    } else {
        s/\s+/ /g;
    }
    $self->wrap ($_);
}
# Output text to the output device.  Takes the text to print; the \01
# placeholders that stand for the protected spaces of S<> are turned back
# into spaces first.  Returns the result of print.
sub output {
    my ($self, $text) = @_;

    # Translate a private copy.  Working on $_[1] directly would change the
    # caller's variable and would die if a read-only string were passed.
    $text =~ tr/\01/ /;
    print { $self->output_handle } $text;
}
519 ############################################################################
520 # Backwards compatibility
521 ############################################################################
# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.  Takes
# optional leading flags (-a for the alternate format, -<number> for the
# width), then the input file and optionally an output file handle.
# Returns whatever the parser's parse method returns and croaks if the
# input file cannot be opened.
sub pod2text {
    # Work on a copy of the arguments: the elements of @_ are aliases of the
    # caller's variables, and the input file name gets replaced by a file
    # handle further down.
    my @files = @_;
    my @options;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.
    while (defined $files[0] && $files[0] =~ /^-/) {
        my $flag = shift @files;
        if    ($flag eq '-a')       { push (@options, alt => 1)    }
        elsif ($flag =~ /^-(\d+)$/) { push (@options, width => $1) }
        else {
            unshift (@files, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::PlainText->new (@options);

    # With a single argument the parser opens the file itself.
    unless (defined $files[1]) {
        return $parser->parse_from_file (@files);
    }

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we want to call parse_from_filehandle(), which
    # means we need to turn the first argument into a file handle.  Magic
    # open will handle the <&STDIN case automagically.
    #
    # NOTE(review): the two-argument open is kept on purpose for that magic,
    # but it also means that a file name taken from untrusted input can
    # select a mode or run a command.
    local *IN;
    open (IN, $files[0])
        or croak ("Can't open $files[0] for reading: $!\n");
    $files[0] = \*IN;
    return $parser->parse_from_filehandle (@files);
}
562 ############################################################################
563 # Module return value and documentation
564 ############################################################################
567 __END__
569 =head1 NAME
571 Pod::PlainText - Convert POD data to formatted ASCII text
573 =head1 SYNOPSIS
575 use Pod::PlainText;
576 my $parser = Pod::PlainText->new (sentence => 0, width => 78);
578 # Read POD from STDIN and write to STDOUT.
579 $parser->parse_from_filehandle;
581 # Read POD from file.pod and write to file.txt.
582 $parser->parse_from_file ('file.pod', 'file.txt');
584 =head1 DESCRIPTION
586 Pod::PlainText is a module that can convert documentation in the POD format (the
587 preferred language for documenting Perl) into formatted ASCII. It uses no
588 special formatting controls or codes whatsoever, and its output is therefore
589 suitable for nearly any device.
591 As a derived class from Pod::Parser, Pod::PlainText supports the same methods and
592 interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
593 new parser with C<Pod::PlainText-E<gt>new()> and then calls either
594 parse_from_filehandle() or parse_from_file().
596 new() can take options, in the form of key/value pairs, that control the
597 behavior of the parser. The currently recognized options are:
599 =over 4
601 =item alt
603 If set to a true value, selects an alternate output format that, among other
604 things, uses a different heading style and marks C<=item> entries with a
605 colon in the left margin. Defaults to false.
607 =item indent
609 The number of spaces to indent regular text, and the default indentation for
610 C<=over> blocks. Defaults to 4.
612 =item loose
614 If set to a true value, a blank line is printed after a C<=head1> heading.
615 If set to false (the default), no blank line is printed after C<=head1>,
616 although one is still printed after C<=head2>. This is the default because
617 it's the expected formatting for manual pages; if you're formatting
618 arbitrary text documents, setting this to true may result in more pleasing
619 output.
621 =item sentence
623 If set to a true value, Pod::PlainText will assume that each sentence ends in two
624 spaces, and will try to preserve that spacing. If set to false, all
625 consecutive whitespace in non-verbatim paragraphs is compressed into a
626 single space. Defaults to false.
628 =item width
630 The column at which to wrap text on the right-hand side. Defaults to 76.
632 =back
634 The standard Pod::Parser method parse_from_filehandle() takes up to two
635 arguments, the first being the file handle to read POD from and the second
636 being the file handle to write the formatted output to. The first defaults
637 to STDIN if not given, and the second defaults to STDOUT. The method
638 parse_from_file() is almost identical, except that its two arguments are the
639 input and output disk files instead. See L<Pod::Parser> for the specific
640 details.
642 =head1 DIAGNOSTICS
644 =over 4
646 =item Bizarre space in item
648 (W) Something has gone wrong in internal C<=item> processing. This message
649 indicates a bug in Pod::PlainText; you should never see it.
651 =item Can't open %s for reading: %s
653 (F) Pod::PlainText was invoked via the compatibility mode pod2text() interface
654 and the input file it was given could not be opened.
656 =item Unknown escape: %s
658 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::PlainText didn't
659 know about.
661 =item Unknown sequence %s
663 (W) The POD source contained a non-standard internal sequence (something of
664 the form C<XE<lt>E<gt>>) that Pod::PlainText didn't know about.
666 =item Unmatched =back
668 (W) Pod::PlainText encountered a C<=back> command that didn't correspond to an
669 C<=over> command.
671 =back
673 =head1 RESTRICTIONS
675 Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
676 output, due to an internal implementation detail.
678 =head1 NOTES
680 This is a replacement for an earlier Pod::Text module written by Tom
681 Christiansen. It has a revamped interface, since it now uses Pod::Parser,
682 but an interface roughly compatible with the old Pod::Text::pod2text()
683 function is still available. Please change to the new calling convention,
684 though.
686 The original Pod::Text contained code to do formatting via termcap
687 sequences, although it wasn't turned on by default and it was problematic to
688 get it to work at all. This rewrite doesn't even try to do that, but a
689 subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
691 =head1 SEE ALSO
693 L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
694 pod2text(1)
696 =head1 AUTHOR
698 Please report bugs using L<http://rt.cpan.org>.
700 Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
701 original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
702 its conversion to Pod::Parser by Brad Appleton
703 E<lt>bradapp@enteract.comE<gt>.
705 =cut