Improved header generation in pod2grutatxt.
[grutatxt.git] / Grutatxt.pm
blob800be1174f1b4a753a076020b528149b1a9f42c6
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2008 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.15-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 =for html <->
67 A comprehensive description of the markup is defined in
68 the README file, included with the Grutatxt package (it is
69 written in Grutatxt format itself, so it can be converted
70 using the I<grutatxt> tool to any of the supported formats).
71 The latest version (and more information) can be retrieved
72 from the Grutatxt home page at:
74 http://www.triptico.com/software/grutatxt.html
76 =head1 FUNCTIONS AND METHODS
78 =head2 B<new>
80 $grutatxt = new Grutatxt([ "mode" => $mode, ]
81 [ "title" => \$title, ]
82 [ "marks" => \@marks, ]
83 [ "index" => \@index, ]
84 [ "abstract" => \$abstract, ]
85 [ "strip-parens" => $bool, ]
86 [ "strip-dollars" => $bool, ]
87 [ %driver_specific_arguments ] );
89 Creates a new Grutatxt object instance. All parameters are
90 optional.
92 =over 4
94 =item I<mode>
96 Output format. Can be HTML, troff or man. HTML is used if not specified.
98 =item I<title>
100 If I<title> is specified as a reference to scalar, the first
101 level 1 heading found in the text is stored inside it.
103 =item I<marks>
105 Marks in the Grutatxt markup are created by inserting the
106 string <-> alone in a line. If I<marks> is specified as a
107 reference to array, it will be filled with the subscripts
108 (relative to the output array) of the lines where the marks
109 are found in the text.
111 =item I<index>
113 If I<index> is specified as a reference to array, it will
114 be filled with strings in the format
116 level,heading
118 This information can be used to build a table of contents
119 of the processed text.
121 =item I<strip-parens>
123 Function names in the Grutatxt markup are strings of
124 alphanumeric characters immediately followed by a pair
125 of open and close parentheses. If this boolean value is
126 set, function names found in the processed text will have
127 their parentheses deleted.
129 =item I<strip-dollars>
131 Variable names in the Grutatxt markup are strings of
132 alphanumeric characters preceded by a dollar sign.
133 If this boolean value is set, variable names found in
134 the processed text will have the dollar sign deleted.
136 =item I<abstract>
138 The I<abstract> of a Grutatxt document is the fragment of text
139 from the beginning of the document to the end of the first
140 paragraph after the title. If I<abstract> is specified as a
141 reference to scalar, it will contain (after each call to the
142 B<process()> method) the subscript of the element of the output
143 array that marks the end of the subject.
145 =back
147 =cut
149 sub new
151 my ($class, %args) = @_;
152 my ($gh);
154 $args{'mode'} ||= 'HTML';
156 $class .= "::" . $args{'mode'};
158 $gh = new $class(%args);
160 return $gh;
164 sub escape
165 # escapes special characters, ignoring passthrough code
167 my ($gh, $l) = @_;
169 # splits between << and >>
170 my (@l) = split(/(<<|>>)/, $l);
172 @l = map {
173 my $l = $_;
175 # escape only text outside << and >>
176 unless ($l eq '<<' .. $l eq '>>') {
177 $l = $gh->_escape($l);
180 $_ = $l;
181 } @l;
183 # join again, stripping << and >>
184 $l = join('', grep(!/^(<<|>>)$/, @l));
186 return $l;
190 =head2 B<process>
192 @output = $grutatxt->process($text);
194 Processes a text in Grutatxt format. The result is returned
195 as an array of lines.
197 =cut
199 sub process
201 my ($gh, $content) = @_;
202 my ($p);
204 # clean output
205 @{$gh->{'o'}} = ();
207 # clean title and paragraph numbers
208 $gh->{'-title'} = '';
209 $gh->{'-p'} = 0;
211 # clean marks
212 @{$gh->{'marks'}} = () if ref($gh->{'marks'});
214 # clean index
215 @{$gh->{'index'}} = () if ref($gh->{'index'});
217 # reset abstract line
218 ${$gh->{'abstract'}} = 0 if ref($gh->{'abstract'});
220 # insert prefix
221 $gh->_prefix();
223 $gh->{'-mode'} = undef;
225 foreach my $l (split(/\n/,$content)) {
226 # inline data (passthrough)
227 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
228 $gh->_inline($l);
229 next;
232 # marks
233 if ($l =~ /^\s*<\->\s*$/) {
234 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
235 if ref($gh->{'marks'});
237 next;
240 # escape possibly dangerous characters
241 $l = $gh->escape($l);
243 # empty lines
244 $l =~ s/^\r$//ge;
245 if ($l =~ s/^$/$gh->_empty_line()/ge) {
246 # mark the abstract end
247 if ($gh->{'-title'}) {
248 $gh->{'-p'}++;
250 # mark abstract if it's the
251 # second paragraph from the title
252 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
253 if $gh->{'-p'} == 2;
257 # line-mutating process
258 my $ol = $l;
260 if ($gh->{'-process-urls'}) {
261 # URLs followed by a parenthesized phrase
262 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
263 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
264 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
265 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
266 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
268 # URLs without phrase
269 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
270 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
271 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
272 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
274 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
275 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
276 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
277 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
280 # change '''text''' and *text* into strong emphasis
281 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
282 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
283 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
285 # change ''text'' and _text_ into emphasis
286 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
287 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
288 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
290 # change `text' into code
291 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
293 # enclose function names
294 if ($gh->{'strip-parens'}) {
295 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
297 else {
298 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
301 # enclose variable names
302 if ($gh->{'strip-dollars'}) {
303 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
305 else {
306 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
310 # main switch
313 # definition list
314 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
315 $gh->{'-mode-elems'} ++;
318 # unsorted list
319 elsif ($gh->{'-mode'} ne 'pre' and
320 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
321 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
322 $gh->{'-mode-elems'} ++;
325 # sorted list
326 elsif ($gh->{'-mode'} ne 'pre' and
327 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
328 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
329 $gh->{'-mode-elems'} ++;
332 # quoted block
333 elsif ($l =~ s/^\s\"/$gh->_blockquote()/e) {
336 # table rows
337 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
338 $gh->{'-mode-elems'} ++;
341 # table heading / end of row
342 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
345 # preformatted text
346 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
347 if ($gh->{'-mode'} eq 'pre' &&
348 !$gh->{'no-pure-verbatim'}) {
349 # set line back to original
350 $l = $ol;
354 # anything else
355 else {
356 # back to normal mode
357 $gh->_new_mode(undef);
360 # 1 level heading
361 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
363 # 2 level heading
364 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
366 # 3 level heading
367 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
369 # change ------ into hr
370 $l =~ s/^----*$/$gh->_hr()/e;
372 # push finally
373 $gh->_push($l) if $l;
376 # flush
377 $gh->_new_mode(undef);
379 # postfix
380 $gh->_postfix();
382 # set title
383 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
385 # set abstract, if not set
386 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
387 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
389 # travel all lines again, post-escaping
390 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
392 return @{$gh->{'o'}};
396 =head2 B<process_file>
398 @output = $grutatxt->process_file($filename);
400 Processes a file in Grutatxt format.
402 =cut
404 sub process_file
406 my ($gh, $file) = @_;
408 open F, $file or return(undef);
410 my ($content) = join('',<F>);
411 close F;
413 return $gh->process($content);
417 sub _push
419 my ($gh, $l) = @_;
421 push(@{$gh->{'o'}},$l);
425 sub _process_heading
427 my ($gh, $level, $hd) = @_;
428 my ($l);
430 $l = pop(@{$gh->{'o'}});
432 if ($l eq $gh->_empty_line()) {
433 $gh->_push($l);
434 return $hd;
437 # store title
438 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
440 # store index
441 if (ref($gh->{'index'})) {
442 push(@{$gh->{'index'}},"$level,$l");
445 return $gh->_heading($level,$l);
449 sub _calc_col_span
451 my ($gh, $l) = @_;
452 my (@spans);
454 # strip first + and all -
455 $l =~ s/^\+//;
456 $l =~ s/-//g;
458 my ($t) = 1; @spans = ();
459 for (my $n = 0; $n < length($l); $n++) {
460 if (substr($l, $n, 1) eq '+') {
461 push(@spans, $t);
462 $t = 1;
464 else {
465 # it's a colspan mark:
466 # increment
467 $t++;
471 return @spans;
475 sub _table_row
477 my ($gh, $str) = @_;
479 my @s = split(/\|/,$str);
481 for (my $n = 0; $n < scalar(@s); $n++) {
482 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
485 push(@{$gh->{'-table-raw'}}, $str);
487 return '';
491 sub _pre
493 my ($gh, $l) = @_;
495 # if any other mode is active, add to it
496 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
497 $l =~ s/^\s+//;
499 my ($a) = pop(@{$gh->{'o'}})." ".$l;
500 $gh->_push($a);
501 $l = '';
503 else {
504 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
505 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
507 $gh->_new_mode('pre');
510 return $l;
514 sub _multilevel_list
516 my ($gh, $str, $ind) = @_;
517 my (@l,$level);
519 @l = @{$gh->{$str}};
520 $ind = length($ind);
521 $level = 0;
523 if ($l[-1] < $ind) {
524 # if last level is less indented, increase
525 # nesting level
526 push(@l, $ind);
527 $level++;
529 elsif ($l[-1] > $ind) {
530 # if last level is more indented, decrease
531 # levels until the same is found (or back to
532 # the beginning if not)
533 while (pop(@l)) {
534 $level--;
535 last if $l[-1] == $ind;
539 $gh->{$str} = \@l;
541 return $level;
545 sub _unsorted_list
547 my ($gh, $ind) = @_;
549 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
553 sub _ordered_list
555 my ($gh, $ind) = @_;
557 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
561 # empty stubs for falling through the superclass
563 sub _inline { my ($gh, $l) = @_; $l; }
564 sub _escape { my ($gh, $l) = @_; $l; }
565 sub _escape_post { my ($gh, $l) = @_; $l; }
566 sub _empty_line { my ($gh) = @_; ''; }
567 sub _url { my ($gh, $url, $label) = @_; ''; }
568 sub _strong { my ($gh, $str) = @_; $str; }
569 sub _em { my ($gh, $str) = @_; $str; }
570 sub _code { my ($gh, $str) = @_; $str; }
571 sub _funcname { my ($gh, $str) = @_; $str; }
572 sub _varname { my ($gh, $str) = @_; $str; }
573 sub _new_mode { my ($gh, $mode) = @_; }
574 sub _dl { my ($gh, $str) = @_; $str; }
575 sub _ul { my ($gh, $level) = @_; ''; }
576 sub _ol { my ($gh, $level) = @_; ''; }
577 sub _blockquote { my ($gh, $str) = @_; $str; }
578 sub _hr { my ($gh) = @_; ''; }
579 sub _heading { my ($gh, $level, $l) = @_; $l; }
580 sub _table { my ($gh, $str) = @_; $str; }
581 sub _prefix { my ($gh) = @_; }
582 sub _postfix { my ($gh) = @_; }
584 ###########################################################
586 =head1 DRIVER SPECIFIC INFORMATION
588 =cut
590 ###########################################################
591 # HTML Driver
593 package Grutatxt::HTML;
595 @ISA = ("Grutatxt");
597 =head2 HTML Driver
599 The additional parameters for a new Grutatxt object are:
601 =over 4
603 =item I<table-headers>
605 If this boolean value is set, the first row in tables
606 is assumed to be the heading and rendered using 'th'
607 instead of 'td' tags.
609 =item I<center-tables>
611 If this boolean value is set, tables are centered.
613 =item I<expand-tables>
615 If this boolean value is set, tables are expanded (width 100%).
617 =item I<dl-as-dl>
619 If this boolean value is set, definition lists will be
620 rendered using 'dl', 'dt' and 'dd' instead of tables.
622 =item I<header-offset>
624 Offset to be summed to the heading level when rendering
625 'h?' tags (default is 0).
627 =item I<class-oddeven>
629 If this boolean value is set, tables will be rendered
630 with an "oddeven" CSS class, and rows alternately classed
631 as "even" or "odd". If it's not set, no CSS class info
632 is added to tables.
634 =item I<url-label-max>
636 If an URL without label is given (that is, the URL itself
637 is used as the label), it's trimmed to have as much
638 characters as this value says. By default it's 80.
640 =back
642 =cut
644 sub new
646 my ($class, %args) = @_;
647 my ($gh);
649 bless(\%args, $class);
650 $gh = \%args;
652 $gh->{'-process-urls'} = 1;
653 $gh->{'url-label-max'} ||= 80;
655 return $gh;
659 sub _inline
661 my ($gh, $l) = @_;
663 # accept unnamed and HTML inlines
664 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
665 $gh->{'-inline'} = 'HTML';
666 return;
669 if ($l =~ /^>>$/) {
670 delete $gh->{'-inline'};
671 return;
674 if ($gh->{'-inline'} eq 'HTML') {
675 $gh->_push($l);
680 sub _escape
682 my ($gh, $l) = @_;
684 $l =~ s/&/&amp;/g;
685 $l =~ s/</&lt;/g;
686 $l =~ s/>/&gt;/g;
688 return $l;
692 sub _empty_line
694 my ($gh) = @_;
696 return('<p>');
700 sub _url
702 my ($gh, $url, $label) = @_;
704 if (!$label) {
705 $label = $url;
707 if (length($label) > $gh->{'url-label-max'}) {
708 $label = substr($label, 0,
709 $gh->{'url-label-max'}) . '...';
713 return "<a href = \"$url\">$label</a>";
717 sub _strong
719 my ($gh, $str) = @_;
720 return "<strong>$str</strong>";
724 sub _em
726 my ($gh, $str) = @_;
727 return "<em>$str</em>";
731 sub _code
733 my ($gh, $str) = @_;
734 return "<code class = 'literal'>$str</code>";
738 sub _funcname
740 my ($gh, $str) = @_;
741 return "<code class = 'funcname'>$str</code>";
745 sub _varname
747 my ($gh, $str) = @_;
748 return "<code class = 'var'>$str</code>";
752 sub _new_mode
754 my ($gh, $mode, $params) = @_;
756 if ($mode ne $gh->{'-mode'}) {
757 my $tag;
759 # clean list levels
760 if ($gh->{'-mode'} eq 'ul') {
761 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
763 elsif ($gh->{'-mode'} eq 'ol') {
764 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
766 elsif ($gh->{'-mode'}) {
767 $gh->_push("</$gh->{'-mode'}>");
770 # send new one
771 $tag = $params ? "<$mode $params>" : "<$mode>";
772 $gh->_push($tag) if $mode;
774 $gh->{'-mode'} = $mode;
775 $gh->{'-mode-elems'} = 0;
777 # clean previous lists
778 $gh->{'-ul-levels'} = undef;
779 $gh->{'-ol-levels'} = undef;
784 sub _dl
786 my ($gh, $str) = @_;
787 my ($ret) = '';
789 if ($gh->{'dl-as-dl'}) {
790 $gh->_new_mode('dl');
791 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
793 else {
794 $gh->_new_mode('table');
795 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
798 return $ret;
802 sub _ul
804 my ($gh, $levels) = @_;
805 my ($ret);
807 $ret = '';
809 if ($levels > 0) {
810 $ret .= '<ul>';
812 elsif ($levels < 0) {
813 $ret .= '</li></ul>' x abs($levels);
816 if ($gh->{'-mode'} ne 'ul') {
817 $gh->{'-mode'} = 'ul';
819 else {
820 $ret .= '</li>' if $levels <= 0;
823 $ret .= '<li>';
825 return $ret;
829 sub _ol
831 my ($gh, $levels) = @_;
832 my ($ret);
834 $ret = '';
836 if ($levels > 0) {
837 $ret .= '<ol>';
839 elsif ($levels < 0) {
840 $ret .= '</li></ol>' x abs($levels);
843 if ($gh->{'-mode'} ne 'ol') {
844 $gh->{'-mode'} = 'ol';
846 else {
847 $ret .= '</li>' if $levels <= 0;
850 $ret .= '<li>';
852 return $ret;
856 sub _blockquote
858 my ($gh) = @_;
860 $gh->_new_mode('blockquote');
861 return "\"";
865 sub _hr
867 my ($gh) = @_;
869 return "<hr size = '1' noshade = 'noshade'>";
873 sub _heading
875 my ($gh, $level, $l) = @_;
877 # creates a valid anchor
878 my ($a) = lc($l);
880 $a =~ s/[\"\'\/]//g;
881 $a =~ s/\s/_/g;
882 $a =~ s/<[^>]+>//g;
884 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
885 $a, $level+$gh->{'header-offset'},
886 $l, $level+$gh->{'header-offset'});
888 return $l;
892 sub _table
894 my ($gh, $str) = @_;
896 if ($gh->{'-mode'} eq 'table') {
897 my ($class) = '';
898 my (@spans) = $gh->_calc_col_span($str);
900 # calculate CSS class, if any
901 if ($gh->{'class-oddeven'}) {
902 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
905 $str = "<tr $class>";
907 # build columns
908 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
909 my ($i,$s);
911 $i = ${$gh->{'-table'}}[$n];
912 $i = "&nbsp;" if $i =~ /^\s*$/;
914 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
916 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
917 $str .= "<th $class $s>$i</th>";
919 else {
920 $str .= "<td $class $s>$i</td>";
924 $str .= '</tr>';
926 @{$gh->{'-table'}} = ();
927 $gh->{'-tbl-row'}++;
929 else {
930 # new table
931 my ($params);
933 $params = "border = '1'";
934 $params .= " width = '100\%'" if $gh->{'expand-tables'};
935 $params .= " align = 'center'" if $gh->{'center-tables'};
936 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
938 $gh->_new_mode('table', $params);
940 @{$gh->{'-table'}} = ();
941 $gh->{'-tbl-row'} = 1;
942 $str = '';
945 return $str;
949 ###########################################################
950 # troff Driver
952 package Grutatxt::troff;
954 @ISA = ("Grutatxt");
956 =head2 troff Driver
958 The troff driver uses the B<-me> macros and B<tbl>. A
959 good way to post-process this output (to PostScript in
960 the example) could be by using
962 groff -t -me -Tps
964 The additional parameters for a new Grutatxt object are:
966 =over 4
968 =item I<normal-size>
970 The point size of normal text. By default is 10.
972 =item I<heading-sizes>
974 This argument must be a reference to an array containing
975 the size in points of the 3 different heading levels. By
976 default, level sizes are [ 20, 18, 15 ].
978 =item I<table-type>
980 The type of table to be rendered by B<tbl>. Can be
981 I<allbox> (all lines rendered; this is the default value),
982 I<box> (only outlined) or I<doublebox> (only outlined by
983 a double line).
985 =back
987 =cut
989 sub new
991 my ($class, %args) = @_;
992 my ($gh);
994 bless(\%args,$class);
995 $gh = \%args;
997 $gh->{'-process-urls'} = 0;
999 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
1000 $gh->{'normal-size'} ||= 10;
1001 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
1003 return $gh;
1007 sub _prefix
1009 my ($gh) = @_;
1011 $gh->_push(".nr pp $gh->{'normal-size'}");
1012 $gh->_push(".nh");
1016 sub _inline
1018 my ($gh,$l) = @_;
1020 # accept only troff inlines
1021 if ($l =~ /^<<\s*troff$/i) {
1022 $gh->{'-inline'} = 'troff';
1023 return;
1026 if ($l =~ /^>>$/) {
1027 delete $gh->{'-inline'};
1028 return;
1031 if ($gh->{'-inline'} eq 'troff') {
1032 $gh->_push($l);
1037 sub _escape
1039 my ($gh,$l) = @_;
1041 $l =~ s/\\/\\\\/g;
1042 $l =~ s/^'/\\&'/;
1044 return $l;
1048 sub _empty_line
1050 my ($gh) = @_;
1052 return '.lp';
1056 sub _strong
1058 my ($gh, $str) = @_;
1059 return "\\fB$str\\fP";
1063 sub _em
1065 my ($gh, $str) = @_;
1066 return "\\fI$str\\fP";
1070 sub _code
1072 my ($gh, $str) = @_;
1073 return "\\fI$str\\fP";
1077 sub _funcname
1079 my ($gh, $str) = @_;
1080 return "\\fB$str\\fP";
1084 sub _varname
1086 my ($gh, $str) = @_;
1087 return "\\fI$str\\fP";
1091 sub _new_mode
1093 my ($gh, $mode, $params) = @_;
1095 if ($mode ne $gh->{'-mode'}) {
1096 my $tag;
1098 # flush previous list
1099 if ($gh->{'-mode'} eq 'pre') {
1100 $gh->_push('.)l');
1102 elsif ($gh->{'-mode'} eq 'table') {
1103 chomp($gh->{'-table-head'});
1104 $gh->{'-table-head'} =~ s/\s+$//;
1105 $gh->_push($gh->{'-table-head'} . '.');
1106 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1108 elsif ($gh->{'-mode'} eq 'blockquote') {
1109 $gh->_push('.)q');
1112 # send new one
1113 if ($mode eq 'pre') {
1114 $gh->_push('.(l L');
1116 elsif ($mode eq 'blockquote') {
1117 $gh->_push('.(q');
1120 $gh->{'-mode'} = $mode;
1125 sub _dl
1127 my ($gh, $str) = @_;
1129 $gh->_new_mode('dl');
1130 return ".ip \"$str\"\n";
1134 sub _ul
1136 my ($gh) = @_;
1138 $gh->_new_mode('ul');
1139 return ".bu\n";
1143 sub _ol
1145 my ($gh) = @_;
1147 $gh->_new_mode('ol');
1148 return ".np\n";
1152 sub _blockquote
1154 my ($gh) = @_;
1156 $gh->_new_mode('blockquote');
1157 return "\"";
1161 sub _hr
1163 my ($gh) = @_;
1165 return '.hl';
1169 sub _heading
1171 my ($gh, $level, $l) = @_;
1173 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1175 return $l;
1179 sub _table
1181 my ($gh, $str) = @_;
1183 if ($gh->{'-mode'} eq 'table') {
1184 my ($h, $b);
1185 my (@spans) = $gh->_calc_col_span($str);
1187 # build columns
1188 $h = '';
1189 $b = '';
1190 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1191 my ($i);
1193 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1194 $h .= 'cB ';
1196 else {
1197 $h .= 'l ';
1200 # add span columns
1201 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1203 $b .= '#' if $n;
1205 $i = ${$gh->{'-table'}}[$n];
1206 $i =~ s/^\s+//;
1207 $i =~ s/\s+$//;
1208 $i =~ s/(\s)+/$1/g;
1209 $b .= $i;
1212 # add a separator
1213 $b .= "\n_" if $gh->{'table-headers'} and
1214 $gh->{'-tbl-row'} == 1 and
1215 $gh->{'table-type'} ne "allbox";
1217 $gh->{'-table-head'} .= "$h\n";
1218 $gh->{'-table-body'} .= "$b\n";
1220 @{$gh->{'-table'}} = ();
1221 $gh->{'-tbl-row'}++;
1223 else {
1224 # new table
1225 $gh->_new_mode('table');
1227 @{$gh->{'-table'}} = ();
1228 $gh->{'-tbl-row'} = 1;
1230 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1231 $gh->{'-table-body'} = '';
1234 $str = '';
1235 return $str;
1239 sub _postfix
1241 my ($gh) = @_;
1243 # add to top headings and footers
1244 unshift(@{$gh->{'o'}},".ef '\%' ''");
1245 unshift(@{$gh->{'o'}},".of '' '\%'");
1246 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1247 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1251 ###########################################################
1252 # man Driver
1254 package Grutatxt::man;
1256 @ISA = ("Grutatxt::troff", "Grutatxt");
1258 =head2 man Driver
1260 The man driver is used to generate Unix-like man pages. Note that
1261 all headings have the same level with this output driver.
1263 The additional parameters for a new Grutatxt object are:
1265 =over 4
1267 =item I<section>
1269 The man page section (see man documentation). By default is 1.
1271 =item I<page-name>
1273 The name of the page. This is usually the name of the program
1274 or function the man page is documenting and will be shown in the
1275 page header. By default is the empty string.
1277 =back
1279 =cut
1281 sub new
1283 my ($class, %args) = @_;
1284 my ($gh);
1286 bless(\%args,$class);
1287 $gh = \%args;
1289 $gh->{'-process-urls'} = 0;
1291 $gh->{'section'} ||= 1;
1292 $gh->{'page-name'} ||= "";
1294 return $gh;
1298 sub _prefix
1300 my ($gh) = @_;
1302 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1306 sub _inline
1308 my ($gh, $l) = @_;
1310 # accept only man markup inlines
1311 if ($l =~ /^<<\s*man$/i) {
1312 $gh->{'-inline'} = 'man';
1313 return;
1316 if ($l =~ /^>>$/) {
1317 delete $gh->{'-inline'};
1318 return;
1321 if ($gh->{'-inline'} eq 'man') {
1322 $gh->_push($l);
1327 sub _empty_line
1329 my ($gh) = @_;
1331 return ' ';
1335 sub _new_mode
1337 my ($gh,$mode,$params) = @_;
1339 if ($mode ne $gh->{'-mode'}) {
1340 my $tag;
1342 # flush previous list
1343 if ($gh->{'-mode'} eq 'pre' or
1344 $gh->{'-mode'} eq 'table') {
1345 $gh->_push('.fi');
1348 if ($gh->{'-mode'} eq 'blockquote') {
1349 $gh->_push('.RE');
1352 if ($gh->{'-mode'} eq 'ul') {
1353 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1356 if ($gh->{'-mode'} eq 'ol') {
1357 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1360 # send new one
1361 if ($mode eq 'pre' or $mode eq 'table') {
1362 $gh->_push('.nf');
1365 if ($mode eq 'blockquote') {
1366 $gh->_push('.RS 4');
1369 $gh->{'-mode'} = $mode;
1374 sub _dl
1376 my ($gh, $str) = @_;
1378 $gh->_new_mode('dl');
1379 return ".TP\n.B \"$str\"\n";
1383 sub _ul
1385 my ($gh, $levels) = @_;
1386 my ($ret) = '';
1388 if ($levels > 0) {
1389 $ret = ".RS 4\n";
1391 elsif ($levels < 0) {
1392 $ret = ".RE\n" x abs($levels);
1395 $gh->_new_mode('ul');
1396 return $ret . ".TP 4\n\\(bu\n";
1400 sub _ol
1402 my ($gh, $levels) = @_;
1403 my $l = @{$gh->{'-ol-levels'}};
1404 my $ret = '';
1406 $gh->{'-ol-level'} += $levels;
1408 if ($levels > 0) {
1409 $ret = ".RS 4\n";
1411 $l[$gh->{'-ol-level'}] = 1;
1413 elsif ($levels < 0) {
1414 $ret = ".RE\n" x abs($levels);
1417 $gh->_new_mode('ol');
1418 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1420 return $ret;
1424 sub _hr
1426 my ($gh) = @_;
1428 return '';
1432 sub _heading
1434 my ($gh, $level, $l) = @_;
1436 # all headers are the same depth in man pages
1437 return ".SH \"" . uc($l) . "\"";
1441 sub _table
1443 my ($gh, $str) = @_;
1445 if ($gh->{'-mode'} eq 'table') {
1446 foreach my $r (@{$gh->{'-table-raw'}}) {
1447 $gh->_push("|$r|");
1450 else {
1451 $gh->_new_mode('table');
1454 @{$gh->{'-table'}} = ();
1455 @{$gh->{'-table-raw'}} = ();
1457 $gh->_push($str);
1459 return '';
1463 sub _postfix
1468 ###########################################################
1469 # latex Driver
1471 package Grutatxt::latex;
1473 @ISA = ("Grutatxt");
1475 =head2 LaTeX Driver
1477 The additional parameters for a new Grutatxt object are:
1479 =over 4
1481 =item I<docclass>
1483 The LaTeX document class. By default is 'report'. You can also use
1484 'article' or 'book' (consult your LaTeX documentation for details).
1486 =item I<papersize>
1488 The paper size to be used in the document. By default is 'a4paper'.
1490 =item I<encoding>
1492 The character encoding used in the document. By default is 'latin1'.
1494 =back
1496 Note that you can't nest further than 4 levels in LaTeX; if you do,
1497 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1499 =cut
1501 sub new
1503 my ($class, %args) = @_;
1504 my ($gh);
1506 bless(\%args,$class);
1507 $gh = \%args;
1509 $gh->{'-process-urls'} = 0;
1511 $gh->{'-docclass'} ||= 'report';
1512 $gh->{'-papersize'} ||= 'a4paper';
1513 $gh->{'-encoding'} ||= 'latin1';
1515 return $gh;
1519 sub _prefix
1521 my ($gh) = @_;
1523 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1524 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1526 $gh->_push("\\begin{document}");
1530 sub _inline
1532 my ($gh, $l) = @_;
1534 # accept only latex inlines
1535 if ($l =~ /^<<\s*latex$/i) {
1536 $gh->{'-inline'} = 'latex';
1537 return;
1540 if ($l =~ /^>>$/) {
1541 delete $gh->{'-inline'};
1542 return;
1545 if ($gh->{'-inline'} eq 'latex') {
1546 $gh->_push($l);
1551 sub _escape
1553 my ($gh, $l) = @_;
1555 $l =~ s/ _ / \\_ /g;
1556 $l =~ s/ ~ / \\~ /g;
1557 $l =~ s/ & / \\& /g;
1559 return $l;
1563 sub _escape_post
1565 my ($gh, $l) = @_;
1567 $l =~ s/ # / \\# /g;
1568 $l =~ s/^\\n$//g;
1569 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1571 return $l;
1575 sub _empty_line
1577 my ($gh) = @_;
1579 return "\\n";
1583 sub _strong
1585 my ($gh, $str) = @_;
1586 return "\\textbf{$str}";
1590 sub _em
1592 my ($gh, $str) = @_;
1593 return "\\emph{$str}";
1597 sub _code
1599 my ($gh, $str) = @_;
1600 return "{\\tt $str}";
1604 sub _funcname
1606 my ($gh, $str) = @_;
1607 return "{\\tt $str}";
1611 sub _varname
1613 my ($gh, $str) = @_;
1615 $str =~ s/^\$/\\\$/;
1617 return "{\\tt $str}";
1621 sub _new_mode
1623 my ($gh, $mode, $params) = @_;
1625 # mode equivalences
1626 my %latex_modes = (
1627 'pre' => 'verbatim',
1628 'blockquote' => 'quote',
1629 'table' => 'tabular',
1630 'dl' => 'description',
1631 'ul' => 'itemize',
1632 'ol' => 'enumerate'
1635 if ($mode ne $gh->{'-mode'}) {
1636 # close previous mode
1637 if ($gh->{'-mode'} eq 'ul') {
1638 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1640 elsif ($gh->{'-mode'} eq 'ol') {
1641 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1643 elsif ($gh->{'-mode'} eq 'table') {
1644 $gh->_push("\\end{tabular}\n");
1646 else {
1647 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1648 if $gh->{'-mode'};
1651 # send new one
1652 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1653 if $mode;
1655 $gh->{'-mode'} = $mode;
1657 $gh->{'-ul-levels'} = undef;
1658 $gh->{'-ol-levels'} = undef;
1663 sub _dl
1665 my ($gh, $str) = @_;
1667 $gh->_new_mode('dl');
1668 return "\\item[$str]\n";
1672 sub _ul
1674 my ($gh, $levels) = @_;
1675 my ($ret);
1677 $ret = '';
1679 if ($levels > 0) {
1680 $ret .= "\\begin{itemize}\n";
1682 elsif ($levels < 0) {
1683 $ret .= "\\end{itemize}\n" x abs($levels);
1686 $gh->{'-mode'} = 'ul';
1688 $ret .= "\\item\n";
1690 return $ret;
1694 sub _ol
1696 my ($gh, $levels) = @_;
1697 my ($ret);
1699 $ret = '';
1701 if ($levels > 0) {
1702 $ret .= "\\begin{enumerate}\n";
1704 elsif ($levels < 0) {
1705 $ret .= "\\end{enumerate}\n" x abs($levels);
1708 $gh->{'-mode'} = 'ol';
1710 $ret .= "\\item\n";
1712 return $ret;
1716 sub _blockquote
1718 my ($gh) = @_;
1720 $gh->_new_mode('blockquote');
1721 return "``";
1725 sub _hr
1727 my ($gh) = @_;
1729 return "------------\n";
1733 sub _heading
1735 my ($gh, $level, $l) = @_;
1737 my @latex_headings = ( "\\section*{", "\\subsection*{",
1738 "\\subsubsection*{");
1740 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1742 return $l;
1746 sub _table
1748 my ($gh,$str) = @_;
1750 if ($gh->{'-mode'} eq 'table') {
1751 my ($class) = '';
1752 my (@spans) = $gh->_calc_col_span($str);
1753 my (@cols);
1755 $str = '';
1757 # build columns
1758 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1759 my ($i, $s);
1761 $i = ${$gh->{'-table'}}[$n];
1762 $i = "&nbsp;" if $i =~ /^\s*$/;
1764 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1766 # multispan columns
1767 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1768 if $spans[$n] > 1;
1770 $i =~ s/\s{2,}/ /g;
1771 $i =~ s/^\s+//;
1772 $i =~ s/\s+$//;
1774 push(@cols, $i);
1777 $str .= join('&', @cols) . "\\\\\n\\hline";
1779 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1781 @{$gh->{'-table'}} = ();
1782 $gh->{'-tbl-row'}++;
1784 else {
1785 # new table
1787 # count the number of columns
1788 $str =~ s/[^\+]//g;
1789 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1791 $gh->_push();
1792 $gh->_new_mode('table', $params);
1794 @{$gh->{'-table'}} = ();
1795 $gh->{'-tbl-row'} = 1;
1796 $str = '';
1799 return $str;
1803 sub _postfix
1805 my ($gh) = @_;
1807 $gh->_push("\\end{document}");
1811 =head1 AUTHOR
1813 Angel Ortega angel@triptico.com
1815 =cut