Avoid using direct HTML tags in documentation.
[grutatxt.git] / Grutatxt.pm
blob37aa8f06fc0f9a74248bb616a0adefff75230522
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2008 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.15-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 A comprehensive description of the markup is defined in
66 the README file, included with the Grutatxt package (it is
67 written in Grutatxt format itself, so it can be converted
68 using the I<grutatxt> tool to any of the supported formats).
69 The latest version (and more information) can be retrieved
70 from the Grutatxt home page at:
72 http://www.triptico.com/software/grutatxt.html
74 =head1 FUNCTIONS AND METHODS
76 =head2 B<new>
78 $grutatxt = new Grutatxt([ "mode" => $mode, ]
79 [ "title" => \$title, ]
80 [ "marks" => \@marks, ]
81 [ "index" => \@index, ]
82 [ "abstract" => \$abstract, ]
83 [ "strip-parens" => $bool, ]
84 [ "strip-dollars" => $bool, ]
85 [ %driver_specific_arguments ] );
87 Creates a new Grutatxt object instance. All parameters are
88 optional.
90 =over 4
92 =item I<mode>
94 Output format. Can be HTML, troff or man. HTML is used if not specified.
96 =item I<title>
98 If I<title> is specified as a reference to scalar, the first
99 level 1 heading found in the text is stored inside it.
101 =item I<marks>
103 Marks in the Grutatxt markup are created by inserting the
104 string <-> alone in a line. If I<marks> is specified as a
105 reference to array, it will be filled with the subscripts
106 (relative to the output array) of the lines where the marks
107 are found in the text.
109 =item I<index>
111 If I<index> is specified as a reference to array, it will
112 be filled with strings in the format
114 level,heading
116 This information can be used to build a table of contents
117 of the processed text.
119 =item I<strip-parens>
121 Function names in the Grutatxt markup are strings of
122 alphanumeric characters immediately followed by a pair
123 of open and close parentheses. If this boolean value is
124 set, function names found in the processed text will have
125 their parentheses deleted.
127 =item I<strip-dollars>
129 Variable names in the Grutatxt markup are strings of
130 alphanumeric characters preceded by a dollar sign.
131 If this boolean value is set, variable names found in
132 the processed text will have the dollar sign deleted.
134 =item I<abstract>
136 The I<abstract> of a Grutatxt document is the fragment of text
137 from the beginning of the document to the end of the first
138 paragraph after the title. If I<abstract> is specified as a
139 reference to scalar, it will contain (after each call to the
140 B<process()> method) the subscript of the element of the output
141 array that marks the end of the subject.
143 =back
145 =cut
147 sub new
149 my ($class, %args) = @_;
150 my ($gh);
152 $args{'mode'} ||= 'HTML';
154 $class .= "::" . $args{'mode'};
156 $gh = new $class(%args);
158 return $gh;
162 sub escape
163 # escapes special characters, ignoring passthrough code
165 my ($gh, $l) = @_;
167 # splits between << and >>
168 my (@l) = split(/(<<|>>)/, $l);
170 @l = map {
171 my $l = $_;
173 # escape only text outside << and >>
174 unless ($l eq '<<' .. $l eq '>>') {
175 $l = $gh->_escape($l);
178 $_ = $l;
179 } @l;
181 # join again, stripping << and >>
182 $l = join('', grep(!/^(<<|>>)$/, @l));
184 return $l;
188 =head2 B<process>
190 @output = $grutatxt->process($text);
192 Processes a text in Grutatxt format. The result is returned
193 as an array of lines.
195 =cut
197 sub process
199 my ($gh, $content) = @_;
200 my ($p);
202 # clean output
203 @{$gh->{'o'}} = ();
205 # clean title and paragraph numbers
206 $gh->{'-title'} = '';
207 $gh->{'-p'} = 0;
209 # clean marks
210 @{$gh->{'marks'}} = () if ref($gh->{'marks'});
212 # clean index
213 @{$gh->{'index'}} = () if ref($gh->{'index'});
215 # reset abstract line
216 ${$gh->{'abstract'}} = 0 if ref($gh->{'abstract'});
218 # insert prefix
219 $gh->_prefix();
221 $gh->{'-mode'} = undef;
223 foreach my $l (split(/\n/,$content)) {
224 # inline data (passthrough)
225 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
226 $gh->_inline($l);
227 next;
230 # marks
231 if ($l =~ /^\s*<\->\s*$/) {
232 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
233 if ref($gh->{'marks'});
235 next;
238 # escape possibly dangerous characters
239 $l = $gh->escape($l);
241 # empty lines
242 $l =~ s/^\r$//ge;
243 if ($l =~ s/^$/$gh->_empty_line()/ge) {
244 # mark the abstract end
245 if ($gh->{'-title'}) {
246 $gh->{'-p'}++;
248 # mark abstract if it's the
249 # second paragraph from the title
250 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
251 if $gh->{'-p'} == 2;
255 if ($gh->{'-process-urls'}) {
256 # URLs followed by a parenthesized phrase
257 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
258 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
259 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
260 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
261 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
263 # URLs without phrase
264 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
265 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
266 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
267 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
269 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
270 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
271 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
272 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
275 # change '''text''' and *text* into strong emphasis
276 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
277 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
278 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
280 # change ''text'' and _text_ into emphasis
281 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
282 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
283 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
285 # change `text' into code
286 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
288 # enclose function names
289 if ($gh->{'strip-parens'}) {
290 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
292 else {
293 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
296 # enclose variable names
297 if ($gh->{'strip-dollars'}) {
298 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
300 else {
301 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
305 # main switch
308 # definition list
309 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
310 $gh->{'-mode-elems'} ++;
313 # unsorted list
314 elsif ($gh->{'-mode'} ne 'pre' and
315 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
316 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
317 $gh->{'-mode-elems'} ++;
320 # sorted list
321 elsif ($gh->{'-mode'} ne 'pre' and
322 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
323 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
324 $gh->{'-mode-elems'} ++;
327 # quoted block
328 elsif ($l =~ s/^\s\"/$gh->_blockquote()/e) {
331 # table rows
332 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
333 $gh->{'-mode-elems'} ++;
336 # table heading / end of row
337 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
340 # preformatted text
341 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
344 # anything else
345 else {
346 # back to normal mode
347 $gh->_new_mode(undef);
350 # 1 level heading
351 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
353 # 2 level heading
354 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
356 # 3 level heading
357 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
359 # change ------ into hr
360 $l =~ s/^----*$/$gh->_hr()/e;
362 # push finally
363 $gh->_push($l) if $l;
366 # flush
367 $gh->_new_mode(undef);
369 # postfix
370 $gh->_postfix();
372 # set title
373 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
375 # set abstract, if not set
376 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
377 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
379 # travel all lines again, post-escaping
380 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
382 return @{$gh->{'o'}};
386 =head2 B<process_file>
388 @output = $grutatxt->process_file($filename);
390 Processes a file in Grutatxt format.
392 =cut
394 sub process_file
396 my ($gh, $file) = @_;
398 open F, $file or return(undef);
400 my ($content) = join('',<F>);
401 close F;
403 return $gh->process($content);
407 sub _push
409 my ($gh, $l) = @_;
411 push(@{$gh->{'o'}},$l);
415 sub _process_heading
417 my ($gh, $level, $hd) = @_;
418 my ($l);
420 $l = pop(@{$gh->{'o'}});
422 if ($l eq $gh->_empty_line()) {
423 $gh->_push($l);
424 return $hd;
427 # store title
428 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
430 # store index
431 if (ref($gh->{'index'})) {
432 push(@{$gh->{'index'}},"$level,$l");
435 return $gh->_heading($level,$l);
439 sub _calc_col_span
441 my ($gh, $l) = @_;
442 my (@spans);
444 # strip first + and all -
445 $l =~ s/^\+//;
446 $l =~ s/-//g;
448 my ($t) = 1; @spans = ();
449 for (my $n = 0; $n < length($l); $n++) {
450 if (substr($l, $n, 1) eq '+') {
451 push(@spans, $t);
452 $t = 1;
454 else {
455 # it's a colspan mark:
456 # increment
457 $t++;
461 return @spans;
465 sub _table_row
467 my ($gh, $str) = @_;
469 my @s = split(/\|/,$str);
471 for (my $n = 0; $n < scalar(@s); $n++) {
472 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
475 push(@{$gh->{'-table-raw'}}, $str);
477 return '';
481 sub _pre
483 my ($gh, $l) = @_;
485 # if any other mode is active, add to it
486 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
487 $l =~ s/^\s+//;
489 my ($a) = pop(@{$gh->{'o'}})." ".$l;
490 $gh->_push($a);
491 $l = '';
493 else {
494 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
495 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
497 $gh->_new_mode('pre');
500 return $l;
504 sub _multilevel_list
506 my ($gh, $str, $ind) = @_;
507 my (@l,$level);
509 @l = @{$gh->{$str}};
510 $ind = length($ind);
511 $level = 0;
513 if ($l[-1] < $ind) {
514 # if last level is less indented, increase
515 # nesting level
516 push(@l, $ind);
517 $level++;
519 elsif ($l[-1] > $ind) {
520 # if last level is more indented, decrease
521 # levels until the same is found (or back to
522 # the beginning if not)
523 while (pop(@l)) {
524 $level--;
525 last if $l[-1] == $ind;
529 $gh->{$str} = \@l;
531 return $level;
535 sub _unsorted_list
537 my ($gh, $ind) = @_;
539 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
543 sub _ordered_list
545 my ($gh, $ind) = @_;
547 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
551 # empty stubs for falling through the superclass
553 sub _inline { my ($gh, $l) = @_; $l; }
554 sub _escape { my ($gh, $l) = @_; $l; }
555 sub _escape_post { my ($gh, $l) = @_; $l; }
556 sub _empty_line { my ($gh) = @_; ''; }
557 sub _url { my ($gh, $url, $label) = @_; ''; }
558 sub _strong { my ($gh, $str) = @_; $str; }
559 sub _em { my ($gh, $str) = @_; $str; }
560 sub _code { my ($gh, $str) = @_; $str; }
561 sub _funcname { my ($gh, $str) = @_; $str; }
562 sub _varname { my ($gh, $str) = @_; $str; }
563 sub _new_mode { my ($gh, $mode) = @_; }
564 sub _dl { my ($gh, $str) = @_; $str; }
565 sub _ul { my ($gh, $level) = @_; ''; }
566 sub _ol { my ($gh, $level) = @_; ''; }
567 sub _blockquote { my ($gh, $str) = @_; $str; }
568 sub _hr { my ($gh) = @_; ''; }
569 sub _heading { my ($gh, $level, $l) = @_; $l; }
570 sub _table { my ($gh, $str) = @_; $str; }
571 sub _prefix { my ($gh) = @_; }
572 sub _postfix { my ($gh) = @_; }
574 ###########################################################
576 =head1 DRIVER SPECIFIC INFORMATION
578 =cut
580 ###########################################################
581 # HTML Driver
583 package Grutatxt::HTML;
585 @ISA = ("Grutatxt");
587 =head2 HTML Driver
589 The additional parameters for a new Grutatxt object are:
591 =over 4
593 =item I<table-headers>
595 If this boolean value is set, the first row in tables
596 is assumed to be the heading and rendered using 'th'
597 instead of 'td' tags.
599 =item I<center-tables>
601 If this boolean value is set, tables are centered.
603 =item I<expand-tables>
605 If this boolean value is set, tables are expanded (width 100%).
607 =item I<dl-as-dl>
609 If this boolean value is set, definition lists will be
610 rendered using 'dl', 'dt' and 'dd' instead of tables.
612 =item I<header-offset>
614 Offset to be summed to the heading level when rendering
615 'h?' tags (default is 0).
617 =item I<class-oddeven>
619 If this boolean value is set, tables will be rendered
620 with an "oddeven" CSS class, and rows alternately classed
621 as "even" or "odd". If it's not set, no CSS class info
622 is added to tables.
624 =item I<url-label-max>
626 If an URL without label is given (that is, the URL itself
627 is used as the label), it's trimmed to have as much
628 characters as this value says. By default it's 80.
630 =back
632 =cut
634 sub new
636 my ($class, %args) = @_;
637 my ($gh);
639 bless(\%args, $class);
640 $gh = \%args;
642 $gh->{'-process-urls'} = 1;
643 $gh->{'url-label-max'} ||= 80;
645 return $gh;
649 sub _inline
651 my ($gh, $l) = @_;
653 # accept unnamed and HTML inlines
654 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
655 $gh->{'-inline'} = 'HTML';
656 return;
659 if ($l =~ /^>>$/) {
660 delete $gh->{'-inline'};
661 return;
664 if ($gh->{'-inline'} eq 'HTML') {
665 $gh->_push($l);
670 sub _escape
672 my ($gh, $l) = @_;
674 $l =~ s/&/&amp;/g;
675 $l =~ s/</&lt;/g;
676 $l =~ s/>/&gt;/g;
678 return $l;
682 sub _empty_line
684 my ($gh) = @_;
686 return('<p>');
690 sub _url
692 my ($gh, $url, $label) = @_;
694 if (!$label) {
695 $label = $url;
697 if (length($label) > $gh->{'url-label-max'}) {
698 $label = substr($label, 0,
699 $gh->{'url-label-max'}) . '...';
703 return "<a href = \"$url\">$label</a>";
707 sub _strong
709 my ($gh, $str) = @_;
710 return "<strong>$str</strong>";
714 sub _em
716 my ($gh, $str) = @_;
717 return "<em>$str</em>";
721 sub _code
723 my ($gh, $str) = @_;
724 return "<code class = 'literal'>$str</code>";
728 sub _funcname
730 my ($gh, $str) = @_;
731 return "<code class = 'funcname'>$str</code>";
735 sub _varname
737 my ($gh, $str) = @_;
738 return "<code class = 'var'>$str</code>";
742 sub _new_mode
744 my ($gh, $mode, $params) = @_;
746 if ($mode ne $gh->{'-mode'}) {
747 my $tag;
749 # clean list levels
750 if ($gh->{'-mode'} eq 'ul') {
751 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
753 elsif ($gh->{'-mode'} eq 'ol') {
754 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
756 elsif ($gh->{'-mode'}) {
757 $gh->_push("</$gh->{'-mode'}>");
760 # send new one
761 $tag = $params ? "<$mode $params>" : "<$mode>";
762 $gh->_push($tag) if $mode;
764 $gh->{'-mode'} = $mode;
765 $gh->{'-mode-elems'} = 0;
767 # clean previous lists
768 $gh->{'-ul-levels'} = undef;
769 $gh->{'-ol-levels'} = undef;
774 sub _dl
776 my ($gh, $str) = @_;
777 my ($ret) = '';
779 if ($gh->{'dl-as-dl'}) {
780 $gh->_new_mode('dl');
781 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
783 else {
784 $gh->_new_mode('table');
785 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
788 return $ret;
792 sub _ul
794 my ($gh, $levels) = @_;
795 my ($ret);
797 $ret = '';
799 if ($levels > 0) {
800 $ret .= '<ul>';
802 elsif ($levels < 0) {
803 $ret .= '</li></ul>' x abs($levels);
806 if ($gh->{'-mode'} ne 'ul') {
807 $gh->{'-mode'} = 'ul';
809 else {
810 $ret .= '</li>' if $levels <= 0;
813 $ret .= '<li>';
815 return $ret;
819 sub _ol
821 my ($gh, $levels) = @_;
822 my ($ret);
824 $ret = '';
826 if ($levels > 0) {
827 $ret .= '<ol>';
829 elsif ($levels < 0) {
830 $ret .= '</li></ol>' x abs($levels);
833 if ($gh->{'-mode'} ne 'ol') {
834 $gh->{'-mode'} = 'ol';
836 else {
837 $ret .= '</li>' if $levels <= 0;
840 $ret .= '<li>';
842 return $ret;
846 sub _blockquote
848 my ($gh) = @_;
850 $gh->_new_mode('blockquote');
851 return "\"";
855 sub _hr
857 my ($gh) = @_;
859 return "<hr size = '1' noshade = 'noshade'>";
863 sub _heading
865 my ($gh, $level, $l) = @_;
867 # creates a valid anchor
868 my ($a) = lc($l);
870 $a =~ s/[\"\'\/]//g;
871 $a =~ s/\s/_/g;
872 $a =~ s/<[^>]+>//g;
874 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
875 $a, $level+$gh->{'header-offset'},
876 $l, $level+$gh->{'header-offset'});
878 return $l;
882 sub _table
884 my ($gh, $str) = @_;
886 if ($gh->{'-mode'} eq 'table') {
887 my ($class) = '';
888 my (@spans) = $gh->_calc_col_span($str);
890 # calculate CSS class, if any
891 if ($gh->{'class-oddeven'}) {
892 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
895 $str = "<tr $class>";
897 # build columns
898 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
899 my ($i,$s);
901 $i = ${$gh->{'-table'}}[$n];
902 $i = "&nbsp;" if $i =~ /^\s*$/;
904 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
906 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
907 $str .= "<th $class $s>$i</th>";
909 else {
910 $str .= "<td $class $s>$i</td>";
914 $str .= '</tr>';
916 @{$gh->{'-table'}} = ();
917 $gh->{'-tbl-row'}++;
919 else {
920 # new table
921 my ($params);
923 $params = "border = '1'";
924 $params .= " width = '100\%'" if $gh->{'expand-tables'};
925 $params .= " align = 'center'" if $gh->{'center-tables'};
926 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
928 $gh->_new_mode('table', $params);
930 @{$gh->{'-table'}} = ();
931 $gh->{'-tbl-row'} = 1;
932 $str = '';
935 return $str;
939 ###########################################################
940 # troff Driver
942 package Grutatxt::troff;
944 @ISA = ("Grutatxt");
946 =head2 troff Driver
948 The troff driver uses the B<-me> macros and B<tbl>. A
949 good way to post-process this output (to PostScript in
950 the example) could be by using
952 groff -t -me -Tps
954 The additional parameters for a new Grutatxt object are:
956 =over 4
958 =item I<normal-size>
960 The point size of normal text. By default is 10.
962 =item I<heading-sizes>
964 This argument must be a reference to an array containing
965 the size in points of the 3 different heading levels. By
966 default, level sizes are [ 20, 18, 15 ].
968 =item I<table-type>
970 The type of table to be rendered by B<tbl>. Can be
971 I<allbox> (all lines rendered; this is the default value),
972 I<box> (only outlined) or I<doublebox> (only outlined by
973 a double line).
975 =back
977 =cut
979 sub new
981 my ($class, %args) = @_;
982 my ($gh);
984 bless(\%args,$class);
985 $gh = \%args;
987 $gh->{'-process-urls'} = 0;
989 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
990 $gh->{'normal-size'} ||= 10;
991 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
993 return $gh;
997 sub _prefix
999 my ($gh) = @_;
1001 $gh->_push(".nr pp $gh->{'normal-size'}");
1002 $gh->_push(".nh");
1006 sub _inline
1008 my ($gh,$l) = @_;
1010 # accept only troff inlines
1011 if ($l =~ /^<<\s*troff$/i) {
1012 $gh->{'-inline'} = 'troff';
1013 return;
1016 if ($l =~ /^>>$/) {
1017 delete $gh->{'-inline'};
1018 return;
1021 if ($gh->{'-inline'} eq 'troff') {
1022 $gh->_push($l);
1027 sub _escape
1029 my ($gh,$l) = @_;
1031 $l =~ s/\\/\\\\/g;
1032 $l =~ s/^'/\\&'/;
1034 return $l;
1038 sub _empty_line
1040 my ($gh) = @_;
1042 return '.lp';
1046 sub _strong
1048 my ($gh, $str) = @_;
1049 return "\\fB$str\\fP";
1053 sub _em
1055 my ($gh, $str) = @_;
1056 return "\\fI$str\\fP";
1060 sub _code
1062 my ($gh, $str) = @_;
1063 return "\\fI$str\\fP";
1067 sub _funcname
1069 my ($gh, $str) = @_;
1070 return "\\fB$str\\fP";
1074 sub _varname
1076 my ($gh, $str) = @_;
1077 return "\\fI$str\\fP";
1081 sub _new_mode
1083 my ($gh, $mode, $params) = @_;
1085 if ($mode ne $gh->{'-mode'}) {
1086 my $tag;
1088 # flush previous list
1089 if ($gh->{'-mode'} eq 'pre') {
1090 $gh->_push('.)l');
1092 elsif ($gh->{'-mode'} eq 'table') {
1093 chomp($gh->{'-table-head'});
1094 $gh->{'-table-head'} =~ s/\s+$//;
1095 $gh->_push($gh->{'-table-head'} . '.');
1096 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1098 elsif ($gh->{'-mode'} eq 'blockquote') {
1099 $gh->_push('.)q');
1102 # send new one
1103 if ($mode eq 'pre') {
1104 $gh->_push('.(l L');
1106 elsif ($mode eq 'blockquote') {
1107 $gh->_push('.(q');
1110 $gh->{'-mode'} = $mode;
1115 sub _dl
1117 my ($gh, $str) = @_;
1119 $gh->_new_mode('dl');
1120 return ".ip \"$str\"\n";
1124 sub _ul
1126 my ($gh) = @_;
1128 $gh->_new_mode('ul');
1129 return ".bu\n";
1133 sub _ol
1135 my ($gh) = @_;
1137 $gh->_new_mode('ol');
1138 return ".np\n";
1142 sub _blockquote
1144 my ($gh) = @_;
1146 $gh->_new_mode('blockquote');
1147 return "\"";
1151 sub _hr
1153 my ($gh) = @_;
1155 return '.hl';
1159 sub _heading
1161 my ($gh, $level, $l) = @_;
1163 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1165 return $l;
1169 sub _table
1171 my ($gh, $str) = @_;
1173 if ($gh->{'-mode'} eq 'table') {
1174 my ($h, $b);
1175 my (@spans) = $gh->_calc_col_span($str);
1177 # build columns
1178 $h = '';
1179 $b = '';
1180 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1181 my ($i);
1183 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1184 $h .= 'cB ';
1186 else {
1187 $h .= 'l ';
1190 # add span columns
1191 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1193 $b .= '#' if $n;
1195 $i = ${$gh->{'-table'}}[$n];
1196 $i =~ s/^\s+//;
1197 $i =~ s/\s+$//;
1198 $i =~ s/(\s)+/$1/g;
1199 $b .= $i;
1202 # add a separator
1203 $b .= "\n_" if $gh->{'table-headers'} and
1204 $gh->{'-tbl-row'} == 1 and
1205 $gh->{'table-type'} ne "allbox";
1207 $gh->{'-table-head'} .= "$h\n";
1208 $gh->{'-table-body'} .= "$b\n";
1210 @{$gh->{'-table'}} = ();
1211 $gh->{'-tbl-row'}++;
1213 else {
1214 # new table
1215 $gh->_new_mode('table');
1217 @{$gh->{'-table'}} = ();
1218 $gh->{'-tbl-row'} = 1;
1220 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1221 $gh->{'-table-body'} = '';
1224 $str = '';
1225 return $str;
1229 sub _postfix
1231 my ($gh) = @_;
1233 # add to top headings and footers
1234 unshift(@{$gh->{'o'}},".ef '\%' ''");
1235 unshift(@{$gh->{'o'}},".of '' '\%'");
1236 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1237 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1241 ###########################################################
1242 # man Driver
1244 package Grutatxt::man;
1246 @ISA = ("Grutatxt::troff", "Grutatxt");
1248 =head2 man Driver
1250 The man driver is used to generate Unix-like man pages. Note that
1251 all headings have the same level with this output driver.
1253 The additional parameters for a new Grutatxt object are:
1255 =over 4
1257 =item I<section>
1259 The man page section (see man documentation). By default is 1.
1261 =item I<page-name>
1263 The name of the page. This is usually the name of the program
1264 or function the man page is documenting and will be shown in the
1265 page header. By default is the empty string.
1267 =back
1269 =cut
1271 sub new
1273 my ($class, %args) = @_;
1274 my ($gh);
1276 bless(\%args,$class);
1277 $gh = \%args;
1279 $gh->{'-process-urls'} = 0;
1281 $gh->{'section'} ||= 1;
1282 $gh->{'page-name'} ||= "";
1284 return $gh;
1288 sub _prefix
1290 my ($gh) = @_;
1292 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1296 sub _inline
1298 my ($gh, $l) = @_;
1300 # accept only man markup inlines
1301 if ($l =~ /^<<\s*man$/i) {
1302 $gh->{'-inline'} = 'man';
1303 return;
1306 if ($l =~ /^>>$/) {
1307 delete $gh->{'-inline'};
1308 return;
1311 if ($gh->{'-inline'} eq 'man') {
1312 $gh->_push($l);
1317 sub _empty_line
1319 my ($gh) = @_;
1321 return ' ';
1325 sub _new_mode
1327 my ($gh,$mode,$params) = @_;
1329 if ($mode ne $gh->{'-mode'}) {
1330 my $tag;
1332 # flush previous list
1333 if ($gh->{'-mode'} eq 'pre' or
1334 $gh->{'-mode'} eq 'table') {
1335 $gh->_push('.fi');
1338 if ($gh->{'-mode'} eq 'blockquote') {
1339 $gh->_push('.RE');
1342 if ($gh->{'-mode'} eq 'ul') {
1343 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1346 if ($gh->{'-mode'} eq 'ol') {
1347 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1350 # send new one
1351 if ($mode eq 'pre' or $mode eq 'table') {
1352 $gh->_push('.nf');
1355 if ($mode eq 'blockquote') {
1356 $gh->_push('.RS 4');
1359 $gh->{'-mode'} = $mode;
1364 sub _dl
1366 my ($gh, $str) = @_;
1368 $gh->_new_mode('dl');
1369 return ".TP\n.B \"$str\"\n";
1373 sub _ul
1375 my ($gh, $levels) = @_;
1376 my ($ret) = '';
1378 if ($levels > 0) {
1379 $ret = ".RS 4\n";
1381 elsif ($levels < 0) {
1382 $ret = ".RE\n" x abs($levels);
1385 $gh->_new_mode('ul');
1386 return $ret . ".TP 4\n\\(bu\n";
1390 sub _ol
1392 my ($gh, $levels) = @_;
1393 my $l = @{$gh->{'-ol-levels'}};
1394 my $ret = '';
1396 $gh->{'-ol-level'} += $levels;
1398 if ($levels > 0) {
1399 $ret = ".RS 4\n";
1401 $l[$gh->{'-ol-level'}] = 1;
1403 elsif ($levels < 0) {
1404 $ret = ".RE\n" x abs($levels);
1407 $gh->_new_mode('ol');
1408 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1410 return $ret;
1414 sub _hr
1416 my ($gh) = @_;
1418 return '';
1422 sub _heading
1424 my ($gh, $level, $l) = @_;
1426 # all headers are the same depth in man pages
1427 return ".SH \"" . uc($l) . "\"";
1431 sub _table
1433 my ($gh, $str) = @_;
1435 if ($gh->{'-mode'} eq 'table') {
1436 foreach my $r (@{$gh->{'-table-raw'}}) {
1437 $gh->_push("|$r|");
1440 else {
1441 $gh->_new_mode('table');
1444 @{$gh->{'-table'}} = ();
1445 @{$gh->{'-table-raw'}} = ();
1447 $gh->_push($str);
1449 return '';
1453 sub _postfix
1458 ###########################################################
1459 # latex Driver
1461 package Grutatxt::latex;
1463 @ISA = ("Grutatxt");
1465 =head2 LaTeX Driver
1467 The additional parameters for a new Grutatxt object are:
1469 =over 4
1471 =item I<docclass>
1473 The LaTeX document class. By default is 'report'. You can also use
1474 'article' or 'book' (consult your LaTeX documentation for details).
1476 =item I<papersize>
1478 The paper size to be used in the document. By default is 'a4paper'.
1480 =item I<encoding>
1482 The character encoding used in the document. By default is 'latin1'.
1484 =back
1486 Note that you can't nest further than 4 levels in LaTeX; if you do,
1487 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1489 =cut
1491 sub new
1493 my ($class, %args) = @_;
1494 my ($gh);
1496 bless(\%args,$class);
1497 $gh = \%args;
1499 $gh->{'-process-urls'} = 0;
1501 $gh->{'-docclass'} ||= 'report';
1502 $gh->{'-papersize'} ||= 'a4paper';
1503 $gh->{'-encoding'} ||= 'latin1';
1505 return $gh;
1509 sub _prefix
1511 my ($gh) = @_;
1513 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1514 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1516 $gh->_push("\\begin{document}");
1520 sub _inline
1522 my ($gh, $l) = @_;
1524 # accept only latex inlines
1525 if ($l =~ /^<<\s*latex$/i) {
1526 $gh->{'-inline'} = 'latex';
1527 return;
1530 if ($l =~ /^>>$/) {
1531 delete $gh->{'-inline'};
1532 return;
1535 if ($gh->{'-inline'} eq 'latex') {
1536 $gh->_push($l);
1541 sub _escape
1543 my ($gh, $l) = @_;
1545 $l =~ s/ _ / \\_ /g;
1546 $l =~ s/ ~ / \\~ /g;
1547 $l =~ s/ & / \\& /g;
1549 return $l;
1553 sub _escape_post
1555 my ($gh, $l) = @_;
1557 $l =~ s/ # / \\# /g;
1558 $l =~ s/^\\n$//g;
1559 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1561 return $l;
1565 sub _empty_line
1567 my ($gh) = @_;
1569 return "\\n";
1573 sub _strong
1575 my ($gh, $str) = @_;
1576 return "\\textbf{$str}";
1580 sub _em
1582 my ($gh, $str) = @_;
1583 return "\\emph{$str}";
1587 sub _code
1589 my ($gh, $str) = @_;
1590 return "{\\tt $str}";
1594 sub _funcname
1596 my ($gh, $str) = @_;
1597 return "{\\tt $str}";
1601 sub _varname
1603 my ($gh, $str) = @_;
1605 $str =~ s/^\$/\\\$/;
1607 return "{\\tt $str}";
1611 sub _new_mode
1613 my ($gh, $mode, $params) = @_;
1615 # mode equivalences
1616 my %latex_modes = (
1617 'pre' => 'verbatim',
1618 'blockquote' => 'quote',
1619 'table' => 'tabular',
1620 'dl' => 'description',
1621 'ul' => 'itemize',
1622 'ol' => 'enumerate'
1625 if ($mode ne $gh->{'-mode'}) {
1626 # close previous mode
1627 if ($gh->{'-mode'} eq 'ul') {
1628 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1630 elsif ($gh->{'-mode'} eq 'ol') {
1631 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1633 elsif ($gh->{'-mode'} eq 'table') {
1634 $gh->_push("\\end{tabular}\n");
1636 else {
1637 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1638 if $gh->{'-mode'};
1641 # send new one
1642 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1643 if $mode;
1645 $gh->{'-mode'} = $mode;
1647 $gh->{'-ul-levels'} = undef;
1648 $gh->{'-ol-levels'} = undef;
1653 sub _dl
1655 my ($gh, $str) = @_;
1657 $gh->_new_mode('dl');
1658 return "\\item[$str]\n";
1662 sub _ul
1664 my ($gh, $levels) = @_;
1665 my ($ret);
1667 $ret = '';
1669 if ($levels > 0) {
1670 $ret .= "\\begin{itemize}\n";
1672 elsif ($levels < 0) {
1673 $ret .= "\\end{itemize}\n" x abs($levels);
1676 $gh->{'-mode'} = 'ul';
1678 $ret .= "\\item\n";
1680 return $ret;
1684 sub _ol
1686 my ($gh, $levels) = @_;
1687 my ($ret);
1689 $ret = '';
1691 if ($levels > 0) {
1692 $ret .= "\\begin{enumerate}\n";
1694 elsif ($levels < 0) {
1695 $ret .= "\\end{enumerate}\n" x abs($levels);
1698 $gh->{'-mode'} = 'ol';
1700 $ret .= "\\item\n";
1702 return $ret;
1706 sub _blockquote
1708 my ($gh) = @_;
1710 $gh->_new_mode('blockquote');
1711 return "``";
1715 sub _hr
1717 my ($gh) = @_;
1719 return "------------\n";
1723 sub _heading
1725 my ($gh, $level, $l) = @_;
1727 my @latex_headings = ( "\\section*{", "\\subsection*{",
1728 "\\subsubsection*{");
1730 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1732 return $l;
1736 sub _table
1738 my ($gh,$str) = @_;
1740 if ($gh->{'-mode'} eq 'table') {
1741 my ($class) = '';
1742 my (@spans) = $gh->_calc_col_span($str);
1743 my (@cols);
1745 $str = '';
1747 # build columns
1748 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1749 my ($i, $s);
1751 $i = ${$gh->{'-table'}}[$n];
1752 $i = "&nbsp;" if $i =~ /^\s*$/;
1754 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1756 # multispan columns
1757 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1758 if $spans[$n] > 1;
1760 $i =~ s/\s{2,}/ /g;
1761 $i =~ s/^\s+//;
1762 $i =~ s/\s+$//;
1764 push(@cols, $i);
1767 $str .= join('&', @cols) . "\\\\\n\\hline";
1769 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1771 @{$gh->{'-table'}} = ();
1772 $gh->{'-tbl-row'}++;
1774 else {
1775 # new table
1777 # count the number of columns
1778 $str =~ s/[^\+]//g;
1779 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1781 $gh->_push();
1782 $gh->_new_mode('table', $params);
1784 @{$gh->{'-table'}} = ();
1785 $gh->{'-tbl-row'} = 1;
1786 $str = '';
1789 return $str;
1793 sub _postfix
1795 my ($gh) = @_;
1797 $gh->_push("\\end{document}");
1801 =head1 AUTHOR
1803 Angel Ortega angel@triptico.com
1805 =cut