Small tunings to HTML.
[grutatxt.git] / Grutatxt.pm
blob2991164ddb3506e1a502918b325b33fdf2d5a1c4
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2007 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.14-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 A comprehensive description of the markup is defined in
66 the README file, included with the Grutatxt package (it is
67 written in Grutatxt format itself, so it can be converted
68 using the I<grutatxt> tool to any of the supported formats).
69 The latest version (and more information) can be retrieved
70 from the Grutatxt home page at:
72 http://www.triptico.com/software/grutatxt.html
74 =head1 FUNCTIONS AND METHODS
76 =head2 B<new>
78 $grutatxt = new Grutatxt([ "mode" => $mode, ]
79 [ "title" => \$title, ]
80 [ "marks" => \@marks, ]
81 [ "index" => \@index, ]
82 [ "abstract" => \$abstract, ]
83 [ "strip-parens" => $bool, ]
84 [ "strip-dollars" => $bool, ]
85 [ %driver_specific_arguments ] );
87 Creates a new Grutatxt object instance. All parameters are
88 optional.
90 =over 4
92 =item I<mode>
94 Output format. Can be HTML, troff or man. HTML is used if not specified.
96 =item I<title>
98 If I<title> is specified as a reference to scalar, the first
99 level 1 heading found in the text is stored inside it.
101 =item I<marks>
103 Marks in the Grutatxt markup are created by inserting the
104 string <-> alone in a line. If I<marks> is specified as a
105 reference to array, it will be filled with the subscripts
106 (relative to the output array) of the lines where the marks
107 are found in the text.
109 =item I<index>
111 If I<index> is specified as a reference to array, it will
112 be filled with strings in the format
114 level,heading
116 This information can be used to build a table of contents
117 of the processed text.
119 =item I<strip-parens>
121 Function names in the Grutatxt markup are strings of
122 alphanumeric characters immediately followed by a pair
123 of open and close parentheses. If this boolean value is
124 set, function names found in the processed text will have
125 their parentheses deleted.
127 =item I<strip-dollars>
129 Variable names in the Grutatxt markup are strings of
130 alphanumeric characters preceded by a dollar sign.
131 If this boolean value is set, variable names found in
132 the processed text will have the dollar sign deleted.
134 =item I<abstract>
136 The I<abstract> of a Grutatxt document is the fragment of text
137 from the beginning of the document to the end of the first
138 paragraph after the title. If I<abstract> is specified as a
139 reference to scalar, it will contain (after each call to the
140 B<process()> method) the subscript of the element of the output
141 array that marks the end of the subject.
143 =back
145 =cut
147 sub new
149 my ($class, %args) = @_;
150 my ($gh);
152 $args{'mode'} ||= 'HTML';
154 $class .= "::" . $args{'mode'};
156 $gh = new $class(%args);
158 return $gh;
162 sub escape
163 # escapes special characters, ignoring passthrough code
165 my ($gh, $l) = @_;
167 # splits between << and >>
168 my (@l) = split(/(<<|>>)/, $l);
170 @l = map {
171 my $l = $_;
173 # escape only text outside << and >>
174 unless ($l eq '<<' .. $l eq '>>') {
175 $l = $gh->_escape($l);
178 $_ = $l;
179 } @l;
181 # join again, stripping << and >>
182 $l = join('', grep(!/^(<<|>>)$/, @l));
184 return $l;
188 =head2 B<process>
190 @output = $grutatxt->process($text);
192 Processes a text in Grutatxt format. The result is returned
193 as an array of lines.
195 =cut
197 sub process
199 my ($gh, $content) = @_;
200 my ($p);
202 # clean output
203 @{$gh->{'o'}} = ();
205 # clean title and paragraph numbers
206 $gh->{'-title'} = '';
207 $gh->{'-p'} = 0;
209 # clean marks
210 @{$gh->{'marks'}} = () if ref($gh->{'marks'});
212 # clean index
213 @{$gh->{'index'}} = () if ref($gh->{'index'});
215 # reset abstract line
216 ${$gh->{'abstract'}} = 0 if ref($gh->{'abstract'});
218 # insert prefix
219 $gh->_prefix();
221 $gh->{'-mode'} = undef;
223 foreach my $l (split(/\n/,$content)) {
224 # inline data (passthrough)
225 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
226 $gh->_inline($l);
227 next;
230 # marks
231 if ($l =~ /^\s*<\->\s*$/) {
232 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
233 if ref($gh->{'marks'});
235 next;
238 # escape possibly dangerous characters
239 $l = $gh->escape($l);
241 # empty lines
242 $l =~ s/^\r$//ge;
243 if ($l =~ s/^$/$gh->_empty_line()/ge) {
244 # mark the abstract end
245 if ($gh->{'-title'}) {
246 $gh->{'-p'}++;
248 # mark abstract if it's the
249 # second paragraph from the title
250 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
251 if $gh->{'-p'} == 2;
255 if ($gh->{'-process-urls'}) {
256 # URLs followed by a parenthesized phrase
257 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
258 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
259 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
260 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
261 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
263 # URLs without phrase
264 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
265 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
266 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
267 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
269 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
270 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
271 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
272 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
275 # change '''text''' and *text* into strong emphasis
276 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
277 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
278 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
280 # change ''text'' and _text_ into emphasis
281 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
282 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
283 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
285 # change `text' into code
286 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
288 # enclose function names
289 if ($gh->{'strip-parens'}) {
290 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
292 else {
293 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
296 # enclose variable names
297 if ($gh->{'strip-dollars'}) {
298 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
300 else {
301 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
305 # main switch
308 # definition list
309 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
310 $gh->{'-mode-elems'} ++;
313 # unsorted list
314 elsif ($gh->{'-mode'} ne 'pre' and
315 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
316 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
317 $gh->{'-mode-elems'} ++;
320 # sorted list
321 elsif ($gh->{'-mode'} ne 'pre' and
322 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
323 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
324 $gh->{'-mode-elems'} ++;
327 # quoted block
328 elsif ($l =~ s/^\s\"/$gh->_blockquote()/e) {
331 # table rows
332 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
333 $gh->{'-mode-elems'} ++;
336 # table heading / end of row
337 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
340 # preformatted text
341 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
344 # anything else
345 else {
346 # back to normal mode
347 $gh->_new_mode(undef);
350 # 1 level heading
351 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
353 # 2 level heading
354 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
356 # 3 level heading
357 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
359 # change ------ into hr
360 $l =~ s/^----*$/$gh->_hr()/e;
362 # push finally
363 $gh->_push($l) if $l;
366 # flush
367 $gh->_new_mode(undef);
369 # postfix
370 $gh->_postfix();
372 # set title
373 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
375 # set abstract, if not set
376 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
377 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
379 # travel all lines again, post-escaping
380 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
382 return @{$gh->{'o'}};
386 =head2 B<process_file>
388 @output = $grutatxt->process_file($filename);
390 Processes a file in Grutatxt format.
392 =cut
394 sub process_file
396 my ($gh, $file) = @_;
398 open F, $file or return(undef);
400 my ($content) = join('',<F>);
401 close F;
403 return $gh->process($content);
407 sub _push
409 my ($gh, $l) = @_;
411 push(@{$gh->{'o'}},$l);
415 sub _process_heading
417 my ($gh, $level, $hd) = @_;
418 my ($l);
420 $l = pop(@{$gh->{'o'}});
422 if ($l eq $gh->_empty_line()) {
423 $gh->_push($l);
424 return $hd;
427 # store title
428 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
430 # store index
431 if (ref($gh->{'index'})) {
432 push(@{$gh->{'index'}},"$level,$l");
435 return $gh->_heading($level,$l);
439 sub _calc_col_span
441 my ($gh, $l) = @_;
442 my (@spans);
444 # strip first + and all -
445 $l =~ s/^\+//;
446 $l =~ s/-//g;
448 my ($t) = 1; @spans = ();
449 for (my $n = 0; $n < length($l); $n++) {
450 if (substr($l, $n, 1) eq '+') {
451 push(@spans, $t);
452 $t = 1;
454 else {
455 # it's a colspan mark:
456 # increment
457 $t++;
461 return @spans;
465 sub _table_row
467 my ($gh, $str) = @_;
469 my @s = split(/\|/,$str);
471 for (my $n = 0; $n < scalar(@s); $n++) {
472 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
475 push(@{$gh->{'-table-raw'}}, $str);
477 return '';
481 sub _pre
483 my ($gh, $l) = @_;
485 # if any other mode is active, add to it
486 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
487 $l =~ s/^\s+//;
489 my ($a) = pop(@{$gh->{'o'}})." ".$l;
490 $gh->_push($a);
491 $l = '';
493 else {
494 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
495 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
497 $gh->_new_mode('pre');
500 return $l;
504 sub _multilevel_list
506 my ($gh, $str, $ind) = @_;
507 my (@l,$level);
509 @l = @{$gh->{$str}};
510 $ind = length($ind);
511 $level = 0;
513 if ($l[-1] < $ind) {
514 # if last level is less indented, increase
515 # nesting level
516 push(@l, $ind);
517 $level++;
519 elsif ($l[-1] > $ind) {
520 # if last level is more indented, decrease
521 # levels until the same is found (or back to
522 # the beginning if not)
523 while (pop(@l)) {
524 $level--;
525 last if $l[-1] == $ind;
529 $gh->{$str} = \@l;
531 return $level;
535 sub _unsorted_list
537 my ($gh, $ind) = @_;
539 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
543 sub _ordered_list
545 my ($gh, $ind) = @_;
547 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
551 # empty stubs for falling through the superclass
553 sub _inline { my ($gh, $l) = @_; $l; }
554 sub _escape { my ($gh, $l) = @_; $l; }
555 sub _escape_post { my ($gh, $l) = @_; $l; }
556 sub _empty_line { my ($gh) = @_; ''; }
557 sub _url { my ($gh, $url, $label) = @_; ''; }
558 sub _strong { my ($gh, $str) = @_; $str; }
559 sub _em { my ($gh, $str) = @_; $str; }
560 sub _code { my ($gh, $str) = @_; $str; }
561 sub _funcname { my ($gh, $str) = @_; $str; }
562 sub _varname { my ($gh, $str) = @_; $str; }
563 sub _new_mode { my ($gh, $mode) = @_; }
564 sub _dl { my ($gh, $str) = @_; $str; }
565 sub _ul { my ($gh, $level) = @_; ''; }
566 sub _ol { my ($gh, $level) = @_; ''; }
567 sub _blockquote { my ($gh, $str) = @_; $str; }
568 sub _hr { my ($gh) = @_; ''; }
569 sub _heading { my ($gh, $level, $l) = @_; $l; }
570 sub _table { my ($gh, $str) = @_; $str; }
571 sub _prefix { my ($gh) = @_; }
572 sub _postfix { my ($gh) = @_; }
574 ###########################################################
576 =head1 DRIVER SPECIFIC INFORMATION
578 =cut
580 ###########################################################
581 # HTML Driver
583 package Grutatxt::HTML;
585 @ISA = ("Grutatxt");
587 =head2 HTML Driver
589 The additional parameters for a new Grutatxt object are:
591 =over 4
593 =item I<table-headers>
595 If this boolean value is set, the first row in tables
596 is assumed to be the heading and rendered using <th>
597 instead of <td> tags.
599 =item I<center-tables>
601 If this boolean value is set, tables are centered.
603 =item I<expand-tables>
605 If this boolean value is set, tables are expanded (width 100%).
607 =item I<dl-as-dl>
609 If this boolean value is set, definition lists will be
610 rendered using <dl>, <dt> and <dd> instead of tables.
612 =item I<header-offset>
614 Offset to be summed to the heading level when rendering
615 <h?> tags (default is 0).
617 =item I<class-oddeven>
619 If this boolean value is set, tables will be rendered
620 with an "oddeven" CSS class, and rows alternately classed
621 as "even" or "odd". If it's not set, no CSS class info
622 is added to tables.
624 =back
626 =cut
628 sub new
630 my ($class, %args) = @_;
631 my ($gh);
633 bless(\%args, $class);
634 $gh = \%args;
636 $gh->{'-process-urls'} = 1;
638 return $gh;
642 sub _inline
644 my ($gh, $l) = @_;
646 # accept unnamed and HTML inlines
647 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
648 $gh->{'-inline'} = 'HTML';
649 return;
652 if ($l =~ /^>>$/) {
653 delete $gh->{'-inline'};
654 return;
657 if ($gh->{'-inline'} eq 'HTML') {
658 $gh->_push($l);
663 sub _escape
665 my ($gh, $l) = @_;
667 $l =~ s/&/&amp;/g;
668 $l =~ s/</&lt;/g;
669 $l =~ s/>/&gt;/g;
671 return $l;
675 sub _empty_line
677 my ($gh) = @_;
679 return('<p>');
683 sub _url
685 my ($gh, $url, $label) = @_;
687 $label = $url unless $label;
689 return "<a href = '$url'>$label</a>";
693 sub _strong
695 my ($gh, $str) = @_;
696 return "<strong>$str</strong>";
700 sub _em
702 my ($gh, $str) = @_;
703 return "<em>$str</em>";
707 sub _code
709 my ($gh, $str) = @_;
710 return "<code class = 'literal'>$str</code>";
714 sub _funcname
716 my ($gh, $str) = @_;
717 return "<code class = 'funcname'>$str</code>";
721 sub _varname
723 my ($gh, $str) = @_;
724 return "<code class = 'var'>$str</code>";
728 sub _new_mode
730 my ($gh, $mode, $params) = @_;
732 if ($mode ne $gh->{'-mode'}) {
733 my $tag;
735 # clean list levels
736 if ($gh->{'-mode'} eq 'ul') {
737 $gh->_push('</ul>' x scalar(@{$gh->{'-ul-levels'}}));
739 elsif ($gh->{'-mode'} eq 'ol') {
740 $gh->_push('</ol>' x scalar(@{$gh->{'-ol-levels'}}));
742 elsif ($gh->{'-mode'}) {
743 $gh->_push("</$gh->{'-mode'}>");
746 # send new one
747 $tag = $params ? "<$mode $params>" : "<$mode>";
748 $gh->_push($tag) if $mode;
750 $gh->{'-mode'} = $mode;
751 $gh->{'-mode-elems'} = 0;
753 # clean previous lists
754 $gh->{'-ul-levels'} = undef;
755 $gh->{'-ol-levels'} = undef;
760 sub _dl
762 my ($gh, $str) = @_;
763 my ($ret) = '';
765 if ($gh->{'dl-as-dl'}) {
766 $gh->_new_mode('dl');
767 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
769 else {
770 $gh->_new_mode('table');
771 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
774 return $ret;
778 sub _ul
780 my ($gh, $levels) = @_;
781 my ($ret);
783 $ret = '';
785 if ($levels > 0) {
786 $ret .= '<ul>';
788 elsif ($levels < 0) {
789 $ret .= '</ul>' x abs($levels);
792 $gh->{'-mode'} = 'ul';
794 $ret .= '<li>';
796 return $ret;
800 sub _ol
802 my ($gh, $levels) = @_;
803 my ($ret);
805 $ret = '';
807 if ($levels > 0) {
808 $ret = '<ol>';
810 elsif ($levels < 0) {
811 $ret = '</ol>' x abs($levels);
814 $gh->{'-mode'} = 'ol';
816 $ret .= '<li>';
818 return $ret;
822 sub _blockquote
824 my ($gh) = @_;
826 $gh->_new_mode('blockquote');
827 return "\"";
831 sub _hr
833 my ($gh) = @_;
835 return "<hr size = '1' noshade = 'noshade'>";
839 sub _heading
841 my ($gh, $level, $l) = @_;
843 # creates a valid anchor
844 my ($a) = lc($l);
846 $a =~ s/[\"\'\/]//g;
847 $a =~ s/\s/_/g;
848 $a =~ s/<[^>]+>//g;
850 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
851 $a, $level+$gh->{'header-offset'},
852 $l, $level+$gh->{'header-offset'});
854 return $l;
858 sub _table
860 my ($gh, $str) = @_;
862 if ($gh->{'-mode'} eq 'table') {
863 my ($class) = '';
864 my (@spans) = $gh->_calc_col_span($str);
866 # calculate CSS class, if any
867 if ($gh->{'class-oddeven'}) {
868 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
871 $str = "<tr $class>";
873 # build columns
874 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
875 my ($i,$s);
877 $i = ${$gh->{'-table'}}[$n];
878 $i = "&nbsp;" if $i =~ /^\s*$/;
880 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
882 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
883 $str .= "<th $class $s>$i</th>";
885 else {
886 $str .= "<td $class $s>$i</td>";
890 $str .= '</tr>';
892 @{$gh->{'-table'}} = ();
893 $gh->{'-tbl-row'}++;
895 else {
896 # new table
897 my ($params);
899 $params = "border = '1'";
900 $params .= " width = '100\%'" if $gh->{'expand-tables'};
901 $params .= " align = 'center'" if $gh->{'center-tables'};
902 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
904 $gh->_new_mode('table', $params);
906 @{$gh->{'-table'}} = ();
907 $gh->{'-tbl-row'} = 1;
908 $str = '';
911 return $str;
915 ###########################################################
916 # troff Driver
918 package Grutatxt::troff;
920 @ISA = ("Grutatxt");
922 =head2 troff Driver
924 The troff driver uses the B<-me> macros and B<tbl>. A
925 good way to post-process this output (to PostScript in
926 the example) could be by using
928 groff -t -me -Tps
930 The additional parameters for a new Grutatxt object are:
932 =over 4
934 =item I<normal-size>
936 The point size of normal text. By default is 10.
938 =item I<heading-sizes>
940 This argument must be a reference to an array containing
941 the size in points of the 3 different heading levels. By
942 default, level sizes are [ 20, 18, 15 ].
944 =item I<table-type>
946 The type of table to be rendered by B<tbl>. Can be
947 I<allbox> (all lines rendered; this is the default value),
948 I<box> (only outlined) or I<doublebox> (only outlined by
949 a double line).
951 =back
953 =cut
955 sub new
957 my ($class, %args) = @_;
958 my ($gh);
960 bless(\%args,$class);
961 $gh = \%args;
963 $gh->{'-process-urls'} = 0;
965 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
966 $gh->{'normal-size'} ||= 10;
967 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
969 return $gh;
973 sub _prefix
975 my ($gh) = @_;
977 $gh->_push(".nr pp $gh->{'normal-size'}");
978 $gh->_push(".nh");
982 sub _inline
984 my ($gh,$l) = @_;
986 # accept only troff inlines
987 if ($l =~ /^<<\s*troff$/i) {
988 $gh->{'-inline'} = 'troff';
989 return;
992 if ($l =~ /^>>$/) {
993 delete $gh->{'-inline'};
994 return;
997 if ($gh->{'-inline'} eq 'troff') {
998 $gh->_push($l);
1003 sub _escape
1005 my ($gh,$l) = @_;
1007 $l =~ s/\\/\\\\/g;
1008 $l =~ s/^'/\\&'/;
1010 return $l;
1014 sub _empty_line
1016 my ($gh) = @_;
1018 return '.lp';
1022 sub _strong
1024 my ($gh, $str) = @_;
1025 return "\\fB$str\\fP";
1029 sub _em
1031 my ($gh, $str) = @_;
1032 return "\\fI$str\\fP";
1036 sub _code
1038 my ($gh, $str) = @_;
1039 return "\\fI$str\\fP";
1043 sub _funcname
1045 my ($gh, $str) = @_;
1046 return "\\fB$str\\fP";
1050 sub _varname
1052 my ($gh, $str) = @_;
1053 return "\\fI$str\\fP";
1057 sub _new_mode
1059 my ($gh, $mode, $params) = @_;
1061 if ($mode ne $gh->{'-mode'}) {
1062 my $tag;
1064 # flush previous list
1065 if ($gh->{'-mode'} eq 'pre') {
1066 $gh->_push('.)l');
1068 elsif ($gh->{'-mode'} eq 'table') {
1069 chomp($gh->{'-table-head'});
1070 $gh->{'-table-head'} =~ s/\s+$//;
1071 $gh->_push($gh->{'-table-head'} . '.');
1072 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1074 elsif ($gh->{'-mode'} eq 'blockquote') {
1075 $gh->_push('.)q');
1078 # send new one
1079 if ($mode eq 'pre') {
1080 $gh->_push('.(l L');
1082 elsif ($mode eq 'blockquote') {
1083 $gh->_push('.(q');
1086 $gh->{'-mode'} = $mode;
1091 sub _dl
1093 my ($gh, $str) = @_;
1095 $gh->_new_mode('dl');
1096 return ".ip \"$str\"\n";
1100 sub _ul
1102 my ($gh) = @_;
1104 $gh->_new_mode('ul');
1105 return ".bu\n";
1109 sub _ol
1111 my ($gh) = @_;
1113 $gh->_new_mode('ol');
1114 return ".np\n";
1118 sub _blockquote
1120 my ($gh) = @_;
1122 $gh->_new_mode('blockquote');
1123 return "\"";
1127 sub _hr
1129 my ($gh) = @_;
1131 return '.hl';
1135 sub _heading
1137 my ($gh, $level, $l) = @_;
1139 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1141 return $l;
1145 sub _table
1147 my ($gh, $str) = @_;
1149 if ($gh->{'-mode'} eq 'table') {
1150 my ($h, $b);
1151 my (@spans) = $gh->_calc_col_span($str);
1153 # build columns
1154 $h = '';
1155 $b = '';
1156 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1157 my ($i);
1159 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1160 $h .= 'cB ';
1162 else {
1163 $h .= 'l ';
1166 # add span columns
1167 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1169 $b .= '#' if $n;
1171 $i = ${$gh->{'-table'}}[$n];
1172 $i =~ s/^\s+//;
1173 $i =~ s/\s+$//;
1174 $i =~ s/(\s)+/$1/g;
1175 $b .= $i;
1178 # add a separator
1179 $b .= "\n_" if $gh->{'table-headers'} and
1180 $gh->{'-tbl-row'} == 1 and
1181 $gh->{'table-type'} ne "allbox";
1183 $gh->{'-table-head'} .= "$h\n";
1184 $gh->{'-table-body'} .= "$b\n";
1186 @{$gh->{'-table'}} = ();
1187 $gh->{'-tbl-row'}++;
1189 else {
1190 # new table
1191 $gh->_new_mode('table');
1193 @{$gh->{'-table'}} = ();
1194 $gh->{'-tbl-row'} = 1;
1196 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1197 $gh->{'-table-body'} = '';
1200 $str = '';
1201 return $str;
1205 sub _postfix
1207 my ($gh) = @_;
1209 # add to top headings and footers
1210 unshift(@{$gh->{'o'}},".ef '\%' ''");
1211 unshift(@{$gh->{'o'}},".of '' '\%'");
1212 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1213 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1217 ###########################################################
1218 # man Driver
1220 package Grutatxt::man;
1222 @ISA = ("Grutatxt::troff", "Grutatxt");
1224 =head2 man Driver
1226 The man driver is used to generate Unix-like man pages. Note that
1227 all headings have the same level with this output driver.
1229 The additional parameters for a new Grutatxt object are:
1231 =over 4
1233 =item I<section>
1235 The man page section (see man documentation). By default is 1.
1237 =item I<page-name>
1239 The name of the page. This is usually the name of the program
1240 or function the man page is documenting and will be shown in the
1241 page header. By default is the empty string.
1243 =back
1245 =cut
1247 sub new
1249 my ($class, %args) = @_;
1250 my ($gh);
1252 bless(\%args,$class);
1253 $gh = \%args;
1255 $gh->{'-process-urls'} = 0;
1257 $gh->{'section'} ||= 1;
1258 $gh->{'page-name'} ||= "";
1260 return $gh;
1264 sub _prefix
1266 my ($gh) = @_;
1268 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1272 sub _inline
1274 my ($gh, $l) = @_;
1276 # accept only man markup inlines
1277 if ($l =~ /^<<\s*man$/i) {
1278 $gh->{'-inline'} = 'man';
1279 return;
1282 if ($l =~ /^>>$/) {
1283 delete $gh->{'-inline'};
1284 return;
1287 if ($gh->{'-inline'} eq 'man') {
1288 $gh->_push($l);
1293 sub _empty_line
1295 my ($gh) = @_;
1297 return ' ';
1301 sub _new_mode
1303 my ($gh,$mode,$params) = @_;
1305 if ($mode ne $gh->{'-mode'}) {
1306 my $tag;
1308 # flush previous list
1309 if ($gh->{'-mode'} eq 'pre' or
1310 $gh->{'-mode'} eq 'table') {
1311 $gh->_push('.fi');
1314 if ($gh->{'-mode'} eq 'blockquote') {
1315 $gh->_push('.RE');
1318 if ($gh->{'-mode'} eq 'ul') {
1319 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1322 if ($gh->{'-mode'} eq 'ol') {
1323 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1326 # send new one
1327 if ($mode eq 'pre' or $mode eq 'table') {
1328 $gh->_push('.nf');
1331 if ($mode eq 'blockquote') {
1332 $gh->_push('.RS 4');
1335 $gh->{'-mode'} = $mode;
1340 sub _dl
1342 my ($gh, $str) = @_;
1344 $gh->_new_mode('dl');
1345 return ".TP\n.B \"$str\"\n";
1349 sub _ul
1351 my ($gh, $levels) = @_;
1352 my ($ret) = '';
1354 if ($levels > 0) {
1355 $ret = ".RS 4\n";
1357 elsif ($levels < 0) {
1358 $ret = ".RE\n" x abs($levels);
1361 $gh->_new_mode('ul');
1362 return $ret . ".TP 4\n\\(bu\n";
1366 sub _ol
1368 my ($gh, $levels) = @_;
1369 my $l = @{$gh->{'-ol-levels'}};
1370 my $ret = '';
1372 $gh->{'-ol-level'} += $levels;
1374 if ($levels > 0) {
1375 $ret = ".RS 4\n";
1377 $l[$gh->{'-ol-level'}] = 1;
1379 elsif ($levels < 0) {
1380 $ret = ".RE\n" x abs($levels);
1383 $gh->_new_mode('ol');
1384 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1386 return $ret;
1390 sub _hr
1392 my ($gh) = @_;
1394 return '';
1398 sub _heading
1400 my ($gh, $level, $l) = @_;
1402 # all headers are the same depth in man pages
1403 return ".SH \"" . uc($l) . "\"";
1407 sub _table
1409 my ($gh, $str) = @_;
1411 if ($gh->{'-mode'} eq 'table') {
1412 foreach my $r (@{$gh->{'-table-raw'}}) {
1413 $gh->_push("|$r|");
1416 else {
1417 $gh->_new_mode('table');
1420 @{$gh->{'-table'}} = ();
1421 @{$gh->{'-table-raw'}} = ();
1423 $gh->_push($str);
1425 return '';
1429 sub _postfix
1434 ###########################################################
1435 # latex Driver
1437 package Grutatxt::latex;
1439 @ISA = ("Grutatxt");
1441 =head2 LaTeX Driver
1443 The additional parameters for a new Grutatxt object are:
1445 =over 4
1447 =item I<docclass>
1449 The LaTeX document class. By default is 'report'. You can also use
1450 'article' or 'book' (consult your LaTeX documentation for details).
1452 =item I<papersize>
1454 The paper size to be used in the document. By default is 'a4paper'.
1456 =item I<encoding>
1458 The character encoding used in the document. By default is 'latin1'.
1460 =back
1462 Note that you can't nest further than 4 levels in LaTeX; if you do,
1463 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1465 =cut
1467 sub new
1469 my ($class, %args) = @_;
1470 my ($gh);
1472 bless(\%args,$class);
1473 $gh = \%args;
1475 $gh->{'-process-urls'} = 0;
1477 $gh->{'-docclass'} ||= 'report';
1478 $gh->{'-papersize'} ||= 'a4paper';
1479 $gh->{'-encoding'} ||= 'latin1';
1481 return $gh;
1485 sub _prefix
1487 my ($gh) = @_;
1489 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1490 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1492 $gh->_push("\\begin{document}");
1496 sub _inline
1498 my ($gh, $l) = @_;
1500 # accept only latex inlines
1501 if ($l =~ /^<<\s*latex$/i) {
1502 $gh->{'-inline'} = 'latex';
1503 return;
1506 if ($l =~ /^>>$/) {
1507 delete $gh->{'-inline'};
1508 return;
1511 if ($gh->{'-inline'} eq 'latex') {
1512 $gh->_push($l);
1517 sub _escape
1519 my ($gh, $l) = @_;
1521 $l =~ s/ _ / \\_ /g;
1522 $l =~ s/ ~ / \\~ /g;
1523 $l =~ s/ & / \\& /g;
1525 return $l;
1529 sub _escape_post
1531 my ($gh, $l) = @_;
1533 $l =~ s/ # / \\# /g;
1534 $l =~ s/^\\n$//g;
1535 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1537 return $l;
1541 sub _empty_line
1543 my ($gh) = @_;
1545 return "\\n";
1549 sub _strong
1551 my ($gh, $str) = @_;
1552 return "\\textbf{$str}";
1556 sub _em
1558 my ($gh, $str) = @_;
1559 return "\\emph{$str}";
1563 sub _code
1565 my ($gh, $str) = @_;
1566 return "{\\tt $str}";
1570 sub _funcname
1572 my ($gh, $str) = @_;
1573 return "{\\tt $str}";
1577 sub _varname
1579 my ($gh, $str) = @_;
1581 $str =~ s/^\$/\\\$/;
1583 return "{\\tt $str}";
1587 sub _new_mode
1589 my ($gh, $mode, $params) = @_;
1591 # mode equivalences
1592 my %latex_modes = (
1593 'pre' => 'verbatim',
1594 'blockquote' => 'quote',
1595 'table' => 'tabular',
1596 'dl' => 'description',
1597 'ul' => 'itemize',
1598 'ol' => 'enumerate'
1601 if ($mode ne $gh->{'-mode'}) {
1602 # close previous mode
1603 if ($gh->{'-mode'} eq 'ul') {
1604 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1606 elsif ($gh->{'-mode'} eq 'ol') {
1607 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1609 elsif ($gh->{'-mode'} eq 'table') {
1610 $gh->_push("\\end{tabular}\n");
1612 else {
1613 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1614 if $gh->{'-mode'};
1617 # send new one
1618 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1619 if $mode;
1621 $gh->{'-mode'} = $mode;
1623 $gh->{'-ul-levels'} = undef;
1624 $gh->{'-ol-levels'} = undef;
1629 sub _dl
1631 my ($gh, $str) = @_;
1633 $gh->_new_mode('dl');
1634 return "\\item[$str]\n";
1638 sub _ul
1640 my ($gh, $levels) = @_;
1641 my ($ret);
1643 $ret = '';
1645 if ($levels > 0) {
1646 $ret .= "\\begin{itemize}\n";
1648 elsif ($levels < 0) {
1649 $ret .= "\\end{itemize}\n" x abs($levels);
1652 $gh->{'-mode'} = 'ul';
1654 $ret .= "\\item\n";
1656 return $ret;
1660 sub _ol
1662 my ($gh, $levels) = @_;
1663 my ($ret);
1665 $ret = '';
1667 if ($levels > 0) {
1668 $ret .= "\\begin{enumerate}\n";
1670 elsif ($levels < 0) {
1671 $ret .= "\\end{enumerate}\n" x abs($levels);
1674 $gh->{'-mode'} = 'ol';
1676 $ret .= "\\item\n";
1678 return $ret;
1682 sub _blockquote
1684 my ($gh) = @_;
1686 $gh->_new_mode('blockquote');
1687 return "``";
1691 sub _hr
1693 my ($gh) = @_;
1695 return "------------\n";
1699 sub _heading
1701 my ($gh, $level, $l) = @_;
1703 my @latex_headings = ( "\\section*{", "\\subsection*{",
1704 "\\subsubsection*{");
1706 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1708 return $l;
1712 sub _table
1714 my ($gh,$str) = @_;
1716 if ($gh->{'-mode'} eq 'table') {
1717 my ($class) = '';
1718 my (@spans) = $gh->_calc_col_span($str);
1719 my (@cols);
1721 $str = '';
1723 # build columns
1724 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1725 my ($i, $s);
1727 $i = ${$gh->{'-table'}}[$n];
1728 $i = "&nbsp;" if $i =~ /^\s*$/;
1730 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1732 # multispan columns
1733 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1734 if $spans[$n] > 1;
1736 $i =~ s/\s{2,}/ /g;
1737 $i =~ s/^\s+//;
1738 $i =~ s/\s+$//;
1740 push(@cols, $i);
1743 $str .= join('&', @cols) . "\\\\\n\\hline";
1745 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1747 @{$gh->{'-table'}} = ();
1748 $gh->{'-tbl-row'}++;
1750 else {
1751 # new table
1753 # count the number of columns
1754 $str =~ s/[^\+]//g;
1755 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1757 $gh->_push();
1758 $gh->_new_mode('table', $params);
1760 @{$gh->{'-table'}} = ();
1761 $gh->{'-tbl-row'} = 1;
1762 $str = '';
1765 return $str;
1769 sub _postfix
1771 my ($gh) = @_;
1773 $gh->_push("\\end{document}");
1777 =head1 AUTHOR
1779 Angel Ortega angel@triptico.com
1781 =cut