Exit with return code 1 in usage().
[grutatxt.git] / Grutatxt.pm
blob7902f927844a4261047c164a3afc3bcce6ae6027
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2009 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.16-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 =for html <->
67 A comprehensive description of the markup is defined in
68 the README file, included with the Grutatxt package (it is
69 written in Grutatxt format itself, so it can be converted
70 using the I<grutatxt> tool to any of the supported formats).
71 The latest version (and more information) can be retrieved
72 from the Grutatxt home page at:
74 http://www.triptico.com/software/grutatxt.html
76 =head1 FUNCTIONS AND METHODS
78 =head2 new
80 $grutatxt = new Grutatxt([ "mode" => $mode, ]
81 [ "title" => \$title, ]
82 [ "marks" => \@marks, ]
83 [ "index" => \@index, ]
84 [ "abstract" => \$abstract, ]
85 [ "strip-parens" => $bool, ]
86 [ "strip-dollars" => $bool, ]
87 [ %driver_specific_arguments ] );
89 Creates a new Grutatxt object instance. All parameters are
90 optional.
92 =over 4
94 =item I<mode>
96 Output format. Can be HTML, troff or man. HTML is used if not specified.
98 =item I<title>
100 If I<title> is specified as a reference to scalar, the first
101 level 1 heading found in the text is stored inside it.
103 =item I<marks>
105 Marks in the Grutatxt markup are created by inserting the
106 string <-> alone in a line. If I<marks> is specified as a
107 reference to array, it will be filled with the subscripts
108 (relative to the output array) of the lines where the marks
109 are found in the text.
111 =item I<index>
113 If I<index> is specified as a reference to array, it will
114 be filled with two element arrayrefs with the level as first
115 argument and the heading as second.
117 This information can be used to build a table of contents
118 of the processed text.
120 =item I<strip-parens>
122 Function names in the Grutatxt markup are strings of
123 alphanumeric characters immediately followed by a pair
124 of open and close parentheses. If this boolean value is
125 set, function names found in the processed text will have
126 their parentheses deleted.
128 =item I<strip-dollars>
130 Variable names in the Grutatxt markup are strings of
131 alphanumeric characters preceded by a dollar sign.
132 If this boolean value is set, variable names found in
133 the processed text will have the dollar sign deleted.
135 =item I<abstract>
137 The I<abstract> of a Grutatxt document is the fragment of text
138 from the beginning of the document to the end of the first
139 paragraph after the title. If I<abstract> is specified as a
140 reference to scalar, it will contain (after each call to the
141 B<process()> method) the subscript of the element of the output
142 array that marks the end of the subject.
144 =item I<no-pure-verbatim>
146 Since version 2.0.15, text effects as italics and bold are not
147 processed in I<verbatim> (preformatted) mode. If you want to
148 revert to the old behaviour, use this option.
150 =item I<toc>
152 If set, a table of contents will be generated after the abstract.
153 The table of contents will be elaborated using headings from 2
154 and 3 levels.
156 =back
158 =cut
160 sub new
162 my ($class, %args) = @_;
163 my ($gh);
165 $args{'mode'} ||= 'HTML';
167 $class .= "::" . $args{'mode'};
169 $gh = new $class(%args);
171 return $gh;
175 sub escape
176 # escapes special characters, ignoring passthrough code
178 my ($gh, $l) = @_;
180 # splits between << and >>
181 my (@l) = split(/(<<|>>)/, $l);
183 @l = map {
184 my $l = $_;
186 # escape only text outside << and >>
187 unless ($l eq '<<' .. $l eq '>>') {
188 $l = $gh->_escape($l);
191 $_ = $l;
192 } @l;
194 # join again, stripping << and >>
195 $l = join('', grep(!/^(<<|>>)$/, @l));
197 return $l;
201 =head2 process
203 @output = $grutatxt->process($text);
205 Processes a text in Grutatxt format. The result is returned
206 as an array of lines.
208 =cut
210 sub process
212 my ($gh, $content) = @_;
213 my ($p);
215 # clean output
216 @{$gh->{'o'}} = ();
218 # clean title and paragraph numbers
219 $gh->{'-title'} = '';
220 $gh->{'-p'} = 0;
222 # clean marks
223 if (!$gh->{marks}) {
224 $gh->{marks} = \$gh->{_marks};
227 $gh->{'marks'} = [];
229 # clean index
230 if (!$gh->{index}) {
231 $gh->{index} = \$gh->{_index};
234 $gh->{'index'} = [];
236 # reset abstract line
237 if (!$gh->{abstract}) {
238 $gh->{abstract} = \$gh->{_abstract};
241 ${$gh->{'abstract'}} = 0;
243 # insert prefix
244 $gh->_prefix();
246 $gh->{'-mode'} = undef;
248 foreach my $l (split(/\n/,$content)) {
249 # inline data (passthrough)
250 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
251 $gh->_inline($l);
252 next;
255 # marks
256 if ($l =~ /^\s*<\->\s*$/) {
257 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
258 if ref($gh->{'marks'});
260 next;
263 # TOC mark
264 if ($l =~ /^\s*<\?>\s*$/) {
265 $gh->{toc} = $gh->{_toc_pos} = scalar(@{$gh->{o}});
266 next;
269 # escape possibly dangerous characters
270 $l = $gh->escape($l);
272 # empty lines
273 $l =~ s/^\r$//ge;
274 if ($l =~ s/^$/$gh->_empty_line()/ge) {
275 # mark the abstract end
276 if ($gh->{'-title'}) {
277 $gh->{'-p'}++;
279 # mark abstract if it's the
280 # second paragraph from the title
281 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
282 if $gh->{'-p'} == 2;
286 # line-mutating process
287 my $ol = $l;
289 if ($gh->{'-process-urls'}) {
290 # URLs followed by a parenthesized phrase
291 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
292 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
293 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
294 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
295 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
297 # URLs without phrase
298 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
299 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
300 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
301 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
303 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
304 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
305 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
306 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
309 # change '''text''' and *text* into strong emphasis
310 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
311 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
312 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
314 # change ''text'' and _text_ into emphasis
315 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
316 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
317 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
319 # change `text' into code
320 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
322 # enclose function names
323 if ($gh->{'strip-parens'}) {
324 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
326 else {
327 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
330 # enclose variable names
331 if ($gh->{'strip-dollars'}) {
332 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
334 else {
335 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
339 # main switch
342 # definition list
343 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
344 $gh->{'-mode-elems'} ++;
347 # unsorted list
348 elsif ($gh->{'-mode'} ne 'pre' and
349 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
350 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
351 $gh->{'-mode-elems'} ++;
354 # sorted list
355 elsif ($gh->{'-mode'} ne 'pre' and
356 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
357 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
358 $gh->{'-mode-elems'} ++;
361 # quoted block
362 elsif ($gh->{'-mode'} ne 'pre' and
363 $l =~ s/^\s\"/$gh->_blockquote()/e) {
366 # table rows
367 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
368 $gh->{'-mode-elems'} ++;
371 # table heading / end of row
372 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
375 # preformatted text
376 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
377 if ($gh->{'-mode'} eq 'pre' &&
378 !$gh->{'no-pure-verbatim'}) {
379 # set line back to original
380 $l = $ol;
384 # anything else
385 else {
386 # back to normal mode
387 $gh->_new_mode(undef);
390 # 1 level heading
391 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
393 # 2 level heading
394 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
396 # 3 level heading
397 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
399 # change ------ into hr
400 $l =~ s/^----*$/$gh->_hr()/e;
402 # push finally
403 $gh->_push($l) if $l;
406 # flush
407 $gh->_new_mode(undef);
409 # postfix
410 $gh->_postfix();
412 # set title
413 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
415 # set abstract, if not set
416 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
417 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
419 # travel all lines again, post-escaping
420 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
422 # add TOC after first paragraph
423 if ($gh->{toc} && @{$gh->{o}}) {
424 my $p = $gh->{_toc_pos} ||
425 $gh->{marks}->[0] ||
426 ${$gh->{abstract}};
428 @{$gh->{o}} = (@{$gh->{o}}[0 .. $p],
429 $gh->_toc(),
430 @{$gh->{o}}[$p + 1 ..
431 scalar(@{$gh->{o}})]);
434 return @{$gh->{'o'}};
438 =head2 process_file
440 @output = $grutatxt->process_file($filename);
442 Processes a file in Grutatxt format.
444 =cut
446 sub process_file
448 my ($gh, $file) = @_;
450 open F, $file or return(undef);
452 my ($content) = join('',<F>);
453 close F;
455 return $gh->process($content);
459 sub _push
461 my ($gh, $l) = @_;
463 push(@{$gh->{'o'}},$l);
467 sub _process_heading
469 my ($gh, $level, $hd) = @_;
470 my ($l);
472 $l = pop(@{$gh->{'o'}});
474 if ($l eq $gh->_empty_line()) {
475 $gh->_push($l);
476 return $hd;
479 # store title
480 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
482 # store index
483 if (ref($gh->{'index'})) {
484 push(@{$gh->{'index'}}, [ $level, $l ]);
487 return $gh->_heading($level,$l);
491 sub _calc_col_span
493 my ($gh, $l) = @_;
494 my (@spans);
496 # strip first + and all -
497 $l =~ s/^\+//;
498 $l =~ s/-//g;
500 my ($t) = 1; @spans = ();
501 for (my $n = 0; $n < length($l); $n++) {
502 if (substr($l, $n, 1) eq '+') {
503 push(@spans, $t);
504 $t = 1;
506 else {
507 # it's a colspan mark:
508 # increment
509 $t++;
513 return @spans;
517 sub _table_row
519 my ($gh, $str) = @_;
521 my @s = split(/\|/,$str);
523 for (my $n = 0; $n < scalar(@s); $n++) {
524 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
527 push(@{$gh->{'-table-raw'}}, $str);
529 return '';
533 sub _pre
535 my ($gh, $l) = @_;
537 # if any other mode is active, add to it
538 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
539 $l =~ s/^\s+//;
541 my ($a) = pop(@{$gh->{'o'}})." ".$l;
542 $gh->_push($a);
543 $l = '';
545 else {
546 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
547 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
549 $gh->_new_mode('pre');
552 return $l;
556 sub _multilevel_list
558 my ($gh, $str, $ind) = @_;
559 my (@l,$level);
561 @l = @{$gh->{$str}};
562 $ind = length($ind);
563 $level = 0;
565 if ($l[-1] < $ind) {
566 # if last level is less indented, increase
567 # nesting level
568 push(@l, $ind);
569 $level++;
571 elsif ($l[-1] > $ind) {
572 # if last level is more indented, decrease
573 # levels until the same is found (or back to
574 # the beginning if not)
575 while (pop(@l)) {
576 $level--;
577 last if $l[-1] == $ind;
581 $gh->{$str} = \@l;
583 return $level;
587 sub _unsorted_list
589 my ($gh, $ind) = @_;
591 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
595 sub _ordered_list
597 my ($gh, $ind) = @_;
599 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
603 # empty stubs for falling through the superclass
605 sub _inline { my ($gh, $l) = @_; $l; }
606 sub _escape { my ($gh, $l) = @_; $l; }
607 sub _escape_post { my ($gh, $l) = @_; $l; }
608 sub _empty_line { my ($gh) = @_; ''; }
609 sub _url { my ($gh, $url, $label) = @_; ''; }
610 sub _strong { my ($gh, $str) = @_; $str; }
611 sub _em { my ($gh, $str) = @_; $str; }
612 sub _code { my ($gh, $str) = @_; $str; }
613 sub _funcname { my ($gh, $str) = @_; $str; }
614 sub _varname { my ($gh, $str) = @_; $str; }
615 sub _new_mode { my ($gh, $mode) = @_; }
616 sub _dl { my ($gh, $str) = @_; $str; }
617 sub _ul { my ($gh, $level) = @_; ''; }
618 sub _ol { my ($gh, $level) = @_; ''; }
619 sub _blockquote { my ($gh, $str) = @_; $str; }
620 sub _hr { my ($gh) = @_; ''; }
621 sub _heading { my ($gh, $level, $l) = @_; $l; }
622 sub _table { my ($gh, $str) = @_; $str; }
623 sub _prefix { my ($gh) = @_; }
624 sub _postfix { my ($gh) = @_; }
625 sub _toc { my ($gh) = @_; return (); }
627 ###########################################################
629 =head1 DRIVER SPECIFIC INFORMATION
631 =cut
633 ###########################################################
634 # HTML Driver
636 package Grutatxt::HTML;
638 @ISA = ("Grutatxt");
640 =head2 HTML Driver
642 The additional parameters for a new Grutatxt object are:
644 =over 4
646 =item I<table-headers>
648 If this boolean value is set, the first row in tables
649 is assumed to be the heading and rendered using 'th'
650 instead of 'td' tags.
652 =item I<center-tables>
654 If this boolean value is set, tables are centered.
656 =item I<expand-tables>
658 If this boolean value is set, tables are expanded (width 100%).
660 =item I<dl-as-dl>
662 If this boolean value is set, definition lists will be
663 rendered using 'dl', 'dt' and 'dd' instead of tables.
665 =item I<header-offset>
667 Offset to be summed to the heading level when rendering
668 'h?' tags (default is 0).
670 =item I<class-oddeven>
672 If this boolean value is set, tables will be rendered
673 with an "oddeven" CSS class, and rows alternately classed
674 as "even" or "odd". If it's not set, no CSS class info
675 is added to tables.
677 =item I<url-label-max>
679 If an URL without label is given (that is, the URL itself
680 is used as the label), it's trimmed to have as much
681 characters as this value says. By default it's 80.
683 =back
685 =cut
687 sub new
689 my ($class, %args) = @_;
690 my ($gh);
692 bless(\%args, $class);
693 $gh = \%args;
695 $gh->{'-process-urls'} = 1;
696 $gh->{'url-label-max'} ||= 80;
698 return $gh;
702 sub _inline
704 my ($gh, $l) = @_;
706 # accept unnamed and HTML inlines
707 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
708 $gh->{'-inline'} = 'HTML';
709 return;
712 if ($l =~ /^>>$/) {
713 delete $gh->{'-inline'};
714 return;
717 if ($gh->{'-inline'} eq 'HTML') {
718 $gh->_push($l);
723 sub _escape
725 my ($gh, $l) = @_;
727 $l =~ s/&/&amp;/g;
728 $l =~ s/</&lt;/g;
729 $l =~ s/>/&gt;/g;
731 return $l;
735 sub _empty_line
737 my ($gh) = @_;
739 return('<p>');
743 sub _url
745 my ($gh, $url, $label) = @_;
747 if (!$label) {
748 $label = $url;
750 if (length($label) > $gh->{'url-label-max'}) {
751 $label = substr($label, 0,
752 $gh->{'url-label-max'}) . '...';
756 return "<a href = \"$url\">$label</a>";
760 sub _strong
762 my ($gh, $str) = @_;
763 return "<strong>$str</strong>";
767 sub _em
769 my ($gh, $str) = @_;
770 return "<em>$str</em>";
774 sub _code
776 my ($gh, $str) = @_;
777 return "<code class = 'literal'>$str</code>";
781 sub _funcname
783 my ($gh, $str) = @_;
784 return "<code class = 'funcname'>$str</code>";
788 sub _varname
790 my ($gh, $str) = @_;
791 return "<code class = 'var'>$str</code>";
795 sub _new_mode
797 my ($gh, $mode, $params) = @_;
799 if ($mode ne $gh->{'-mode'}) {
800 my $tag;
802 # clean list levels
803 if ($gh->{'-mode'} eq 'ul') {
804 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
806 elsif ($gh->{'-mode'} eq 'ol') {
807 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
809 elsif ($gh->{'-mode'}) {
810 $gh->_push("</$gh->{'-mode'}>");
813 # send new one
814 $tag = $params ? "<$mode $params>" : "<$mode>";
815 $gh->_push($tag) if $mode;
817 $gh->{'-mode'} = $mode;
818 $gh->{'-mode-elems'} = 0;
820 # clean previous lists
821 $gh->{'-ul-levels'} = undef;
822 $gh->{'-ol-levels'} = undef;
827 sub _dl
829 my ($gh, $str) = @_;
830 my ($ret) = '';
832 if ($gh->{'dl-as-dl'}) {
833 $gh->_new_mode('dl');
834 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
836 else {
837 $gh->_new_mode('table');
838 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
841 return $ret;
845 sub _ul
847 my ($gh, $levels) = @_;
848 my ($ret);
850 $ret = '';
852 if ($levels > 0) {
853 $ret .= '<ul>';
855 elsif ($levels < 0) {
856 $ret .= '</li></ul>' x abs($levels);
859 if ($gh->{'-mode'} ne 'ul') {
860 $gh->{'-mode'} = 'ul';
862 else {
863 $ret .= '</li>' if $levels <= 0;
866 $ret .= '<li>';
868 return $ret;
872 sub _ol
874 my ($gh, $levels) = @_;
875 my ($ret);
877 $ret = '';
879 if ($levels > 0) {
880 $ret .= '<ol>';
882 elsif ($levels < 0) {
883 $ret .= '</li></ol>' x abs($levels);
886 if ($gh->{'-mode'} ne 'ol') {
887 $gh->{'-mode'} = 'ol';
889 else {
890 $ret .= '</li>' if $levels <= 0;
893 $ret .= '<li>';
895 return $ret;
899 sub _blockquote
901 my ($gh) = @_;
903 $gh->_new_mode('blockquote');
904 return "\"";
908 sub _hr
910 my ($gh) = @_;
912 return "<hr size = '1' noshade = 'noshade'>";
916 sub __mkanchor
918 my $gh = shift;
919 my $a = shift;
921 $a = lc($a);
922 $a =~ s/[\"\'\/]//g;
923 $a =~ s/\s/_/g;
924 $a =~ s/<[^>]+>//g;
926 return $a;
930 sub _heading
932 my ($gh, $level, $l) = @_;
934 # creates a valid anchor
935 my $a = $gh->__mkanchor($l);
937 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
938 $a, $level+$gh->{'header-offset'},
939 $l, $level+$gh->{'header-offset'});
941 return $l;
945 sub _table
947 my ($gh, $str) = @_;
949 if ($gh->{'-mode'} eq 'table') {
950 my ($class) = '';
951 my (@spans) = $gh->_calc_col_span($str);
953 # calculate CSS class, if any
954 if ($gh->{'class-oddeven'}) {
955 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
958 $str = "<tr $class>";
960 # build columns
961 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
962 my ($i,$s);
964 $i = ${$gh->{'-table'}}[$n];
965 $i = "&nbsp;" if $i =~ /^\s*$/;
967 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
969 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
970 $str .= "<th $class $s>$i</th>";
972 else {
973 $str .= "<td $class $s>$i</td>";
977 $str .= '</tr>';
979 @{$gh->{'-table'}} = ();
980 $gh->{'-tbl-row'}++;
982 else {
983 # new table
984 my ($params);
986 $params = "border = '1'";
987 $params .= " width = '100\%'" if $gh->{'expand-tables'};
988 $params .= " align = 'center'" if $gh->{'center-tables'};
989 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
991 $gh->_new_mode('table', $params);
993 @{$gh->{'-table'}} = ();
994 $gh->{'-tbl-row'} = 1;
995 $str = '';
998 return $str;
1002 sub _toc
1004 my $gh = shift;
1005 my @t = ();
1007 push(@t, '<!-- TOC -->');
1009 my $l = 0;
1011 foreach my $e (@{$gh->{index}}) {
1012 # ignore level 1 headings
1013 if ($e->[0] == 1) {
1014 next;
1017 if ($l < $e->[0]) {
1018 push(@t, '<ol>');
1020 elsif ($l > $e->[0]) {
1021 push(@t, '</ol>');
1024 $l = $e->[0];
1026 push(@t, sprintf("<li><a href = '#%s'>%s</a></li>",
1027 $gh->__mkanchor($e->[1]), $e->[1]));
1030 while (--$l) {
1031 push(@t, '</ol>');
1034 return @t;
1037 ###########################################################
1038 # troff Driver
1040 package Grutatxt::troff;
1042 @ISA = ("Grutatxt");
1044 =head2 troff Driver
1046 The troff driver uses the B<-me> macros and B<tbl>. A
1047 good way to post-process this output (to PostScript in
1048 the example) could be by using
1050 groff -t -me -Tps
1052 The additional parameters for a new Grutatxt object are:
1054 =over 4
1056 =item I<normal-size>
1058 The point size of normal text. By default is 10.
1060 =item I<heading-sizes>
1062 This argument must be a reference to an array containing
1063 the size in points of the 3 different heading levels. By
1064 default, level sizes are [ 20, 18, 15 ].
1066 =item I<table-type>
1068 The type of table to be rendered by B<tbl>. Can be
1069 I<allbox> (all lines rendered; this is the default value),
1070 I<box> (only outlined) or I<doublebox> (only outlined by
1071 a double line).
1073 =back
1075 =cut
1077 sub new
1079 my ($class, %args) = @_;
1080 my ($gh);
1082 bless(\%args,$class);
1083 $gh = \%args;
1085 $gh->{'-process-urls'} = 0;
1087 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
1088 $gh->{'normal-size'} ||= 10;
1089 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
1091 return $gh;
1095 sub _prefix
1097 my ($gh) = @_;
1099 $gh->_push(".nr pp $gh->{'normal-size'}");
1100 $gh->_push(".nh");
1104 sub _inline
1106 my ($gh,$l) = @_;
1108 # accept only troff inlines
1109 if ($l =~ /^<<\s*troff$/i) {
1110 $gh->{'-inline'} = 'troff';
1111 return;
1114 if ($l =~ /^>>$/) {
1115 delete $gh->{'-inline'};
1116 return;
1119 if ($gh->{'-inline'} eq 'troff') {
1120 $gh->_push($l);
1125 sub _escape
1127 my ($gh,$l) = @_;
1129 $l =~ s/\\/\\\\/g;
1130 $l =~ s/^'/\\&'/;
1132 return $l;
1136 sub _empty_line
1138 my ($gh) = @_;
1140 return '.lp';
1144 sub _strong
1146 my ($gh, $str) = @_;
1147 return "\\fB$str\\fP";
1151 sub _em
1153 my ($gh, $str) = @_;
1154 return "\\fI$str\\fP";
1158 sub _code
1160 my ($gh, $str) = @_;
1161 return "\\fI$str\\fP";
1165 sub _funcname
1167 my ($gh, $str) = @_;
1168 return "\\fB$str\\fP";
1172 sub _varname
1174 my ($gh, $str) = @_;
1175 return "\\fI$str\\fP";
1179 sub _new_mode
1181 my ($gh, $mode, $params) = @_;
1183 if ($mode ne $gh->{'-mode'}) {
1184 my $tag;
1186 # flush previous list
1187 if ($gh->{'-mode'} eq 'pre') {
1188 $gh->_push('.)l');
1190 elsif ($gh->{'-mode'} eq 'table') {
1191 chomp($gh->{'-table-head'});
1192 $gh->{'-table-head'} =~ s/\s+$//;
1193 $gh->_push($gh->{'-table-head'} . '.');
1194 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1196 elsif ($gh->{'-mode'} eq 'blockquote') {
1197 $gh->_push('.)q');
1200 # send new one
1201 if ($mode eq 'pre') {
1202 $gh->_push('.(l L');
1204 elsif ($mode eq 'blockquote') {
1205 $gh->_push('.(q');
1208 $gh->{'-mode'} = $mode;
1213 sub _dl
1215 my ($gh, $str) = @_;
1217 $gh->_new_mode('dl');
1218 return ".ip \"$str\"\n";
1222 sub _ul
1224 my ($gh) = @_;
1226 $gh->_new_mode('ul');
1227 return ".bu\n";
1231 sub _ol
1233 my ($gh) = @_;
1235 $gh->_new_mode('ol');
1236 return ".np\n";
1240 sub _blockquote
1242 my ($gh) = @_;
1244 $gh->_new_mode('blockquote');
1245 return "\"";
1249 sub _hr
1251 my ($gh) = @_;
1253 return '.hl';
1257 sub _heading
1259 my ($gh, $level, $l) = @_;
1261 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1263 return $l;
1267 sub _table
1269 my ($gh, $str) = @_;
1271 if ($gh->{'-mode'} eq 'table') {
1272 my ($h, $b);
1273 my (@spans) = $gh->_calc_col_span($str);
1275 # build columns
1276 $h = '';
1277 $b = '';
1278 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1279 my ($i);
1281 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1282 $h .= 'cB ';
1284 else {
1285 $h .= 'l ';
1288 # add span columns
1289 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1291 $b .= '#' if $n;
1293 $i = ${$gh->{'-table'}}[$n];
1294 $i =~ s/^\s+//;
1295 $i =~ s/\s+$//;
1296 $i =~ s/(\s)+/$1/g;
1297 $b .= $i;
1300 # add a separator
1301 $b .= "\n_" if $gh->{'table-headers'} and
1302 $gh->{'-tbl-row'} == 1 and
1303 $gh->{'table-type'} ne "allbox";
1305 $gh->{'-table-head'} .= "$h\n";
1306 $gh->{'-table-body'} .= "$b\n";
1308 @{$gh->{'-table'}} = ();
1309 $gh->{'-tbl-row'}++;
1311 else {
1312 # new table
1313 $gh->_new_mode('table');
1315 @{$gh->{'-table'}} = ();
1316 $gh->{'-tbl-row'} = 1;
1318 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1319 $gh->{'-table-body'} = '';
1322 $str = '';
1323 return $str;
1327 sub _postfix
1329 my ($gh) = @_;
1331 # add to top headings and footers
1332 unshift(@{$gh->{'o'}},".ef '\%' ''");
1333 unshift(@{$gh->{'o'}},".of '' '\%'");
1334 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1335 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1339 ###########################################################
1340 # man Driver
1342 package Grutatxt::man;
1344 @ISA = ("Grutatxt::troff", "Grutatxt");
1346 =head2 man Driver
1348 The man driver is used to generate Unix-like man pages. Note that
1349 all headings have the same level with this output driver.
1351 The additional parameters for a new Grutatxt object are:
1353 =over 4
1355 =item I<section>
1357 The man page section (see man documentation). By default is 1.
1359 =item I<page-name>
1361 The name of the page. This is usually the name of the program
1362 or function the man page is documenting and will be shown in the
1363 page header. By default is the empty string.
1365 =back
1367 =cut
1369 sub new
1371 my ($class, %args) = @_;
1372 my ($gh);
1374 bless(\%args,$class);
1375 $gh = \%args;
1377 $gh->{'-process-urls'} = 0;
1379 $gh->{'section'} ||= 1;
1380 $gh->{'page-name'} ||= "";
1382 return $gh;
1386 sub _prefix
1388 my ($gh) = @_;
1390 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1394 sub _inline
1396 my ($gh, $l) = @_;
1398 # accept only man markup inlines
1399 if ($l =~ /^<<\s*man$/i) {
1400 $gh->{'-inline'} = 'man';
1401 return;
1404 if ($l =~ /^>>$/) {
1405 delete $gh->{'-inline'};
1406 return;
1409 if ($gh->{'-inline'} eq 'man') {
1410 $gh->_push($l);
1415 sub _empty_line
1417 my ($gh) = @_;
1419 return ' ';
1423 sub _new_mode
1425 my ($gh,$mode,$params) = @_;
1427 if ($mode ne $gh->{'-mode'}) {
1428 my $tag;
1430 # flush previous list
1431 if ($gh->{'-mode'} eq 'pre' or
1432 $gh->{'-mode'} eq 'table') {
1433 $gh->_push('.fi');
1436 if ($gh->{'-mode'} eq 'blockquote') {
1437 $gh->_push('.RE');
1440 if ($gh->{'-mode'} eq 'ul') {
1441 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1444 if ($gh->{'-mode'} eq 'ol') {
1445 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1448 # send new one
1449 if ($mode eq 'pre' or $mode eq 'table') {
1450 $gh->_push('.nf');
1453 if ($mode eq 'blockquote') {
1454 $gh->_push('.RS 4');
1457 $gh->{'-mode'} = $mode;
1462 sub _dl
1464 my ($gh, $str) = @_;
1466 $gh->_new_mode('dl');
1467 return ".TP\n.B \"$str\"\n";
1471 sub _ul
1473 my ($gh, $levels) = @_;
1474 my ($ret) = '';
1476 if ($levels > 0) {
1477 $ret = ".RS 4\n";
1479 elsif ($levels < 0) {
1480 $ret = ".RE\n" x abs($levels);
1483 $gh->_new_mode('ul');
1484 return $ret . ".TP 4\n\\(bu\n";
1488 sub _ol
1490 my ($gh, $levels) = @_;
1491 my $l = @{$gh->{'-ol-levels'}};
1492 my $ret = '';
1494 $gh->{'-ol-level'} += $levels;
1496 if ($levels > 0) {
1497 $ret = ".RS 4\n";
1499 $l[$gh->{'-ol-level'}] = 1;
1501 elsif ($levels < 0) {
1502 $ret = ".RE\n" x abs($levels);
1505 $gh->_new_mode('ol');
1506 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1508 return $ret;
1512 sub _hr
1514 my ($gh) = @_;
1516 return '';
1520 sub _heading
1522 my ($gh, $level, $l) = @_;
1524 # all headers are the same depth in man pages
1525 return ".SH \"" . uc($l) . "\"";
1529 sub _table
1531 my ($gh, $str) = @_;
1533 if ($gh->{'-mode'} eq 'table') {
1534 foreach my $r (@{$gh->{'-table-raw'}}) {
1535 $gh->_push("|$r|");
1538 else {
1539 $gh->_new_mode('table');
1542 @{$gh->{'-table'}} = ();
1543 @{$gh->{'-table-raw'}} = ();
1545 $gh->_push($str);
1547 return '';
1551 sub _postfix
1556 ###########################################################
1557 # latex Driver
1559 package Grutatxt::latex;
1561 @ISA = ("Grutatxt");
1563 =head2 LaTeX Driver
1565 The additional parameters for a new Grutatxt object are:
1567 =over 4
1569 =item I<docclass>
1571 The LaTeX document class. By default is 'report'. You can also use
1572 'article' or 'book' (consult your LaTeX documentation for details).
1574 =item I<papersize>
1576 The paper size to be used in the document. By default is 'a4paper'.
1578 =item I<encoding>
1580 The character encoding used in the document. By default is 'latin1'.
1582 =back
1584 Note that you can't nest further than 4 levels in LaTeX; if you do,
1585 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1587 =cut
1589 sub new
1591 my ($class, %args) = @_;
1592 my ($gh);
1594 bless(\%args,$class);
1595 $gh = \%args;
1597 $gh->{'-process-urls'} = 0;
1599 $gh->{'-docclass'} ||= 'report';
1600 $gh->{'-papersize'} ||= 'a4paper';
1601 $gh->{'-encoding'} ||= 'latin1';
1603 return $gh;
1607 sub _prefix
1609 my ($gh) = @_;
1611 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1612 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1614 $gh->_push("\\begin{document}");
1618 sub _inline
1620 my ($gh, $l) = @_;
1622 # accept only latex inlines
1623 if ($l =~ /^<<\s*latex$/i) {
1624 $gh->{'-inline'} = 'latex';
1625 return;
1628 if ($l =~ /^>>$/) {
1629 delete $gh->{'-inline'};
1630 return;
1633 if ($gh->{'-inline'} eq 'latex') {
1634 $gh->_push($l);
1639 sub _escape
1641 my ($gh, $l) = @_;
1643 $l =~ s/ _ / \\_ /g;
1644 $l =~ s/ ~ / \\~ /g;
1645 $l =~ s/ & / \\& /g;
1647 return $l;
1651 sub _escape_post
1653 my ($gh, $l) = @_;
1655 $l =~ s/ # / \\# /g;
1656 $l =~ s/^\\n$//g;
1657 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1659 return $l;
1663 sub _empty_line
1665 my ($gh) = @_;
1667 return "\\n";
1671 sub _strong
1673 my ($gh, $str) = @_;
1674 return "\\textbf{$str}";
1678 sub _em
1680 my ($gh, $str) = @_;
1681 return "\\emph{$str}";
1685 sub _code
1687 my ($gh, $str) = @_;
1688 return "{\\tt $str}";
1692 sub _funcname
1694 my ($gh, $str) = @_;
1695 return "{\\tt $str}";
1699 sub _varname
1701 my ($gh, $str) = @_;
1703 $str =~ s/^\$/\\\$/;
1705 return "{\\tt $str}";
1709 sub _new_mode
1711 my ($gh, $mode, $params) = @_;
1713 # mode equivalences
1714 my %latex_modes = (
1715 'pre' => 'verbatim',
1716 'blockquote' => 'quote',
1717 'table' => 'tabular',
1718 'dl' => 'description',
1719 'ul' => 'itemize',
1720 'ol' => 'enumerate'
1723 if ($mode ne $gh->{'-mode'}) {
1724 # close previous mode
1725 if ($gh->{'-mode'} eq 'ul') {
1726 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1728 elsif ($gh->{'-mode'} eq 'ol') {
1729 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1731 elsif ($gh->{'-mode'} eq 'table') {
1732 $gh->_push("\\end{tabular}\n");
1734 else {
1735 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1736 if $gh->{'-mode'};
1739 # send new one
1740 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1741 if $mode;
1743 $gh->{'-mode'} = $mode;
1745 $gh->{'-ul-levels'} = undef;
1746 $gh->{'-ol-levels'} = undef;
1751 sub _dl
1753 my ($gh, $str) = @_;
1755 $gh->_new_mode('dl');
1756 return "\\item[$str]\n";
1760 sub _ul
1762 my ($gh, $levels) = @_;
1763 my ($ret);
1765 $ret = '';
1767 if ($levels > 0) {
1768 $ret .= "\\begin{itemize}\n";
1770 elsif ($levels < 0) {
1771 $ret .= "\\end{itemize}\n" x abs($levels);
1774 $gh->{'-mode'} = 'ul';
1776 $ret .= "\\item\n";
1778 return $ret;
1782 sub _ol
1784 my ($gh, $levels) = @_;
1785 my ($ret);
1787 $ret = '';
1789 if ($levels > 0) {
1790 $ret .= "\\begin{enumerate}\n";
1792 elsif ($levels < 0) {
1793 $ret .= "\\end{enumerate}\n" x abs($levels);
1796 $gh->{'-mode'} = 'ol';
1798 $ret .= "\\item\n";
1800 return $ret;
1804 sub _blockquote
1806 my ($gh) = @_;
1808 $gh->_new_mode('blockquote');
1809 return "``";
1813 sub _hr
1815 my ($gh) = @_;
1817 return "------------\n";
1821 sub _heading
1823 my ($gh, $level, $l) = @_;
1825 my @latex_headings = ( "\\section*{", "\\subsection*{",
1826 "\\subsubsection*{");
1828 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1830 return $l;
1834 sub _table
1836 my ($gh,$str) = @_;
1838 if ($gh->{'-mode'} eq 'table') {
1839 my ($class) = '';
1840 my (@spans) = $gh->_calc_col_span($str);
1841 my (@cols);
1843 $str = '';
1845 # build columns
1846 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1847 my ($i, $s);
1849 $i = ${$gh->{'-table'}}[$n];
1850 $i = "&nbsp;" if $i =~ /^\s*$/;
1852 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1854 # multispan columns
1855 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1856 if $spans[$n] > 1;
1858 $i =~ s/\s{2,}/ /g;
1859 $i =~ s/^\s+//;
1860 $i =~ s/\s+$//;
1862 push(@cols, $i);
1865 $str .= join('&', @cols) . "\\\\\n\\hline";
1867 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1869 @{$gh->{'-table'}} = ();
1870 $gh->{'-tbl-row'}++;
1872 else {
1873 # new table
1875 # count the number of columns
1876 $str =~ s/[^\+]//g;
1877 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1879 $gh->_push();
1880 $gh->_new_mode('table', $params);
1882 @{$gh->{'-table'}} = ();
1883 $gh->{'-tbl-row'} = 1;
1884 $str = '';
1887 return $str;
1891 sub _postfix
1893 my ($gh) = @_;
1895 $gh->_push("\\end{document}");
1899 =head1 AUTHOR
1901 Angel Ortega angel@triptico.com
1903 =cut