Added 'ul' support to RTF (non-working).
[grutatxt.git] / Grutatxt.pm
blob4110a0385fb141f58417ae2c6d04fcbe8469bbfe
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2009 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.16-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 =for html <->
67 A comprehensive description of the markup is defined in
68 the README file, included with the Grutatxt package (it is
69 written in Grutatxt format itself, so it can be converted
70 using the I<grutatxt> tool to any of the supported formats).
71 The latest version (and more information) can be retrieved
72 from the Grutatxt home page at:
74 http://www.triptico.com/software/grutatxt.html
76 =head1 FUNCTIONS AND METHODS
78 =head2 new
80 $grutatxt = new Grutatxt([ "mode" => $mode, ]
81 [ "title" => \$title, ]
82 [ "marks" => \@marks, ]
83 [ "index" => \@index, ]
84 [ "abstract" => \$abstract, ]
85 [ "strip-parens" => $bool, ]
86 [ "strip-dollars" => $bool, ]
87 [ %driver_specific_arguments ] );
89 Creates a new Grutatxt object instance. All parameters are
90 optional.
92 =over 4
94 =item I<mode>
96 Output format. Can be HTML, troff or man. HTML is used if not specified.
98 =item I<title>
100 If I<title> is specified as a reference to scalar, the first
101 level 1 heading found in the text is stored inside it.
103 =item I<marks>
105 Marks in the Grutatxt markup are created by inserting the
106 string <-> alone in a line. If I<marks> is specified as a
107 reference to array, it will be filled with the subscripts
108 (relative to the output array) of the lines where the marks
109 are found in the text.
111 =item I<index>
113 If I<index> is specified as a reference to array, it will
114 be filled with two element arrayrefs with the level as first
115 argument and the heading as second.
117 This information can be used to build a table of contents
118 of the processed text.
120 =item I<strip-parens>
122 Function names in the Grutatxt markup are strings of
123 alphanumeric characters immediately followed by a pair
124 of open and close parentheses. If this boolean value is
125 set, function names found in the processed text will have
126 their parentheses deleted.
128 =item I<strip-dollars>
130 Variable names in the Grutatxt markup are strings of
131 alphanumeric characters preceded by a dollar sign.
132 If this boolean value is set, variable names found in
133 the processed text will have the dollar sign deleted.
135 =item I<abstract>
137 The I<abstract> of a Grutatxt document is the fragment of text
138 from the beginning of the document to the end of the first
139 paragraph after the title. If I<abstract> is specified as a
140 reference to scalar, it will contain (after each call to the
141 B<process()> method) the subscript of the element of the output
142 array that marks the end of the subject.
144 =item I<no-pure-verbatim>
146 Since version 2.0.15, text effects as italics and bold are not
147 processed in I<verbatim> (preformatted) mode. If you want to
148 revert to the old behaviour, use this option.
150 =item I<toc>
152 If set, a table of contents will be generated after the abstract.
153 The table of contents will be elaborated using headings from 2
154 and 3 levels.
156 =back
158 =cut
160 sub new
162 my ($class, %args) = @_;
163 my ($gh);
165 $args{'mode'} ||= 'HTML';
167 $class .= "::" . $args{'mode'};
169 $gh = new $class(%args);
171 return $gh;
175 sub escape
176 # escapes special characters, ignoring passthrough code
178 my ($gh, $l) = @_;
180 # splits between << and >>
181 my (@l) = split(/(<<|>>)/, $l);
183 @l = map {
184 my $l = $_;
186 # escape only text outside << and >>
187 unless ($l eq '<<' .. $l eq '>>') {
188 $l = $gh->_escape($l);
191 $_ = $l;
192 } @l;
194 # join again, stripping << and >>
195 $l = join('', grep(!/^(<<|>>)$/, @l));
197 return $l;
201 =head2 process
203 @output = $grutatxt->process($text);
205 Processes a text in Grutatxt format. The result is returned
206 as an array of lines.
208 =cut
210 sub process
212 my ($gh, $content) = @_;
213 my ($p);
215 # clean output
216 @{$gh->{'o'}} = ();
218 # clean title and paragraph numbers
219 $gh->{'-title'} = '';
220 $gh->{'-p'} = 0;
222 # clean marks
223 if (!defined $gh->{marks}) {
224 $gh->{marks} = [];
227 @{$gh->{'marks'}} = ();
229 # clean index
230 if (!$gh->{index}) {
231 $gh->{index} = [];
234 @{$gh->{'index'}} = ();
236 # reset abstract line
237 if (!$gh->{abstract}) {
238 $gh->{abstract} = \$gh->{_abstract};
241 ${$gh->{'abstract'}} = 0;
243 # insert prefix
244 $gh->_prefix();
246 $gh->{'-mode'} = undef;
248 foreach my $l (split(/\n/,$content)) {
249 # inline data (passthrough)
250 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
251 $gh->_inline($l);
252 next;
255 # marks
256 if ($l =~ /^\s*<\->\s*$/) {
257 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
258 if ref($gh->{'marks'});
260 next;
263 # TOC mark
264 if ($l =~ /^\s*<\?>\s*$/) {
265 $gh->{toc} = $gh->{_toc_pos} = scalar(@{$gh->{o}});
266 next;
269 # escape possibly dangerous characters
270 $l = $gh->escape($l);
272 # empty lines
273 $l =~ s/^\r$//ge;
274 if ($l =~ s/^$/$gh->_empty_line()/ge) {
275 # mark the abstract end
276 if ($gh->{'-title'}) {
277 $gh->{'-p'}++;
279 # mark abstract if it's the
280 # second paragraph from the title
281 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
282 if $gh->{'-p'} == 2;
286 # line-mutating process
287 my $ol = $l;
289 if ($gh->{'-process-urls'}) {
290 # URLs followed by a parenthesized phrase
291 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
292 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
293 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
294 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
295 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
297 # URLs without phrase
298 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
299 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
300 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
301 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
303 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
304 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
305 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
306 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
309 # change '''text''' and *text* into strong emphasis
310 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
311 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
312 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
314 # change ''text'' and _text_ into emphasis
315 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
316 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
317 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
319 # change `text' into code
320 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
322 # enclose function names
323 if ($gh->{'strip-parens'}) {
324 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
326 else {
327 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
330 # enclose variable names
331 if ($gh->{'strip-dollars'}) {
332 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
334 else {
335 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
339 # main switch
342 # definition list
343 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
344 $gh->{'-mode-elems'} ++;
347 # unsorted list
348 elsif ($gh->{'-mode'} ne 'pre' and
349 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
350 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
351 $gh->{'-mode-elems'} ++;
354 # sorted list
355 elsif ($gh->{'-mode'} ne 'pre' and
356 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
357 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
358 $gh->{'-mode-elems'} ++;
361 # quoted block
362 elsif ($gh->{'-mode'} ne 'pre' and
363 $l =~ s/^\s\"/$gh->_blockquote()/e) {
366 # table rows
367 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
368 $gh->{'-mode-elems'} ++;
371 # table heading / end of row
372 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
375 # preformatted text
376 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
377 if ($gh->{'-mode'} eq 'pre' &&
378 !$gh->{'no-pure-verbatim'}) {
379 # set line back to original
380 $l = $ol;
384 # anything else
385 else {
386 # back to normal mode
387 $gh->_new_mode(undef);
390 # 1 level heading
391 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
393 # 2 level heading
394 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
396 # 3 level heading
397 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
399 # change ------ into hr
400 $l =~ s/^----*$/$gh->_hr()/e;
402 # push finally
403 $gh->_push($l) if $l;
406 # flush
407 $gh->_new_mode(undef);
409 # postfix
410 $gh->_postfix();
412 # set title
413 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
415 # set abstract, if not set
416 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
417 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
419 # travel all lines again, post-escaping
420 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
422 # add TOC after first paragraph
423 if ($gh->{toc} && @{$gh->{o}}) {
424 my $p = $gh->{_toc_pos} ||
425 $gh->{marks}->[0] ||
426 ${$gh->{abstract}};
428 @{$gh->{o}} = (@{$gh->{o}}[0 .. $p],
429 $gh->_toc(),
430 @{$gh->{o}}[$p + 1 ..
431 scalar(@{$gh->{o}})]);
434 return @{$gh->{'o'}};
438 =head2 process_file
440 @output = $grutatxt->process_file($filename);
442 Processes a file in Grutatxt format.
444 =cut
446 sub process_file
448 my ($gh, $file) = @_;
450 open F, $file or return(undef);
452 my ($content) = join('',<F>);
453 close F;
455 return $gh->process($content);
459 sub _push
461 my ($gh, $l) = @_;
463 push(@{$gh->{'o'}},$l);
467 sub _process_heading
469 my ($gh, $level, $hd) = @_;
470 my ($l);
472 $l = pop(@{$gh->{'o'}});
474 if ($l eq $gh->_empty_line()) {
475 $gh->_push($l);
476 return $hd;
479 # store title
480 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
482 # store index
483 if (ref($gh->{'index'})) {
484 push(@{$gh->{'index'}}, [ $level, $l ]);
487 return $gh->_heading($level,$l);
491 sub _calc_col_span
493 my ($gh, $l) = @_;
494 my (@spans);
496 # strip first + and all -
497 $l =~ s/^\+//;
498 $l =~ s/-//g;
500 my ($t) = 1; @spans = ();
501 for (my $n = 0; $n < length($l); $n++) {
502 if (substr($l, $n, 1) eq '+') {
503 push(@spans, $t);
504 $t = 1;
506 else {
507 # it's a colspan mark:
508 # increment
509 $t++;
513 return @spans;
517 sub _table_row
519 my ($gh, $str) = @_;
521 my @s = split(/\|/,$str);
523 for (my $n = 0; $n < scalar(@s); $n++) {
524 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
527 push(@{$gh->{'-table-raw'}}, $str);
529 return '';
533 sub _pre
535 my ($gh, $l) = @_;
537 # if any other mode is active, add to it
538 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
539 $l =~ s/^\s+//;
541 my ($a) = pop(@{$gh->{'o'}})." ".$l;
542 $gh->_push($a);
543 $l = '';
545 else {
546 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
547 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
549 $gh->_new_mode('pre');
552 return $l;
556 sub _multilevel_list
558 my ($gh, $str, $ind) = @_;
559 my (@l,$level);
561 @l = @{$gh->{$str}};
562 $ind = length($ind);
563 $level = 0;
565 if ($l[-1] < $ind) {
566 # if last level is less indented, increase
567 # nesting level
568 push(@l, $ind);
569 $level++;
571 elsif ($l[-1] > $ind) {
572 # if last level is more indented, decrease
573 # levels until the same is found (or back to
574 # the beginning if not)
575 while (pop(@l)) {
576 $level--;
577 last if $l[-1] == $ind;
581 $gh->{$str} = \@l;
583 return $level;
587 sub _unsorted_list
589 my ($gh, $ind) = @_;
591 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
595 sub _ordered_list
597 my ($gh, $ind) = @_;
599 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
603 # empty stubs for falling through the superclass
605 sub _inline { my ($gh, $l) = @_; $l; }
606 sub _escape { my ($gh, $l) = @_; $l; }
607 sub _escape_post { my ($gh, $l) = @_; $l; }
608 sub _empty_line { my ($gh) = @_; ''; }
609 sub _url { my ($gh, $url, $label) = @_; ''; }
610 sub _strong { my ($gh, $str) = @_; $str; }
611 sub _em { my ($gh, $str) = @_; $str; }
612 sub _code { my ($gh, $str) = @_; $str; }
613 sub _funcname { my ($gh, $str) = @_; $str; }
614 sub _varname { my ($gh, $str) = @_; $str; }
615 sub _new_mode { my ($gh, $mode) = @_; }
616 sub _dl { my ($gh, $str) = @_; $str; }
617 sub _ul { my ($gh, $level) = @_; ''; }
618 sub _ol { my ($gh, $level) = @_; ''; }
619 sub _blockquote { my ($gh, $str) = @_; $str; }
620 sub _hr { my ($gh) = @_; ''; }
621 sub _heading { my ($gh, $level, $l) = @_; $l; }
622 sub _table { my ($gh, $str) = @_; $str; }
623 sub _prefix { my ($gh) = @_; }
624 sub _postfix { my ($gh) = @_; }
625 sub _toc { my ($gh) = @_; return (); }
627 ###########################################################
629 =head1 DRIVER SPECIFIC INFORMATION
631 =cut
633 ###########################################################
634 # HTML Driver
636 package Grutatxt::HTML;
638 @ISA = ("Grutatxt");
640 =head2 HTML Driver
642 The additional parameters for a new Grutatxt object are:
644 =over 4
646 =item I<table-headers>
648 If this boolean value is set, the first row in tables
649 is assumed to be the heading and rendered using 'th'
650 instead of 'td' tags.
652 =item I<center-tables>
654 If this boolean value is set, tables are centered.
656 =item I<expand-tables>
658 If this boolean value is set, tables are expanded (width 100%).
660 =item I<dl-as-dl>
662 If this boolean value is set, definition lists will be
663 rendered using 'dl', 'dt' and 'dd' instead of tables.
665 =item I<header-offset>
667 Offset to be summed to the heading level when rendering
668 'h?' tags (default is 0).
670 =item I<class-oddeven>
672 If this boolean value is set, tables will be rendered
673 with an "oddeven" CSS class, and rows alternately classed
674 as "even" or "odd". If it's not set, no CSS class info
675 is added to tables.
677 =item I<url-label-max>
679 If an URL without label is given (that is, the URL itself
680 is used as the label), it's trimmed to have as much
681 characters as this value says. By default it's 80.
683 =back
685 =cut
687 sub new
689 my ($class, %args) = @_;
690 my ($gh);
692 bless(\%args, $class);
693 $gh = \%args;
695 $gh->{'-process-urls'} = 1;
696 $gh->{'url-label-max'} ||= 80;
698 return $gh;
702 sub _inline
704 my ($gh, $l) = @_;
706 # accept unnamed and HTML inlines
707 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
708 $gh->{'-inline'} = 'HTML';
709 return;
712 if ($l =~ /^>>$/) {
713 delete $gh->{'-inline'};
714 return;
717 if ($gh->{'-inline'} eq 'HTML') {
718 $gh->_push($l);
723 sub _escape
725 my ($gh, $l) = @_;
727 $l =~ s/&/&amp;/g;
728 $l =~ s/</&lt;/g;
729 $l =~ s/>/&gt;/g;
731 return $l;
735 sub _empty_line
737 my ($gh) = @_;
739 return('<p>');
743 sub _url
745 my ($gh, $url, $label) = @_;
747 if (!$label) {
748 $label = $url;
750 if (length($label) > $gh->{'url-label-max'}) {
751 $label = substr($label, 0,
752 $gh->{'url-label-max'}) . '...';
756 return "<a href = \"$url\">$label</a>";
760 sub _strong
762 my ($gh, $str) = @_;
763 return "<strong>$str</strong>";
767 sub _em
769 my ($gh, $str) = @_;
770 return "<em>$str</em>";
774 sub _code
776 my ($gh, $str) = @_;
777 return "<code class = 'literal'>$str</code>";
781 sub _funcname
783 my ($gh, $str) = @_;
784 return "<code class = 'funcname'>$str</code>";
788 sub _varname
790 my ($gh, $str) = @_;
791 return "<code class = 'var'>$str</code>";
795 sub _new_mode
797 my ($gh, $mode, $params) = @_;
799 if ($mode ne $gh->{'-mode'}) {
800 my $tag;
802 # clean list levels
803 if ($gh->{'-mode'} eq 'ul') {
804 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
806 elsif ($gh->{'-mode'} eq 'ol') {
807 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
809 elsif ($gh->{'-mode'}) {
810 $gh->_push("</$gh->{'-mode'}>");
813 # send new one
814 $tag = $params ? "<$mode $params>" : "<$mode>";
815 $gh->_push($tag) if $mode;
817 $gh->{'-mode'} = $mode;
818 $gh->{'-mode-elems'} = 0;
820 # clean previous lists
821 $gh->{'-ul-levels'} = undef;
822 $gh->{'-ol-levels'} = undef;
827 sub _dl
829 my ($gh, $str) = @_;
830 my ($ret) = '';
832 if ($gh->{'dl-as-dl'}) {
833 $gh->_new_mode('dl');
834 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
836 else {
837 $gh->_new_mode('table');
838 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
841 return $ret;
845 sub _ul
847 my ($gh, $levels) = @_;
848 my ($ret);
850 $ret = '';
852 if ($levels > 0) {
853 $ret .= '<ul>';
855 elsif ($levels < 0) {
856 $ret .= '</li></ul>' x abs($levels);
859 if ($gh->{'-mode'} ne 'ul') {
860 $gh->{'-mode'} = 'ul';
862 else {
863 $ret .= '</li>' if $levels <= 0;
866 $ret .= '<li>';
868 return $ret;
872 sub _ol
874 my ($gh, $levels) = @_;
875 my ($ret);
877 $ret = '';
879 if ($levels > 0) {
880 $ret .= '<ol>';
882 elsif ($levels < 0) {
883 $ret .= '</li></ol>' x abs($levels);
886 if ($gh->{'-mode'} ne 'ol') {
887 $gh->{'-mode'} = 'ol';
889 else {
890 $ret .= '</li>' if $levels <= 0;
893 $ret .= '<li>';
895 return $ret;
899 sub _blockquote
901 my ($gh) = @_;
903 $gh->_new_mode('blockquote');
904 return "\"";
908 sub _hr
910 my ($gh) = @_;
912 return "<hr size = '1' noshade = 'noshade'>";
916 sub __mkanchor
918 my $gh = shift;
919 my $a = shift;
921 $a = lc($a);
922 $a =~ s/[\"\'\/]//g;
923 $a =~ s/\s/_/g;
924 $a =~ s/<[^>]+>//g;
926 return $a;
930 sub _heading
932 my ($gh, $level, $l) = @_;
934 # creates a valid anchor
935 my $a = $gh->__mkanchor($l);
937 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
938 $a, $level+$gh->{'header-offset'},
939 $l, $level+$gh->{'header-offset'});
941 return $l;
945 sub _table
947 my ($gh, $str) = @_;
949 if ($gh->{'-mode'} eq 'table') {
950 my ($class) = '';
951 my (@spans) = $gh->_calc_col_span($str);
953 # calculate CSS class, if any
954 if ($gh->{'class-oddeven'}) {
955 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
958 $str = "<tr $class>";
960 # build columns
961 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
962 my ($i,$s);
964 $i = ${$gh->{'-table'}}[$n];
965 $i = "&nbsp;" if $i =~ /^\s*$/;
967 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
969 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
970 $str .= "<th $class $s>$i</th>";
972 else {
973 $str .= "<td $class $s>$i</td>";
977 $str .= '</tr>';
979 @{$gh->{'-table'}} = ();
980 $gh->{'-tbl-row'}++;
982 else {
983 # new table
984 my ($params);
986 $params = "border = '1'";
987 $params .= " width = '100\%'" if $gh->{'expand-tables'};
988 $params .= " align = 'center'" if $gh->{'center-tables'};
989 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
991 $gh->_new_mode('table', $params);
993 @{$gh->{'-table'}} = ();
994 $gh->{'-tbl-row'} = 1;
995 $str = '';
998 return $str;
1002 sub _toc
1004 my $gh = shift;
1005 my @t = ();
1007 push(@t, "<div class = 'TOC'>");
1009 my $l = 0;
1011 foreach my $e (@{$gh->{index}}) {
1012 # ignore level 1 headings
1013 if ($e->[0] == 1) {
1014 next;
1017 if ($l < $e->[0]) {
1018 push(@t, '<ol>');
1020 elsif ($l > $e->[0]) {
1021 push(@t, '</ol>');
1024 $l = $e->[0];
1026 push(@t, sprintf("<li><a href = '#%s'>%s</a></li>",
1027 $gh->__mkanchor($e->[1]), $e->[1]));
1030 while (--$l) {
1031 push(@t, '</ol>');
1034 push(@t, "</div>");
1036 return @t;
1039 ###########################################################
1040 # troff Driver
1042 package Grutatxt::troff;
1044 @ISA = ("Grutatxt");
1046 =head2 troff Driver
1048 The troff driver uses the B<-me> macros and B<tbl>. A
1049 good way to post-process this output (to PostScript in
1050 the example) could be by using
1052 groff -t -me -Tps
1054 The additional parameters for a new Grutatxt object are:
1056 =over 4
1058 =item I<normal-size>
1060 The point size of normal text. By default is 10.
1062 =item I<heading-sizes>
1064 This argument must be a reference to an array containing
1065 the size in points of the 3 different heading levels. By
1066 default, level sizes are [ 20, 18, 15 ].
1068 =item I<table-type>
1070 The type of table to be rendered by B<tbl>. Can be
1071 I<allbox> (all lines rendered; this is the default value),
1072 I<box> (only outlined) or I<doublebox> (only outlined by
1073 a double line).
1075 =back
1077 =cut
1079 sub new
1081 my ($class, %args) = @_;
1082 my ($gh);
1084 bless(\%args,$class);
1085 $gh = \%args;
1087 $gh->{'-process-urls'} = 0;
1089 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
1090 $gh->{'normal-size'} ||= 10;
1091 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
1093 return $gh;
1097 sub _prefix
1099 my ($gh) = @_;
1101 $gh->_push(".nr pp $gh->{'normal-size'}");
1102 $gh->_push(".nh");
1106 sub _inline
1108 my ($gh,$l) = @_;
1110 # accept only troff inlines
1111 if ($l =~ /^<<\s*troff$/i) {
1112 $gh->{'-inline'} = 'troff';
1113 return;
1116 if ($l =~ /^>>$/) {
1117 delete $gh->{'-inline'};
1118 return;
1121 if ($gh->{'-inline'} eq 'troff') {
1122 $gh->_push($l);
1127 sub _escape
1129 my ($gh,$l) = @_;
1131 $l =~ s/\\/\\\\/g;
1132 $l =~ s/^'/\\&'/;
1134 return $l;
1138 sub _empty_line
1140 my ($gh) = @_;
1142 return '.lp';
1146 sub _strong
1148 my ($gh, $str) = @_;
1149 return "\\fB$str\\fP";
1153 sub _em
1155 my ($gh, $str) = @_;
1156 return "\\fI$str\\fP";
1160 sub _code
1162 my ($gh, $str) = @_;
1163 return "\\fI$str\\fP";
1167 sub _funcname
1169 my ($gh, $str) = @_;
1170 return "\\fB$str\\fP";
1174 sub _varname
1176 my ($gh, $str) = @_;
1177 return "\\fI$str\\fP";
1181 sub _new_mode
1183 my ($gh, $mode, $params) = @_;
1185 if ($mode ne $gh->{'-mode'}) {
1186 my $tag;
1188 # flush previous list
1189 if ($gh->{'-mode'} eq 'pre') {
1190 $gh->_push('.)l');
1192 elsif ($gh->{'-mode'} eq 'table') {
1193 chomp($gh->{'-table-head'});
1194 $gh->{'-table-head'} =~ s/\s+$//;
1195 $gh->_push($gh->{'-table-head'} . '.');
1196 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1198 elsif ($gh->{'-mode'} eq 'blockquote') {
1199 $gh->_push('.)q');
1202 # send new one
1203 if ($mode eq 'pre') {
1204 $gh->_push('.(l L');
1206 elsif ($mode eq 'blockquote') {
1207 $gh->_push('.(q');
1210 $gh->{'-mode'} = $mode;
1215 sub _dl
1217 my ($gh, $str) = @_;
1219 $gh->_new_mode('dl');
1220 return ".ip \"$str\"\n";
1224 sub _ul
1226 my ($gh) = @_;
1228 $gh->_new_mode('ul');
1229 return ".bu\n";
1233 sub _ol
1235 my ($gh) = @_;
1237 $gh->_new_mode('ol');
1238 return ".np\n";
1242 sub _blockquote
1244 my ($gh) = @_;
1246 $gh->_new_mode('blockquote');
1247 return "\"";
1251 sub _hr
1253 my ($gh) = @_;
1255 return '.hl';
1259 sub _heading
1261 my ($gh, $level, $l) = @_;
1263 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1265 return $l;
1269 sub _table
1271 my ($gh, $str) = @_;
1273 if ($gh->{'-mode'} eq 'table') {
1274 my ($h, $b);
1275 my (@spans) = $gh->_calc_col_span($str);
1277 # build columns
1278 $h = '';
1279 $b = '';
1280 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1281 my ($i);
1283 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1284 $h .= 'cB ';
1286 else {
1287 $h .= 'l ';
1290 # add span columns
1291 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1293 $b .= '#' if $n;
1295 $i = ${$gh->{'-table'}}[$n];
1296 $i =~ s/^\s+//;
1297 $i =~ s/\s+$//;
1298 $i =~ s/(\s)+/$1/g;
1299 $b .= $i;
1302 # add a separator
1303 $b .= "\n_" if $gh->{'table-headers'} and
1304 $gh->{'-tbl-row'} == 1 and
1305 $gh->{'table-type'} ne "allbox";
1307 $gh->{'-table-head'} .= "$h\n";
1308 $gh->{'-table-body'} .= "$b\n";
1310 @{$gh->{'-table'}} = ();
1311 $gh->{'-tbl-row'}++;
1313 else {
1314 # new table
1315 $gh->_new_mode('table');
1317 @{$gh->{'-table'}} = ();
1318 $gh->{'-tbl-row'} = 1;
1320 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1321 $gh->{'-table-body'} = '';
1324 $str = '';
1325 return $str;
1329 sub _postfix
1331 my ($gh) = @_;
1333 # add to top headings and footers
1334 unshift(@{$gh->{'o'}},".ef '\%' ''");
1335 unshift(@{$gh->{'o'}},".of '' '\%'");
1336 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1337 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1341 ###########################################################
1342 # man Driver
1344 package Grutatxt::man;
1346 @ISA = ("Grutatxt::troff", "Grutatxt");
1348 =head2 man Driver
1350 The man driver is used to generate Unix-like man pages. Note that
1351 all headings have the same level with this output driver.
1353 The additional parameters for a new Grutatxt object are:
1355 =over 4
1357 =item I<section>
1359 The man page section (see man documentation). By default is 1.
1361 =item I<page-name>
1363 The name of the page. This is usually the name of the program
1364 or function the man page is documenting and will be shown in the
1365 page header. By default is the empty string.
1367 =back
1369 =cut
1371 sub new
1373 my ($class, %args) = @_;
1374 my ($gh);
1376 bless(\%args,$class);
1377 $gh = \%args;
1379 $gh->{'-process-urls'} = 0;
1381 $gh->{'section'} ||= 1;
1382 $gh->{'page-name'} ||= "";
1384 return $gh;
1388 sub _prefix
1390 my ($gh) = @_;
1392 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1396 sub _inline
1398 my ($gh, $l) = @_;
1400 # accept only man markup inlines
1401 if ($l =~ /^<<\s*man$/i) {
1402 $gh->{'-inline'} = 'man';
1403 return;
1406 if ($l =~ /^>>$/) {
1407 delete $gh->{'-inline'};
1408 return;
1411 if ($gh->{'-inline'} eq 'man') {
1412 $gh->_push($l);
1417 sub _empty_line
1419 my ($gh) = @_;
1421 return ' ';
1425 sub _new_mode
1427 my ($gh,$mode,$params) = @_;
1429 if ($mode ne $gh->{'-mode'}) {
1430 my $tag;
1432 # flush previous list
1433 if ($gh->{'-mode'} eq 'pre' or
1434 $gh->{'-mode'} eq 'table') {
1435 $gh->_push('.fi');
1438 if ($gh->{'-mode'} eq 'blockquote') {
1439 $gh->_push('.RE');
1442 if ($gh->{'-mode'} eq 'ul') {
1443 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1446 if ($gh->{'-mode'} eq 'ol') {
1447 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1450 # send new one
1451 if ($mode eq 'pre' or $mode eq 'table') {
1452 $gh->_push('.nf');
1455 if ($mode eq 'blockquote') {
1456 $gh->_push('.RS 4');
1459 $gh->{'-mode'} = $mode;
1464 sub _dl
1466 my ($gh, $str) = @_;
1468 $gh->_new_mode('dl');
1469 return ".TP\n.B \"$str\"\n";
1473 sub _ul
1475 my ($gh, $levels) = @_;
1476 my ($ret) = '';
1478 if ($levels > 0) {
1479 $ret = ".RS 4\n";
1481 elsif ($levels < 0) {
1482 $ret = ".RE\n" x abs($levels);
1485 $gh->_new_mode('ul');
1486 return $ret . ".TP 4\n\\(bu\n";
1490 sub _ol
1492 my ($gh, $levels) = @_;
1493 my $l = @{$gh->{'-ol-levels'}};
1494 my $ret = '';
1496 $gh->{'-ol-level'} += $levels;
1498 if ($levels > 0) {
1499 $ret = ".RS 4\n";
1501 $l[$gh->{'-ol-level'}] = 1;
1503 elsif ($levels < 0) {
1504 $ret = ".RE\n" x abs($levels);
1507 $gh->_new_mode('ol');
1508 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1510 return $ret;
1514 sub _hr
1516 my ($gh) = @_;
1518 return '';
1522 sub _heading
1524 my ($gh, $level, $l) = @_;
1526 # all headers are the same depth in man pages
1527 return ".SH \"" . uc($l) . "\"";
1531 sub _table
1533 my ($gh, $str) = @_;
1535 if ($gh->{'-mode'} eq 'table') {
1536 foreach my $r (@{$gh->{'-table-raw'}}) {
1537 $gh->_push("|$r|");
1540 else {
1541 $gh->_new_mode('table');
1544 @{$gh->{'-table'}} = ();
1545 @{$gh->{'-table-raw'}} = ();
1547 $gh->_push($str);
1549 return '';
1553 sub _postfix
1558 ###########################################################
1559 # latex Driver
1561 package Grutatxt::latex;
1563 @ISA = ("Grutatxt");
1565 =head2 LaTeX Driver
1567 The additional parameters for a new Grutatxt object are:
1569 =over 4
1571 =item I<docclass>
1573 The LaTeX document class. By default is 'report'. You can also use
1574 'article' or 'book' (consult your LaTeX documentation for details).
1576 =item I<papersize>
1578 The paper size to be used in the document. By default is 'a4paper'.
1580 =item I<encoding>
1582 The character encoding used in the document. By default is 'latin1'.
1584 =back
1586 Note that you can't nest further than 4 levels in LaTeX; if you do,
1587 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1589 =cut
1591 sub new
1593 my ($class, %args) = @_;
1594 my ($gh);
1596 bless(\%args,$class);
1597 $gh = \%args;
1599 $gh->{'-process-urls'} = 0;
1601 $gh->{'-docclass'} ||= 'report';
1602 $gh->{'-papersize'} ||= 'a4paper';
1603 $gh->{'-encoding'} ||= 'latin1';
1605 return $gh;
1609 sub _prefix
1611 my ($gh) = @_;
1613 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1614 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1616 $gh->_push("\\begin{document}");
1620 sub _inline
1622 my ($gh, $l) = @_;
1624 # accept only latex inlines
1625 if ($l =~ /^<<\s*latex$/i) {
1626 $gh->{'-inline'} = 'latex';
1627 return;
1630 if ($l =~ /^>>$/) {
1631 delete $gh->{'-inline'};
1632 return;
1635 if ($gh->{'-inline'} eq 'latex') {
1636 $gh->_push($l);
1641 sub _escape
1643 my ($gh, $l) = @_;
1645 $l =~ s/ _ / \\_ /g;
1646 $l =~ s/ ~ / \\~ /g;
1647 $l =~ s/ & / \\& /g;
1649 return $l;
1653 sub _escape_post
1655 my ($gh, $l) = @_;
1657 $l =~ s/ # / \\# /g;
1658 $l =~ s/^\\n$//g;
1659 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1661 return $l;
1665 sub _empty_line
1667 my ($gh) = @_;
1669 return "\\n";
1673 sub _strong
1675 my ($gh, $str) = @_;
1676 return "\\textbf{$str}";
1680 sub _em
1682 my ($gh, $str) = @_;
1683 return "\\emph{$str}";
1687 sub _code
1689 my ($gh, $str) = @_;
1690 return "{\\tt $str}";
1694 sub _funcname
1696 my ($gh, $str) = @_;
1697 return "{\\tt $str}";
1701 sub _varname
1703 my ($gh, $str) = @_;
1705 $str =~ s/^\$/\\\$/;
1707 return "{\\tt $str}";
1711 sub _new_mode
1713 my ($gh, $mode, $params) = @_;
1715 # mode equivalences
1716 my %latex_modes = (
1717 'pre' => 'verbatim',
1718 'blockquote' => 'quote',
1719 'table' => 'tabular',
1720 'dl' => 'description',
1721 'ul' => 'itemize',
1722 'ol' => 'enumerate'
1725 if ($mode ne $gh->{'-mode'}) {
1726 # close previous mode
1727 if ($gh->{'-mode'} eq 'ul') {
1728 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1730 elsif ($gh->{'-mode'} eq 'ol') {
1731 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1733 elsif ($gh->{'-mode'} eq 'table') {
1734 $gh->_push("\\end{tabular}\n");
1736 else {
1737 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1738 if $gh->{'-mode'};
1741 # send new one
1742 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1743 if $mode;
1745 $gh->{'-mode'} = $mode;
1747 $gh->{'-ul-levels'} = undef;
1748 $gh->{'-ol-levels'} = undef;
1753 sub _dl
1755 my ($gh, $str) = @_;
1757 $gh->_new_mode('dl');
1758 return "\\item[$str]\n";
1762 sub _ul
1764 my ($gh, $levels) = @_;
1765 my ($ret);
1767 $ret = '';
1769 if ($levels > 0) {
1770 $ret .= "\\begin{itemize}\n";
1772 elsif ($levels < 0) {
1773 $ret .= "\\end{itemize}\n" x abs($levels);
1776 $gh->{'-mode'} = 'ul';
1778 $ret .= "\\item\n";
1780 return $ret;
1784 sub _ol
1786 my ($gh, $levels) = @_;
1787 my ($ret);
1789 $ret = '';
1791 if ($levels > 0) {
1792 $ret .= "\\begin{enumerate}\n";
1794 elsif ($levels < 0) {
1795 $ret .= "\\end{enumerate}\n" x abs($levels);
1798 $gh->{'-mode'} = 'ol';
1800 $ret .= "\\item\n";
1802 return $ret;
1806 sub _blockquote
1808 my ($gh) = @_;
1810 $gh->_new_mode('blockquote');
1811 return "``";
1815 sub _hr
1817 my ($gh) = @_;
1819 return "------------\n";
1823 sub _heading
1825 my ($gh, $level, $l) = @_;
1827 my @latex_headings = ( "\\section*{", "\\subsection*{",
1828 "\\subsubsection*{");
1830 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1832 return $l;
1836 sub _table
1838 my ($gh,$str) = @_;
1840 if ($gh->{'-mode'} eq 'table') {
1841 my ($class) = '';
1842 my (@spans) = $gh->_calc_col_span($str);
1843 my (@cols);
1845 $str = '';
1847 # build columns
1848 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1849 my ($i, $s);
1851 $i = ${$gh->{'-table'}}[$n];
1852 $i = "&nbsp;" if $i =~ /^\s*$/;
1854 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1856 # multispan columns
1857 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1858 if $spans[$n] > 1;
1860 $i =~ s/\s{2,}/ /g;
1861 $i =~ s/^\s+//;
1862 $i =~ s/\s+$//;
1864 push(@cols, $i);
1867 $str .= join('&', @cols) . "\\\\\n\\hline";
1869 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1871 @{$gh->{'-table'}} = ();
1872 $gh->{'-tbl-row'}++;
1874 else {
1875 # new table
1877 # count the number of columns
1878 $str =~ s/[^\+]//g;
1879 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1881 $gh->_push();
1882 $gh->_new_mode('table', $params);
1884 @{$gh->{'-table'}} = ();
1885 $gh->{'-tbl-row'} = 1;
1886 $str = '';
1889 return $str;
1893 sub _postfix
1895 my ($gh) = @_;
1897 $gh->_push("\\end{document}");
1901 ###########################################################
1902 # RTF Driver
1904 package Grutatxt::rtf;
1906 @ISA = ("Grutatxt");
1908 =head2 RTF Driver
1910 The additional parameters for a new Grutatxt object are:
1912 =over 4
1914 =item I<normal-size>
1916 The point size of normal text. By default is 20.
1918 =item I<heading-sizes>
1920 This argument must be a reference to an array containing
1921 the size in points of the 3 different heading levels. By
1922 default, level sizes are [ 34, 30, 28 ].
1924 =back
1926 =cut
1928 sub new
1930 my ($class, %args) = @_;
1931 my ($gh);
1933 bless(\%args, $class);
1934 $gh = \%args;
1936 $gh->{'-process-urls'} = 0;
1938 $gh->{'heading-sizes'} ||= [ 34, 30, 28 ];
1939 $gh->{'normal-size'} ||= 20;
1941 return $gh;
1945 sub _prefix
1947 my $gh = shift;
1949 $gh->_push('{\rtf1\ansi {\plain \fs' . $gh->{'normal-size'} . ' \sa227');
1953 sub _empty_line
1955 my $gh = shift;
1957 return '\par';
1961 sub _heading
1963 my ($gh, $level, $l) = @_;
1965 return '{\b \fs' . $gh->{'heading-sizes'}->[$level] . ' ' . $l . '}';
1969 sub _strong
1971 my ($gh, $str) = @_;
1972 return "{\\b $str}";
1976 sub _em
1978 my ($gh, $str) = @_;
1979 return "{\\i $str}";
1983 sub _code
1985 my ($gh, $str) = @_;
1986 return "{\\tt $str}";
1990 sub _ul
1992 my ($gh, $levels) = @_;
1994 $gh->_new_mode('ul');
1995 return "{{\\bullet \\li" . $levels . ' ';
1999 sub _dl
2001 my ($gh, $str) = @_;
2003 $gh->_new_mode('dl');
2004 return "{{\\b $str \\par} {\\li566 ";
2008 sub _new_mode
2010 my ($gh, $mode, $params) = @_;
2012 if ($mode ne $gh->{'-mode'}) {
2013 if ($gh->{'-mode'} =~ /^(dl|ul)$/) {
2014 $gh->_push('}}');
2017 $gh->{'-mode'} = $mode;
2019 $gh->{'-ul-levels'} = undef;
2020 $gh->{'-ol-levels'} = undef;
2022 else {
2023 if ($mode =~ /^(dl|ul)$/) {
2024 $gh->_push('}\par}');
2030 sub _postfix
2032 my $gh = shift;
2034 @{$gh->{o}} = map { $_ . ' '; } @{$gh->{o}};
2036 $gh->_push('}}');
2040 =head1 AUTHOR
2042 Angel Ortega angel@triptico.com
2044 =cut