Also process mailto: URLS.
[grutatxt.git] / Grutatxt.pm
blob250ac72c5718c41c3351e437604b6abad92764ca
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2010 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.16-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 =for html <->
67 A comprehensive description of the markup is defined in
68 the README file, included with the Grutatxt package (it is
69 written in Grutatxt format itself, so it can be converted
70 using the I<grutatxt> tool to any of the supported formats).
71 The latest version (and more information) can be retrieved
72 from the Grutatxt home page at:
74 http://triptico.com/software/grutatxt.html
76 =head1 FUNCTIONS AND METHODS
78 =head2 new
80 $grutatxt = new Grutatxt([ "mode" => $mode, ]
81 [ "title" => \$title, ]
82 [ "marks" => \@marks, ]
83 [ "index" => \@index, ]
84 [ "abstract" => \$abstract, ]
85 [ "strip-parens" => $bool, ]
86 [ "strip-dollars" => $bool, ]
87 [ %driver_specific_arguments ] );
89 Creates a new Grutatxt object instance. All parameters are
90 optional.
92 =over 4
94 =item I<mode>
96 Output format. Can be HTML, troff or man. HTML is used if not specified.
98 =item I<title>
100 If I<title> is specified as a reference to scalar, the first
101 level 1 heading found in the text is stored inside it.
103 =item I<marks>
105 Marks in the Grutatxt markup are created by inserting the
106 string <-> alone in a line. If I<marks> is specified as a
107 reference to array, it will be filled with the subscripts
108 (relative to the output array) of the lines where the marks
109 are found in the text.
111 =item I<index>
113 If I<index> is specified as a reference to array, it will
114 be filled with two element arrayrefs with the level as first
115 argument and the heading as second.
117 This information can be used to build a table of contents
118 of the processed text.
120 =item I<strip-parens>
122 Function names in the Grutatxt markup are strings of
123 alphanumeric characters immediately followed by a pair
124 of open and close parentheses. If this boolean value is
125 set, function names found in the processed text will have
126 their parentheses deleted.
128 =item I<strip-dollars>
130 Variable names in the Grutatxt markup are strings of
131 alphanumeric characters preceded by a dollar sign.
132 If this boolean value is set, variable names found in
133 the processed text will have the dollar sign deleted.
135 =item I<abstract>
137 The I<abstract> of a Grutatxt document is the fragment of text
138 from the beginning of the document to the end of the first
139 paragraph after the title. If I<abstract> is specified as a
140 reference to scalar, it will contain (after each call to the
141 B<process()> method) the subscript of the element of the output
142 array that marks the end of the subject.
144 =item I<no-pure-verbatim>
146 Since version 2.0.15, text effects as italics and bold are not
147 processed in I<verbatim> (preformatted) mode. If you want to
148 revert to the old behaviour, use this option.
150 =item I<toc>
152 If set, a table of contents will be generated after the abstract.
153 The table of contents will be elaborated using headings from 2
154 and 3 levels.
156 =back
158 =cut
160 sub new
162 my ($class, %args) = @_;
163 my ($gh);
165 $args{'mode'} ||= 'HTML';
167 $class .= "::" . $args{'mode'};
169 $gh = new $class(%args);
171 return $gh;
175 sub escape
176 # escapes special characters, ignoring passthrough code
178 my ($gh, $l) = @_;
180 # splits between << and >>
181 my (@l) = split(/(<<|>>)/, $l);
183 @l = map {
184 my $l = $_;
186 # escape only text outside << and >>
187 unless ($l eq '<<' .. $l eq '>>') {
188 $l = $gh->_escape($l);
191 $_ = $l;
192 } @l;
194 # join again, stripping << and >>
195 $l = join('', grep(!/^(<<|>>)$/, @l));
197 return $l;
201 =head2 process
203 @output = $grutatxt->process($text);
205 Processes a text in Grutatxt format. The result is returned
206 as an array of lines.
208 =cut
210 sub process
212 my ($gh, $content) = @_;
213 my ($p);
215 # clean output
216 @{$gh->{'o'}} = ();
218 # clean title and paragraph numbers
219 $gh->{'-title'} = '';
220 $gh->{'-p'} = 0;
222 # clean marks
223 if (!defined $gh->{marks}) {
224 $gh->{marks} = [];
227 @{$gh->{'marks'}} = ();
229 # clean index
230 if (!$gh->{index}) {
231 $gh->{index} = [];
234 @{$gh->{'index'}} = ();
236 # reset abstract line
237 if (!$gh->{abstract}) {
238 $gh->{abstract} = \$gh->{_abstract};
241 ${$gh->{'abstract'}} = 0;
243 # insert prefix
244 $gh->_prefix();
246 $gh->{'-mode'} = undef;
248 foreach my $l (split(/\n/,$content)) {
249 # inline data (passthrough)
250 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
251 $gh->_inline($l);
252 next;
255 # marks
256 if ($l =~ /^\s*<\->\s*$/) {
257 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
258 if ref($gh->{'marks'});
260 next;
263 # TOC mark
264 if ($l =~ /^\s*<\?>\s*$/) {
265 $gh->{toc} = $gh->{_toc_pos} = scalar(@{$gh->{o}});
266 next;
269 # escape possibly dangerous characters
270 $l = $gh->escape($l);
272 # empty lines
273 $l =~ s/^\r$//ge;
274 if ($l =~ s/^$/$gh->_empty_line()/ge) {
275 # mark the abstract end
276 if ($gh->{'-title'}) {
277 $gh->{'-p'}++;
279 # mark abstract if it's the
280 # second paragraph from the title
281 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
282 if $gh->{'-p'} == 2;
286 # line-mutating process
287 my $ol = $l;
289 if ($gh->{'-process-urls'}) {
290 # URLs followed by a parenthesized phrase
291 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
292 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
293 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
294 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
295 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
296 $l =~ s/(mailto:\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
298 # URLs without phrase
299 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
300 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
301 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
302 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
303 $l =~ s/([^=][^\"])(mailto:)(\S+)/$1.$gh->_url($2.$3,$3)/ge;
305 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
306 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
307 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
308 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
311 # change '''text''' and *text* into strong emphasis
312 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
313 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
314 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
316 # change ''text'' and _text_ into emphasis
317 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
318 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
319 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
321 # change `text' into code
322 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
324 # enclose function names
325 if ($gh->{'strip-parens'}) {
326 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
328 else {
329 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
332 # enclose variable names
333 if ($gh->{'strip-dollars'}) {
334 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
336 else {
337 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
341 # main switch
344 # definition list
345 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
346 $gh->{'-mode-elems'} ++;
349 # unsorted list
350 elsif ($gh->{'-mode'} ne 'pre' and
351 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
352 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
353 $gh->{'-mode-elems'} ++;
356 # sorted list
357 elsif ($gh->{'-mode'} ne 'pre' and
358 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
359 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
360 $gh->{'-mode-elems'} ++;
363 # quoted block
364 elsif ($gh->{'-mode'} ne 'pre' and
365 $l =~ s/^\s\"/$gh->_blockquote()/e) {
368 # table rows
369 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
370 $gh->{'-mode-elems'} ++;
373 # table heading / end of row
374 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
377 # preformatted text
378 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
379 if ($gh->{'-mode'} eq 'pre' &&
380 !$gh->{'no-pure-verbatim'}) {
381 # set line back to original
382 $l = $ol;
386 # anything else
387 else {
388 # back to normal mode
389 $gh->_new_mode(undef);
392 # 1 level heading
393 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
395 # 2 level heading
396 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
398 # 3 level heading
399 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
401 # change ------ into hr
402 $l =~ s/^----*$/$gh->_hr()/e;
404 # push finally
405 $gh->_push($l) if $l;
408 # flush
409 $gh->_new_mode(undef);
411 # postfix
412 $gh->_postfix();
414 # set title
415 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
417 # set abstract, if not set
418 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
419 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
421 # travel all lines again, post-escaping
422 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
424 # add TOC after first paragraph
425 if ($gh->{toc} && @{$gh->{o}}) {
426 my $p = $gh->{_toc_pos} ||
427 $gh->{marks}->[0] ||
428 ${$gh->{abstract}};
430 @{$gh->{o}} = (@{$gh->{o}}[0 .. $p],
431 $gh->_toc(),
432 @{$gh->{o}}[$p + 1 ..
433 scalar(@{$gh->{o}})]);
436 return @{$gh->{'o'}};
440 =head2 process_file
442 @output = $grutatxt->process_file($filename);
444 Processes a file in Grutatxt format.
446 =cut
448 sub process_file
450 my ($gh, $file) = @_;
452 open F, $file or return(undef);
454 my ($content) = join('',<F>);
455 close F;
457 return $gh->process($content);
461 sub _push
463 my ($gh, $l) = @_;
465 push(@{$gh->{'o'}},$l);
469 sub _process_heading
471 my ($gh, $level, $hd) = @_;
472 my ($l);
474 $l = pop(@{$gh->{'o'}});
476 if ($l eq $gh->_empty_line()) {
477 $gh->_push($l);
478 return $hd;
481 # store title
482 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
484 # store index
485 if (ref($gh->{'index'})) {
486 push(@{$gh->{'index'}}, [ $level, $l ]);
489 return $gh->_heading($level,$l);
493 sub _calc_col_span
495 my ($gh, $l) = @_;
496 my (@spans);
498 # strip first + and all -
499 $l =~ s/^\+//;
500 $l =~ s/-//g;
502 my ($t) = 1; @spans = ();
503 for (my $n = 0; $n < length($l); $n++) {
504 if (substr($l, $n, 1) eq '+') {
505 push(@spans, $t);
506 $t = 1;
508 else {
509 # it's a colspan mark:
510 # increment
511 $t++;
515 return @spans;
519 sub _table_row
521 my ($gh, $str) = @_;
523 my @s = split(/\|/,$str);
525 for (my $n = 0; $n < scalar(@s); $n++) {
526 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
529 push(@{$gh->{'-table-raw'}}, $str);
531 return '';
535 sub _pre
537 my ($gh, $l) = @_;
539 # if any other mode is active, add to it
540 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
541 $l =~ s/^\s+//;
543 my ($a) = pop(@{$gh->{'o'}})." ".$l;
544 $gh->_push($a);
545 $l = '';
547 else {
548 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
549 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
551 $gh->_new_mode('pre');
554 return $l;
558 sub _multilevel_list
560 my ($gh, $str, $ind) = @_;
561 my (@l,$level);
563 @l = @{$gh->{$str}};
564 $ind = length($ind);
565 $level = 0;
567 if ($l[-1] < $ind) {
568 # if last level is less indented, increase
569 # nesting level
570 push(@l, $ind);
571 $level++;
573 elsif ($l[-1] > $ind) {
574 # if last level is more indented, decrease
575 # levels until the same is found (or back to
576 # the beginning if not)
577 while (pop(@l)) {
578 $level--;
579 last if $l[-1] == $ind;
583 $gh->{$str} = \@l;
585 return $level;
589 sub _unsorted_list
591 my ($gh, $ind) = @_;
593 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
597 sub _ordered_list
599 my ($gh, $ind) = @_;
601 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
605 # empty stubs for falling through the superclass
607 sub _inline { my ($gh, $l) = @_; $l; }
608 sub _escape { my ($gh, $l) = @_; $l; }
609 sub _escape_post { my ($gh, $l) = @_; $l; }
610 sub _empty_line { my ($gh) = @_; ''; }
611 sub _url { my ($gh, $url, $label) = @_; ''; }
612 sub _strong { my ($gh, $str) = @_; $str; }
613 sub _em { my ($gh, $str) = @_; $str; }
614 sub _code { my ($gh, $str) = @_; $str; }
615 sub _funcname { my ($gh, $str) = @_; $str; }
616 sub _varname { my ($gh, $str) = @_; $str; }
617 sub _new_mode { my ($gh, $mode) = @_; }
618 sub _dl { my ($gh, $str) = @_; $str; }
619 sub _ul { my ($gh, $level) = @_; ''; }
620 sub _ol { my ($gh, $level) = @_; ''; }
621 sub _blockquote { my ($gh, $str) = @_; $str; }
622 sub _hr { my ($gh) = @_; ''; }
623 sub _heading { my ($gh, $level, $l) = @_; $l; }
624 sub _table { my ($gh, $str) = @_; $str; }
625 sub _prefix { my ($gh) = @_; }
626 sub _postfix { my ($gh) = @_; }
627 sub _toc { my ($gh) = @_; return (); }
629 ###########################################################
631 =head1 DRIVER SPECIFIC INFORMATION
633 =cut
635 ###########################################################
636 # HTML Driver
638 package Grutatxt::HTML;
640 @ISA = ("Grutatxt");
642 =head2 HTML Driver
644 The additional parameters for a new Grutatxt object are:
646 =over 4
648 =item I<table-headers>
650 If this boolean value is set, the first row in tables
651 is assumed to be the heading and rendered using 'th'
652 instead of 'td' tags.
654 =item I<center-tables>
656 If this boolean value is set, tables are centered.
658 =item I<expand-tables>
660 If this boolean value is set, tables are expanded (width 100%).
662 =item I<dl-as-dl>
664 If this boolean value is set, definition lists will be
665 rendered using 'dl', 'dt' and 'dd' instead of tables.
667 =item I<header-offset>
669 Offset to be summed to the heading level when rendering
670 'h?' tags (default is 0).
672 =item I<class-oddeven>
674 If this boolean value is set, tables will be rendered
675 with an "oddeven" CSS class, and rows alternately classed
676 as "even" or "odd". If it's not set, no CSS class info
677 is added to tables.
679 =item I<url-label-max>
681 If an URL without label is given (that is, the URL itself
682 is used as the label), it's trimmed to have as much
683 characters as this value says. By default it's 80.
685 =back
687 =cut
689 sub new
691 my ($class, %args) = @_;
692 my ($gh);
694 bless(\%args, $class);
695 $gh = \%args;
697 $gh->{'-process-urls'} = 1;
698 $gh->{'url-label-max'} ||= 80;
700 return $gh;
704 sub _inline
706 my ($gh, $l) = @_;
708 # accept unnamed and HTML inlines
709 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
710 $gh->{'-inline'} = 'HTML';
711 return;
714 if ($l =~ /^>>$/) {
715 delete $gh->{'-inline'};
716 return;
719 if ($gh->{'-inline'} eq 'HTML') {
720 $gh->_push($l);
725 sub _escape
727 my ($gh, $l) = @_;
729 $l =~ s/&/&amp;/g;
730 $l =~ s/</&lt;/g;
731 $l =~ s/>/&gt;/g;
733 return $l;
737 sub _empty_line
739 my ($gh) = @_;
741 return('<p>');
745 sub _url
747 my ($gh, $url, $label) = @_;
749 if (!$label) {
750 $label = $url;
752 if (length($label) > $gh->{'url-label-max'}) {
753 $label = substr($label, 0,
754 $gh->{'url-label-max'}) . '...';
758 return "<a href = \"$url\">$label</a>";
762 sub _strong
764 my ($gh, $str) = @_;
765 return "<strong>$str</strong>";
769 sub _em
771 my ($gh, $str) = @_;
772 return "<em>$str</em>";
776 sub _code
778 my ($gh, $str) = @_;
779 return "<code class = 'literal'>$str</code>";
783 sub _funcname
785 my ($gh, $str) = @_;
786 return "<code class = 'funcname'>$str</code>";
790 sub _varname
792 my ($gh, $str) = @_;
793 return "<code class = 'var'>$str</code>";
797 sub _new_mode
799 my ($gh, $mode, $params) = @_;
801 if ($mode ne $gh->{'-mode'}) {
802 my $tag;
804 # clean list levels
805 if ($gh->{'-mode'} eq 'ul') {
806 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
808 elsif ($gh->{'-mode'} eq 'ol') {
809 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
811 elsif ($gh->{'-mode'}) {
812 $gh->_push("</$gh->{'-mode'}>");
815 # send new one
816 $tag = $params ? "<$mode $params>" : "<$mode>";
817 $gh->_push($tag) if $mode;
819 $gh->{'-mode'} = $mode;
820 $gh->{'-mode-elems'} = 0;
822 # clean previous lists
823 $gh->{'-ul-levels'} = undef;
824 $gh->{'-ol-levels'} = undef;
829 sub _dl
831 my ($gh, $str) = @_;
832 my ($ret) = '';
834 if ($gh->{'dl-as-dl'}) {
835 $gh->_new_mode('dl');
836 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
838 else {
839 $gh->_new_mode('table');
840 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
843 return $ret;
847 sub _ul
849 my ($gh, $levels) = @_;
850 my ($ret);
852 $ret = '';
854 if ($levels > 0) {
855 $ret .= '<ul>';
857 elsif ($levels < 0) {
858 $ret .= '</li></ul>' x abs($levels);
861 if ($gh->{'-mode'} ne 'ul') {
862 $gh->{'-mode'} = 'ul';
864 else {
865 $ret .= '</li>' if $levels <= 0;
868 $ret .= '<li>';
870 return $ret;
874 sub _ol
876 my ($gh, $levels) = @_;
877 my ($ret);
879 $ret = '';
881 if ($levels > 0) {
882 $ret .= '<ol>';
884 elsif ($levels < 0) {
885 $ret .= '</li></ol>' x abs($levels);
888 if ($gh->{'-mode'} ne 'ol') {
889 $gh->{'-mode'} = 'ol';
891 else {
892 $ret .= '</li>' if $levels <= 0;
895 $ret .= '<li>';
897 return $ret;
901 sub _blockquote
903 my ($gh) = @_;
905 $gh->_new_mode('blockquote');
906 return "\"";
910 sub _hr
912 my ($gh) = @_;
914 return "<hr size = '1' noshade = 'noshade'>";
918 sub __mkanchor
920 my $gh = shift;
921 my $a = shift;
923 $a = lc($a);
924 $a =~ s/[\"\'\/]//g;
925 $a =~ s/\s/_/g;
926 $a =~ s/<[^>]+>//g;
928 return $a;
932 sub _heading
934 my ($gh, $level, $l) = @_;
936 # creates a valid anchor
937 my $a = $gh->__mkanchor($l);
939 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
940 $a, $level+$gh->{'header-offset'},
941 $l, $level+$gh->{'header-offset'});
943 return $l;
947 sub _table
949 my ($gh, $str) = @_;
951 if ($gh->{'-mode'} eq 'table') {
952 my ($class) = '';
953 my (@spans) = $gh->_calc_col_span($str);
955 # calculate CSS class, if any
956 if ($gh->{'class-oddeven'}) {
957 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
960 $str = "<tr $class>";
962 # build columns
963 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
964 my ($i,$s);
966 $i = ${$gh->{'-table'}}[$n];
967 $i = "&nbsp;" if $i =~ /^\s*$/;
969 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
971 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
972 $str .= "<th $class $s>$i</th>";
974 else {
975 $str .= "<td $class $s>$i</td>";
979 $str .= '</tr>';
981 @{$gh->{'-table'}} = ();
982 $gh->{'-tbl-row'}++;
984 else {
985 # new table
986 my ($params);
988 $params = "border = '1'";
989 $params .= " width = '100\%'" if $gh->{'expand-tables'};
990 $params .= " align = 'center'" if $gh->{'center-tables'};
991 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
993 $gh->_new_mode('table', $params);
995 @{$gh->{'-table'}} = ();
996 $gh->{'-tbl-row'} = 1;
997 $str = '';
1000 return $str;
1004 sub _toc
1006 my $gh = shift;
1007 my @t = ();
1009 push(@t, "<div class = 'TOC'>");
1011 my $l = 0;
1013 foreach my $e (@{$gh->{index}}) {
1014 # ignore level 1 headings
1015 if ($e->[0] == 1) {
1016 next;
1019 if ($l < $e->[0]) {
1020 push(@t, '<ol>');
1022 elsif ($l > $e->[0]) {
1023 push(@t, '</ol>');
1026 $l = $e->[0];
1028 push(@t, sprintf("<li><a href = '#%s'>%s</a></li>",
1029 $gh->__mkanchor($e->[1]), $e->[1]));
1032 while (--$l) {
1033 push(@t, '</ol>');
1036 push(@t, "</div>");
1038 return @t;
1041 ###########################################################
1042 # troff Driver
1044 package Grutatxt::troff;
1046 @ISA = ("Grutatxt");
1048 =head2 troff Driver
1050 The troff driver uses the B<-me> macros and B<tbl>. A
1051 good way to post-process this output (to PostScript in
1052 the example) could be by using
1054 groff -t -me -Tps
1056 The additional parameters for a new Grutatxt object are:
1058 =over 4
1060 =item I<normal-size>
1062 The point size of normal text. By default is 10.
1064 =item I<heading-sizes>
1066 This argument must be a reference to an array containing
1067 the size in points of the 3 different heading levels. By
1068 default, level sizes are [ 20, 18, 15 ].
1070 =item I<table-type>
1072 The type of table to be rendered by B<tbl>. Can be
1073 I<allbox> (all lines rendered; this is the default value),
1074 I<box> (only outlined) or I<doublebox> (only outlined by
1075 a double line).
1077 =back
1079 =cut
1081 sub new
1083 my ($class, %args) = @_;
1084 my ($gh);
1086 bless(\%args,$class);
1087 $gh = \%args;
1089 $gh->{'-process-urls'} = 0;
1091 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
1092 $gh->{'normal-size'} ||= 10;
1093 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
1095 return $gh;
1099 sub _prefix
1101 my ($gh) = @_;
1103 $gh->_push(".nr pp $gh->{'normal-size'}");
1104 $gh->_push(".nh");
1108 sub _inline
1110 my ($gh,$l) = @_;
1112 # accept only troff inlines
1113 if ($l =~ /^<<\s*troff$/i) {
1114 $gh->{'-inline'} = 'troff';
1115 return;
1118 if ($l =~ /^>>$/) {
1119 delete $gh->{'-inline'};
1120 return;
1123 if ($gh->{'-inline'} eq 'troff') {
1124 $gh->_push($l);
1129 sub _escape
1131 my ($gh,$l) = @_;
1133 $l =~ s/\\/\\\\/g;
1134 $l =~ s/^'/\\&'/;
1136 return $l;
1140 sub _empty_line
1142 my ($gh) = @_;
1144 return '.lp';
1148 sub _strong
1150 my ($gh, $str) = @_;
1151 return "\\fB$str\\fP";
1155 sub _em
1157 my ($gh, $str) = @_;
1158 return "\\fI$str\\fP";
1162 sub _code
1164 my ($gh, $str) = @_;
1165 return "\\fI$str\\fP";
1169 sub _funcname
1171 my ($gh, $str) = @_;
1172 return "\\fB$str\\fP";
1176 sub _varname
1178 my ($gh, $str) = @_;
1179 return "\\fI$str\\fP";
1183 sub _new_mode
1185 my ($gh, $mode, $params) = @_;
1187 if ($mode ne $gh->{'-mode'}) {
1188 my $tag;
1190 # flush previous list
1191 if ($gh->{'-mode'} eq 'pre') {
1192 $gh->_push('.)l');
1194 elsif ($gh->{'-mode'} eq 'table') {
1195 chomp($gh->{'-table-head'});
1196 $gh->{'-table-head'} =~ s/\s+$//;
1197 $gh->_push($gh->{'-table-head'} . '.');
1198 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1200 elsif ($gh->{'-mode'} eq 'blockquote') {
1201 $gh->_push('.)q');
1204 # send new one
1205 if ($mode eq 'pre') {
1206 $gh->_push('.(l L');
1208 elsif ($mode eq 'blockquote') {
1209 $gh->_push('.(q');
1212 $gh->{'-mode'} = $mode;
1217 sub _dl
1219 my ($gh, $str) = @_;
1221 $gh->_new_mode('dl');
1222 return ".ip \"$str\"\n";
1226 sub _ul
1228 my ($gh) = @_;
1230 $gh->_new_mode('ul');
1231 return ".bu\n";
1235 sub _ol
1237 my ($gh) = @_;
1239 $gh->_new_mode('ol');
1240 return ".np\n";
1244 sub _blockquote
1246 my ($gh) = @_;
1248 $gh->_new_mode('blockquote');
1249 return "\"";
1253 sub _hr
1255 my ($gh) = @_;
1257 return '.hl';
1261 sub _heading
1263 my ($gh, $level, $l) = @_;
1265 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1267 return $l;
1271 sub _table
1273 my ($gh, $str) = @_;
1275 if ($gh->{'-mode'} eq 'table') {
1276 my ($h, $b);
1277 my (@spans) = $gh->_calc_col_span($str);
1279 # build columns
1280 $h = '';
1281 $b = '';
1282 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1283 my ($i);
1285 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1286 $h .= 'cB ';
1288 else {
1289 $h .= 'l ';
1292 # add span columns
1293 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1295 $b .= '#' if $n;
1297 $i = ${$gh->{'-table'}}[$n];
1298 $i =~ s/^\s+//;
1299 $i =~ s/\s+$//;
1300 $i =~ s/(\s)+/$1/g;
1301 $b .= $i;
1304 # add a separator
1305 $b .= "\n_" if $gh->{'table-headers'} and
1306 $gh->{'-tbl-row'} == 1 and
1307 $gh->{'table-type'} ne "allbox";
1309 $gh->{'-table-head'} .= "$h\n";
1310 $gh->{'-table-body'} .= "$b\n";
1312 @{$gh->{'-table'}} = ();
1313 $gh->{'-tbl-row'}++;
1315 else {
1316 # new table
1317 $gh->_new_mode('table');
1319 @{$gh->{'-table'}} = ();
1320 $gh->{'-tbl-row'} = 1;
1322 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1323 $gh->{'-table-body'} = '';
1326 $str = '';
1327 return $str;
1331 sub _postfix
1333 my ($gh) = @_;
1335 # add to top headings and footers
1336 unshift(@{$gh->{'o'}},".ef '\%' ''");
1337 unshift(@{$gh->{'o'}},".of '' '\%'");
1338 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1339 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1343 ###########################################################
1344 # man Driver
1346 package Grutatxt::man;
1348 @ISA = ("Grutatxt::troff", "Grutatxt");
1350 =head2 man Driver
1352 The man driver is used to generate Unix-like man pages. Note that
1353 all headings have the same level with this output driver.
1355 The additional parameters for a new Grutatxt object are:
1357 =over 4
1359 =item I<section>
1361 The man page section (see man documentation). By default is 1.
1363 =item I<page-name>
1365 The name of the page. This is usually the name of the program
1366 or function the man page is documenting and will be shown in the
1367 page header. By default is the empty string.
1369 =back
1371 =cut
1373 sub new
1375 my ($class, %args) = @_;
1376 my ($gh);
1378 bless(\%args,$class);
1379 $gh = \%args;
1381 $gh->{'-process-urls'} = 0;
1383 $gh->{'section'} ||= 1;
1384 $gh->{'page-name'} ||= "";
1386 return $gh;
1390 sub _prefix
1392 my ($gh) = @_;
1394 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1398 sub _inline
1400 my ($gh, $l) = @_;
1402 # accept only man markup inlines
1403 if ($l =~ /^<<\s*man$/i) {
1404 $gh->{'-inline'} = 'man';
1405 return;
1408 if ($l =~ /^>>$/) {
1409 delete $gh->{'-inline'};
1410 return;
1413 if ($gh->{'-inline'} eq 'man') {
1414 $gh->_push($l);
1419 sub _empty_line
1421 my ($gh) = @_;
1423 return ' ';
1427 sub _new_mode
1429 my ($gh,$mode,$params) = @_;
1431 if ($mode ne $gh->{'-mode'}) {
1432 my $tag;
1434 # flush previous list
1435 if ($gh->{'-mode'} eq 'pre' or
1436 $gh->{'-mode'} eq 'table') {
1437 $gh->_push('.fi');
1440 if ($gh->{'-mode'} eq 'blockquote') {
1441 $gh->_push('.RE');
1444 if ($gh->{'-mode'} eq 'ul') {
1445 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1448 if ($gh->{'-mode'} eq 'ol') {
1449 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1452 # send new one
1453 if ($mode eq 'pre' or $mode eq 'table') {
1454 $gh->_push('.nf');
1457 if ($mode eq 'blockquote') {
1458 $gh->_push('.RS 4');
1461 $gh->{'-mode'} = $mode;
1466 sub _dl
1468 my ($gh, $str) = @_;
1470 $gh->_new_mode('dl');
1471 return ".TP\n.B \"$str\"\n";
1475 sub _ul
1477 my ($gh, $levels) = @_;
1478 my ($ret) = '';
1480 if ($levels > 0) {
1481 $ret = ".RS 4\n";
1483 elsif ($levels < 0) {
1484 $ret = ".RE\n" x abs($levels);
1487 $gh->_new_mode('ul');
1488 return $ret . ".TP 4\n\\(bu\n";
1492 sub _ol
1494 my ($gh, $levels) = @_;
1495 my $l = @{$gh->{'-ol-levels'}};
1496 my $ret = '';
1498 $gh->{'-ol-level'} += $levels;
1500 if ($levels > 0) {
1501 $ret = ".RS 4\n";
1503 $l[$gh->{'-ol-level'}] = 1;
1505 elsif ($levels < 0) {
1506 $ret = ".RE\n" x abs($levels);
1509 $gh->_new_mode('ol');
1510 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1512 return $ret;
1516 sub _hr
1518 my ($gh) = @_;
1520 return '';
1524 sub _heading
1526 my ($gh, $level, $l) = @_;
1528 # all headers are the same depth in man pages
1529 return ".SH \"" . uc($l) . "\"";
1533 sub _table
1535 my ($gh, $str) = @_;
1537 if ($gh->{'-mode'} eq 'table') {
1538 foreach my $r (@{$gh->{'-table-raw'}}) {
1539 $gh->_push("|$r|");
1542 else {
1543 $gh->_new_mode('table');
1546 @{$gh->{'-table'}} = ();
1547 @{$gh->{'-table-raw'}} = ();
1549 $gh->_push($str);
1551 return '';
1555 sub _postfix
1560 ###########################################################
1561 # latex Driver
1563 package Grutatxt::latex;
1565 @ISA = ("Grutatxt");
1567 =head2 LaTeX Driver
1569 The additional parameters for a new Grutatxt object are:
1571 =over 4
1573 =item I<docclass>
1575 The LaTeX document class. By default is 'report'. You can also use
1576 'article' or 'book' (consult your LaTeX documentation for details).
1578 =item I<papersize>
1580 The paper size to be used in the document. By default is 'a4paper'.
1582 =item I<encoding>
1584 The character encoding used in the document. By default is 'latin1'.
1586 =back
1588 Note that you can't nest further than 4 levels in LaTeX; if you do,
1589 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1591 =cut
1593 sub new
1595 my ($class, %args) = @_;
1596 my ($gh);
1598 bless(\%args,$class);
1599 $gh = \%args;
1601 $gh->{'-process-urls'} = 0;
1603 $gh->{'-docclass'} ||= 'report';
1604 $gh->{'-papersize'} ||= 'a4paper';
1605 $gh->{'-encoding'} ||= 'latin1';
1607 return $gh;
1611 sub _prefix
1613 my ($gh) = @_;
1615 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1616 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1618 $gh->_push("\\begin{document}");
1622 sub _inline
1624 my ($gh, $l) = @_;
1626 # accept only latex inlines
1627 if ($l =~ /^<<\s*latex$/i) {
1628 $gh->{'-inline'} = 'latex';
1629 return;
1632 if ($l =~ /^>>$/) {
1633 delete $gh->{'-inline'};
1634 return;
1637 if ($gh->{'-inline'} eq 'latex') {
1638 $gh->_push($l);
1643 sub _escape
1645 my ($gh, $l) = @_;
1647 $l =~ s/ _ / \\_ /g;
1648 $l =~ s/ ~ / \\~ /g;
1649 $l =~ s/ & / \\& /g;
1651 return $l;
1655 sub _escape_post
1657 my ($gh, $l) = @_;
1659 $l =~ s/ # / \\# /g;
1660 $l =~ s/^\\n$//g;
1661 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1663 return $l;
1667 sub _empty_line
1669 my ($gh) = @_;
1671 return "\\n";
1675 sub _strong
1677 my ($gh, $str) = @_;
1678 return "\\textbf{$str}";
1682 sub _em
1684 my ($gh, $str) = @_;
1685 return "\\emph{$str}";
1689 sub _code
1691 my ($gh, $str) = @_;
1692 return "{\\tt $str}";
1696 sub _funcname
1698 my ($gh, $str) = @_;
1699 return "{\\tt $str}";
1703 sub _varname
1705 my ($gh, $str) = @_;
1707 $str =~ s/^\$/\\\$/;
1709 return "{\\tt $str}";
1713 sub _new_mode
1715 my ($gh, $mode, $params) = @_;
1717 # mode equivalences
1718 my %latex_modes = (
1719 'pre' => 'verbatim',
1720 'blockquote' => 'quote',
1721 'table' => 'tabular',
1722 'dl' => 'description',
1723 'ul' => 'itemize',
1724 'ol' => 'enumerate'
1727 if ($mode ne $gh->{'-mode'}) {
1728 # close previous mode
1729 if ($gh->{'-mode'} eq 'ul') {
1730 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1732 elsif ($gh->{'-mode'} eq 'ol') {
1733 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1735 elsif ($gh->{'-mode'} eq 'table') {
1736 $gh->_push("\\end{tabular}\n");
1738 else {
1739 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1740 if $gh->{'-mode'};
1743 # send new one
1744 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1745 if $mode;
1747 $gh->{'-mode'} = $mode;
1749 $gh->{'-ul-levels'} = undef;
1750 $gh->{'-ol-levels'} = undef;
1755 sub _dl
1757 my ($gh, $str) = @_;
1759 $gh->_new_mode('dl');
1760 return "\\item[$str]\n";
1764 sub _ul
1766 my ($gh, $levels) = @_;
1767 my ($ret);
1769 $ret = '';
1771 if ($levels > 0) {
1772 $ret .= "\\begin{itemize}\n";
1774 elsif ($levels < 0) {
1775 $ret .= "\\end{itemize}\n" x abs($levels);
1778 $gh->{'-mode'} = 'ul';
1780 $ret .= "\\item\n";
1782 return $ret;
1786 sub _ol
1788 my ($gh, $levels) = @_;
1789 my ($ret);
1791 $ret = '';
1793 if ($levels > 0) {
1794 $ret .= "\\begin{enumerate}\n";
1796 elsif ($levels < 0) {
1797 $ret .= "\\end{enumerate}\n" x abs($levels);
1800 $gh->{'-mode'} = 'ol';
1802 $ret .= "\\item\n";
1804 return $ret;
1808 sub _blockquote
1810 my ($gh) = @_;
1812 $gh->_new_mode('blockquote');
1813 return "``";
1817 sub _hr
1819 my ($gh) = @_;
1821 return "------------\n";
1825 sub _heading
1827 my ($gh, $level, $l) = @_;
1829 my @latex_headings = ( "\\section*{", "\\subsection*{",
1830 "\\subsubsection*{");
1832 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1834 return $l;
1838 sub _table
1840 my ($gh,$str) = @_;
1842 if ($gh->{'-mode'} eq 'table') {
1843 my ($class) = '';
1844 my (@spans) = $gh->_calc_col_span($str);
1845 my (@cols);
1847 $str = '';
1849 # build columns
1850 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1851 my ($i, $s);
1853 $i = ${$gh->{'-table'}}[$n];
1854 $i = "&nbsp;" if $i =~ /^\s*$/;
1856 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1858 # multispan columns
1859 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1860 if $spans[$n] > 1;
1862 $i =~ s/\s{2,}/ /g;
1863 $i =~ s/^\s+//;
1864 $i =~ s/\s+$//;
1866 push(@cols, $i);
1869 $str .= join('&', @cols) . "\\\\\n\\hline";
1871 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1873 @{$gh->{'-table'}} = ();
1874 $gh->{'-tbl-row'}++;
1876 else {
1877 # new table
1879 # count the number of columns
1880 $str =~ s/[^\+]//g;
1881 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1883 $gh->_push();
1884 $gh->_new_mode('table', $params);
1886 @{$gh->{'-table'}} = ();
1887 $gh->{'-tbl-row'} = 1;
1888 $str = '';
1891 return $str;
1895 sub _postfix
1897 my ($gh) = @_;
1899 $gh->_push("\\end{document}");
1903 ###########################################################
1904 # RTF Driver
1906 package Grutatxt::rtf;
1908 @ISA = ("Grutatxt");
1910 =head2 RTF Driver
1912 The additional parameters for a new Grutatxt object are:
1914 =over 4
1916 =item I<normal-size>
1918 The point size of normal text. By default is 20.
1920 =item I<heading-sizes>
1922 This argument must be a reference to an array containing
1923 the size in points of the 3 different heading levels. By
1924 default, level sizes are [ 34, 30, 28 ].
1926 =back
1928 =cut
1930 sub new
1932 my ($class, %args) = @_;
1933 my ($gh);
1935 bless(\%args, $class);
1936 $gh = \%args;
1938 $gh->{'-process-urls'} = 0;
1940 $gh->{'heading-sizes'} ||= [ 34, 30, 28 ];
1941 $gh->{'normal-size'} ||= 20;
1943 return $gh;
1947 sub _prefix
1949 my $gh = shift;
1951 $gh->_push('{\rtf1\ansi {\plain \fs' . $gh->{'normal-size'} . ' \sa227');
1955 sub _empty_line
1957 my $gh = shift;
1959 return '\par';
1963 sub _heading
1965 my ($gh, $level, $l) = @_;
1967 return '{\b \fs' . $gh->{'heading-sizes'}->[$level] . ' ' . $l . '}';
1971 sub _strong
1973 my ($gh, $str) = @_;
1974 return "{\\b $str}";
1978 sub _em
1980 my ($gh, $str) = @_;
1981 return "{\\i $str}";
1985 sub _code
1987 my ($gh, $str) = @_;
1988 return "{\\tt $str}";
1992 sub _ul
1994 my ($gh, $levels) = @_;
1996 $gh->_new_mode('ul');
1997 return "{{\\bullet \\li" . $levels . ' ';
2001 sub _dl
2003 my ($gh, $str) = @_;
2005 $gh->_new_mode('dl');
2006 return "{{\\b $str \\par} {\\li566 ";
2010 sub _new_mode
2012 my ($gh, $mode, $params) = @_;
2014 if ($mode ne $gh->{'-mode'}) {
2015 if ($gh->{'-mode'} =~ /^(dl|ul)$/) {
2016 $gh->_push('}}');
2019 $gh->{'-mode'} = $mode;
2021 $gh->{'-ul-levels'} = undef;
2022 $gh->{'-ol-levels'} = undef;
2024 else {
2025 if ($mode =~ /^(dl|ul)$/) {
2026 $gh->_push('}\par}');
2032 sub _postfix
2034 my $gh = shift;
2036 @{$gh->{o}} = map { $_ . ' '; } @{$gh->{o}};
2038 $gh->_push('}}');
2042 =head1 AUTHOR
2044 Angel Ortega angel@triptico.com
2046 =cut