New file .my-git-push.
[grutatxt.git] / Grutatxt.pm
blob33f645f4022a00948cb22bed1559ad1740cdaf3c
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2011 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.17-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 =for html <->
67 A comprehensive description of the markup is defined in
68 the README file, included with the Grutatxt package (it is
69 written in Grutatxt format itself, so it can be converted
70 using the I<grutatxt> tool to any of the supported formats).
71 The latest version (and more information) can be retrieved
72 from the Grutatxt home page at:
74 http://triptico.com/software/grutatxt.html
76 =head1 FUNCTIONS AND METHODS
78 =head2 new
80 $grutatxt = new Grutatxt([ "mode" => $mode, ]
81 [ "title" => \$title, ]
82 [ "marks" => \@marks, ]
83 [ "index" => \@index, ]
84 [ "abstract" => \$abstract, ]
85 [ "strip-parens" => $bool, ]
86 [ "strip-dollars" => $bool, ]
87 [ %driver_specific_arguments ] );
89 Creates a new Grutatxt object instance. All parameters are
90 optional.
92 =over 4
94 =item I<mode>
96 Output format. Can be HTML, troff or man. HTML is used if not specified.
98 =item I<title>
100 If I<title> is specified as a reference to scalar, the first
101 level 1 heading found in the text is stored inside it.
103 =item I<marks>
105 Marks in the Grutatxt markup are created by inserting the
106 string <-> alone in a line. If I<marks> is specified as a
107 reference to array, it will be filled with the subscripts
108 (relative to the output array) of the lines where the marks
109 are found in the text.
111 =item I<index>
113 If I<index> is specified as a reference to array, it will
114 be filled with two element arrayrefs with the level as first
115 argument and the heading as second.
117 This information can be used to build a table of contents
118 of the processed text.
120 =item I<strip-parens>
122 Function names in the Grutatxt markup are strings of
123 alphanumeric characters immediately followed by a pair
124 of open and close parentheses. If this boolean value is
125 set, function names found in the processed text will have
126 their parentheses deleted.
128 =item I<strip-dollars>
130 Variable names in the Grutatxt markup are strings of
131 alphanumeric characters preceded by a dollar sign.
132 If this boolean value is set, variable names found in
133 the processed text will have the dollar sign deleted.
135 =item I<abstract>
137 The I<abstract> of a Grutatxt document is the fragment of text
138 from the beginning of the document to the end of the first
139 paragraph after the title. If I<abstract> is specified as a
140 reference to scalar, it will contain (after each call to the
141 B<process()> method) the subscript of the element of the output
142 array that marks the end of the subject.
144 =item I<no-pure-verbatim>
146 Since version 2.0.15, text effects as italics and bold are not
147 processed in I<verbatim> (preformatted) mode. If you want to
148 revert to the old behaviour, use this option.
150 =item I<toc>
152 If set, a table of contents will be generated after the abstract.
153 The table of contents will be elaborated using headings from 2
154 and 3 levels.
156 =back
158 =cut
160 sub new
162 my ($class, %args) = @_;
163 my ($gh);
165 $args{'mode'} ||= 'HTML';
167 $class .= "::" . $args{'mode'};
169 $gh = new $class(%args);
171 return $gh;
175 sub escape
176 # escapes special characters, ignoring passthrough code
178 my ($gh, $l) = @_;
180 # splits between << and >>
181 my (@l) = split(/(<<|>>)/, $l);
183 @l = map {
184 my $l = $_;
186 # escape only text outside << and >>
187 unless ($l eq '<<' .. $l eq '>>') {
188 $l = $gh->_escape($l);
191 $_ = $l;
192 } @l;
194 # join again, stripping << and >>
195 $l = join('', grep(!/^(<<|>>)$/, @l));
197 return $l;
201 =head2 process
203 @output = $grutatxt->process($text);
205 Processes a text in Grutatxt format. The result is returned
206 as an array of lines.
208 =cut
210 sub process
212 my ($gh, $content) = @_;
213 my ($p);
215 # clean output
216 @{$gh->{'o'}} = ();
218 # clean title and paragraph numbers
219 $gh->{'-title'} = '';
220 $gh->{'-p'} = 0;
222 # clean marks
223 if (!defined $gh->{marks}) {
224 $gh->{marks} = [];
227 @{$gh->{'marks'}} = ();
229 # clean index
230 if (!$gh->{index}) {
231 $gh->{index} = [];
234 @{$gh->{'index'}} = ();
236 # reset abstract line
237 if (!$gh->{abstract}) {
238 $gh->{abstract} = \$gh->{_abstract};
241 ${$gh->{'abstract'}} = 0;
243 # insert prefix
244 $gh->_prefix();
246 $gh->{'-mode'} = undef;
248 foreach my $l (split(/\n/,$content)) {
249 # inline data (passthrough)
250 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
251 $gh->_inline($l);
252 next;
255 # marks
256 if ($l =~ /^\s*<\->\s*$/) {
257 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
258 if ref($gh->{'marks'});
260 next;
263 # TOC mark
264 if ($l =~ /^\s*<\?>\s*$/) {
265 $gh->{toc} = $gh->{_toc_pos} = scalar(@{$gh->{o}});
266 next;
269 # escape possibly dangerous characters
270 $l = $gh->escape($l);
272 # empty lines
273 $l =~ s/^\r$//ge;
274 if ($l =~ s/^$/$gh->_empty_line()/ge) {
275 # mark the abstract end
276 if ($gh->{'-title'}) {
277 $gh->{'-p'}++;
279 # mark abstract if it's the
280 # second paragraph from the title
281 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
282 if $gh->{'-p'} == 2;
286 # line-mutating process
287 my $ol = $l;
289 if ($gh->{'-process-urls'}) {
290 # URLs followed by a parenthesized phrase
291 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
292 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
293 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
294 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
295 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
296 $l =~ s/(mailto:\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
298 # URLs without phrase
299 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
300 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
301 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
302 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
303 $l =~ s/([^=][^\"])(mailto:)(\S+)/$1.$gh->_url($2.$3,$3)/ge;
305 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
306 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
307 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
308 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
311 # change '''text''' and *text* into strong emphasis
312 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
313 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
314 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
316 # change ''text'' and _text_ into emphasis
317 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
318 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
319 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
321 # change `text' into code
322 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
324 # enclose function names
325 if ($gh->{'strip-parens'}) {
326 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
328 else {
329 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
332 # enclose variable names
333 if ($gh->{'strip-dollars'}) {
334 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
336 else {
337 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
341 # main switch
344 # definition list
345 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
346 $gh->{'-mode-elems'} ++;
349 # unsorted list
350 elsif ($gh->{'-mode'} ne 'pre' and
351 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
352 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
353 $gh->{'-mode-elems'} ++;
356 # sorted list
357 elsif ($gh->{'-mode'} ne 'pre' and
358 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
359 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
360 $gh->{'-mode-elems'} ++;
363 # quoted block
364 elsif ($gh->{'-mode'} ne 'pre' and
365 $l =~ s/^\s\"/$gh->_blockquote()/e) {
368 # table rows
369 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
370 $gh->{'-mode-elems'} ++;
373 # table heading / end of row
374 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
377 # preformatted text
378 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
379 if ($gh->{'-mode'} eq 'pre' &&
380 !$gh->{'no-pure-verbatim'}) {
381 # set line back to original
382 $l = $ol;
386 # anything else
387 else {
388 # back to normal mode
389 $gh->_new_mode(undef);
392 # 1 level heading
393 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
395 # 2 level heading
396 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
398 # 3 level heading
399 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
401 # change ------ into hr
402 $l =~ s/^----*$/$gh->_hr()/e;
404 # push finally
405 $gh->_push($l) if $l;
408 # flush
409 $gh->_new_mode(undef);
411 # postfix
412 $gh->_postfix();
414 # set title
415 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
417 # set abstract, if not set
418 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
419 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
421 # travel all lines again, post-escaping
422 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
424 # add TOC after first paragraph
425 if ($gh->{toc} && @{$gh->{o}}) {
426 my $p = $gh->{_toc_pos} ||
427 $gh->{marks}->[0] ||
428 ${$gh->{abstract}};
430 @{$gh->{o}} = (@{$gh->{o}}[0 .. $p],
431 $gh->_toc(),
432 @{$gh->{o}}[$p + 1 ..
433 scalar(@{$gh->{o}})]);
436 return @{$gh->{'o'}};
440 =head2 process_file
442 @output = $grutatxt->process_file($filename);
444 Processes a file in Grutatxt format.
446 =cut
448 sub process_file
450 my ($gh, $file) = @_;
452 open F, $file or return(undef);
454 my ($content) = join('',<F>);
455 close F;
457 return $gh->process($content);
461 sub _push
463 my ($gh, $l) = @_;
465 push(@{$gh->{'o'}},$l);
469 sub _process_heading
471 my ($gh, $level, $hd) = @_;
472 my $l;
473 my $is_title = 0;
475 $l = pop(@{$gh->{'o'}});
477 if ($l eq $gh->_empty_line()) {
478 $gh->_push($l);
479 return $hd;
482 # store title
483 if ($level == 1 and not $gh->{'-title'}) {
484 $gh->{'-title'} = $l;
485 $is_title = 1;
488 # store index
489 if (ref($gh->{'index'})) {
490 push(@{$gh->{'index'}}, [ $level, $l ]);
493 return $gh->_heading($level, $l, $is_title);
497 sub _calc_col_span
499 my ($gh, $l) = @_;
500 my (@spans);
502 # strip first + and all -
503 $l =~ s/^\+//;
504 $l =~ s/-//g;
506 my ($t) = 1; @spans = ();
507 for (my $n = 0; $n < length($l); $n++) {
508 if (substr($l, $n, 1) eq '+') {
509 push(@spans, $t);
510 $t = 1;
512 else {
513 # it's a colspan mark:
514 # increment
515 $t++;
519 return @spans;
523 sub _table_row
525 my ($gh, $str) = @_;
527 my @s = split(/\|/,$str);
529 for (my $n = 0; $n < scalar(@s); $n++) {
530 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
533 push(@{$gh->{'-table-raw'}}, $str);
535 return '';
539 sub _pre
541 my ($gh, $l) = @_;
543 # if any other mode is active, add to it
544 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
545 $l =~ s/^\s+//;
547 my ($a) = pop(@{$gh->{'o'}})." ".$l;
548 $gh->_push($a);
549 $l = '';
551 else {
552 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
553 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
555 $gh->_new_mode('pre');
558 return $l;
562 sub _multilevel_list
564 my ($gh, $str, $ind) = @_;
565 my (@l,$level);
567 @l = @{$gh->{$str}};
568 $ind = length($ind);
569 $level = 0;
571 if ($l[-1] < $ind) {
572 # if last level is less indented, increase
573 # nesting level
574 push(@l, $ind);
575 $level++;
577 elsif ($l[-1] > $ind) {
578 # if last level is more indented, decrease
579 # levels until the same is found (or back to
580 # the beginning if not)
581 while (pop(@l)) {
582 $level--;
583 last if $l[-1] == $ind;
587 $gh->{$str} = \@l;
589 return $level;
593 sub _unsorted_list
595 my ($gh, $ind) = @_;
597 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
601 sub _ordered_list
603 my ($gh, $ind) = @_;
605 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
609 # empty stubs for falling through the superclass
611 sub _inline { my ($gh, $l) = @_; $l; }
612 sub _escape { my ($gh, $l) = @_; $l; }
613 sub _escape_post { my ($gh, $l) = @_; $l; }
614 sub _empty_line { my ($gh) = @_; ''; }
615 sub _url { my ($gh, $url, $label) = @_; ''; }
616 sub _strong { my ($gh, $str) = @_; $str; }
617 sub _em { my ($gh, $str) = @_; $str; }
618 sub _code { my ($gh, $str) = @_; $str; }
619 sub _funcname { my ($gh, $str) = @_; $str; }
620 sub _varname { my ($gh, $str) = @_; $str; }
621 sub _new_mode { my ($gh, $mode) = @_; }
622 sub _dl { my ($gh, $str) = @_; $str; }
623 sub _ul { my ($gh, $level) = @_; ''; }
624 sub _ol { my ($gh, $level) = @_; ''; }
625 sub _blockquote { my ($gh, $str) = @_; $str; }
626 sub _hr { my ($gh) = @_; ''; }
627 sub _heading { my ($gh, $level, $l) = @_; $l; }
628 sub _table { my ($gh, $str) = @_; $str; }
629 sub _prefix { my ($gh) = @_; }
630 sub _postfix { my ($gh) = @_; }
631 sub _toc { my ($gh) = @_; return (); }
633 ###########################################################
635 =head1 DRIVER SPECIFIC INFORMATION
637 =cut
639 ###########################################################
640 # HTML Driver
642 package Grutatxt::HTML;
644 @ISA = ("Grutatxt");
646 =head2 HTML Driver
648 The additional parameters for a new Grutatxt object are:
650 =over 4
652 =item I<table-headers>
654 If this boolean value is set, the first row in tables
655 is assumed to be the heading and rendered using 'th'
656 instead of 'td' tags.
658 =item I<center-tables>
660 If this boolean value is set, tables are centered.
662 =item I<expand-tables>
664 If this boolean value is set, tables are expanded (width 100%).
666 =item I<dl-as-dl>
668 If this boolean value is set, definition lists will be
669 rendered using 'dl', 'dt' and 'dd' instead of tables.
671 =item I<header-offset>
673 Offset to be summed to the heading level when rendering
674 'h?' tags (default is 0).
676 =item I<class-oddeven>
678 If this boolean value is set, tables will be rendered
679 with an "oddeven" CSS class, and rows alternately classed
680 as "even" or "odd". If it's not set, no CSS class info
681 is added to tables.
683 =item I<url-label-max>
685 If an URL without label is given (that is, the URL itself
686 is used as the label), it's trimmed to have as much
687 characters as this value says. By default it's 80.
689 =back
691 =cut
693 sub new
695 my ($class, %args) = @_;
696 my ($gh);
698 bless(\%args, $class);
699 $gh = \%args;
701 $gh->{'-process-urls'} = 1;
702 $gh->{'url-label-max'} ||= 80;
704 return $gh;
708 sub _inline
710 my ($gh, $l) = @_;
712 # accept unnamed and HTML inlines
713 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
714 $gh->{'-inline'} = 'HTML';
715 return;
718 if ($l =~ /^>>$/) {
719 delete $gh->{'-inline'};
720 return;
723 if ($gh->{'-inline'} eq 'HTML') {
724 $gh->_push($l);
729 sub _escape
731 my ($gh, $l) = @_;
733 $l =~ s/&/&amp;/g;
734 $l =~ s/</&lt;/g;
735 $l =~ s/>/&gt;/g;
737 return $l;
741 sub _empty_line
743 my ($gh) = @_;
745 return('<p>');
749 sub _url
751 my ($gh, $url, $label) = @_;
753 if (!$label) {
754 $label = $url;
756 if (length($label) > $gh->{'url-label-max'}) {
757 $label = substr($label, 0,
758 $gh->{'url-label-max'}) . '...';
762 return "<a href = \"$url\">$label</a>";
766 sub _strong
768 my ($gh, $str) = @_;
769 return "<strong>$str</strong>";
773 sub _em
775 my ($gh, $str) = @_;
776 return "<em>$str</em>";
780 sub _code
782 my ($gh, $str) = @_;
783 return "<code class = 'literal'>$str</code>";
787 sub _funcname
789 my ($gh, $str) = @_;
790 return "<code class = 'funcname'>$str</code>";
794 sub _varname
796 my ($gh, $str) = @_;
797 return "<code class = 'var'>$str</code>";
801 sub _new_mode
803 my ($gh, $mode, $params) = @_;
805 if ($mode ne $gh->{'-mode'}) {
806 my $tag;
808 # clean list levels
809 if ($gh->{'-mode'} eq 'ul') {
810 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
812 elsif ($gh->{'-mode'} eq 'ol') {
813 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
815 elsif ($gh->{'-mode'}) {
816 $gh->_push("</$gh->{'-mode'}>");
819 # send new one
820 $tag = $params ? "<$mode $params>" : "<$mode>";
821 $gh->_push($tag) if $mode;
823 $gh->{'-mode'} = $mode;
824 $gh->{'-mode-elems'} = 0;
826 # clean previous lists
827 $gh->{'-ul-levels'} = undef;
828 $gh->{'-ol-levels'} = undef;
833 sub _dl
835 my ($gh, $str) = @_;
836 my ($ret) = '';
838 if ($gh->{'dl-as-dl'}) {
839 $gh->_new_mode('dl');
840 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
842 else {
843 $gh->_new_mode('table');
844 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
847 return $ret;
851 sub _ul
853 my ($gh, $levels) = @_;
854 my ($ret);
856 $ret = '';
858 if ($levels > 0) {
859 $ret .= '<ul>';
861 elsif ($levels < 0) {
862 $ret .= '</li></ul>' x abs($levels);
865 if ($gh->{'-mode'} ne 'ul') {
866 $gh->{'-mode'} = 'ul';
868 else {
869 $ret .= '</li>' if $levels <= 0;
872 $ret .= '<li>';
874 return $ret;
878 sub _ol
880 my ($gh, $levels) = @_;
881 my ($ret);
883 $ret = '';
885 if ($levels > 0) {
886 $ret .= '<ol>';
888 elsif ($levels < 0) {
889 $ret .= '</li></ol>' x abs($levels);
892 if ($gh->{'-mode'} ne 'ol') {
893 $gh->{'-mode'} = 'ol';
895 else {
896 $ret .= '</li>' if $levels <= 0;
899 $ret .= '<li>';
901 return $ret;
905 sub _blockquote
907 my ($gh) = @_;
909 $gh->_new_mode('blockquote');
910 return "\"";
914 sub _hr
916 my ($gh) = @_;
918 return "<hr size = '1' noshade = 'noshade'>";
922 sub __mkanchor
924 my $gh = shift;
925 my $a = shift;
927 $a = lc($a);
928 $a =~ s/[\"\'\/]//g;
929 $a =~ s/\s/_/g;
930 $a =~ s/<[^>]+>//g;
932 return $a;
936 sub _heading
938 my ($gh, $level, $l, $title) = @_;
940 # creates a valid anchor
941 my $a = $gh->__mkanchor($l);
943 $l = sprintf(
944 "<a %s name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
945 $title ? "class = 'title'" : '',
947 $level + $gh->{'header-offset'},
949 $level + $gh->{'header-offset'}
952 return $l;
956 sub _table
958 my ($gh, $str) = @_;
960 if ($gh->{'-mode'} eq 'table') {
961 my ($class) = '';
962 my (@spans) = $gh->_calc_col_span($str);
964 # calculate CSS class, if any
965 if ($gh->{'class-oddeven'}) {
966 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
969 $str = "<tr $class>";
971 # build columns
972 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
973 my ($i,$s);
975 $i = ${$gh->{'-table'}}[$n];
976 $i = "&nbsp;" if $i =~ /^\s*$/;
978 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
980 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
981 $str .= "<th $class $s>$i</th>";
983 else {
984 $str .= "<td $class $s>$i</td>";
988 $str .= '</tr>';
990 @{$gh->{'-table'}} = ();
991 $gh->{'-tbl-row'}++;
993 else {
994 # new table
995 my ($params);
997 $params = "border = '1'";
998 $params .= " width = '100\%'" if $gh->{'expand-tables'};
999 $params .= " align = 'center'" if $gh->{'center-tables'};
1000 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
1002 $gh->_new_mode('table', $params);
1004 @{$gh->{'-table'}} = ();
1005 $gh->{'-tbl-row'} = 1;
1006 $str = '';
1009 return $str;
1013 sub _toc
1015 my $gh = shift;
1016 my @t = ();
1018 push(@t, "<div class = 'TOC'>");
1020 my $l = 0;
1022 foreach my $e (@{$gh->{index}}) {
1023 # ignore level 1 headings
1024 if ($e->[0] == 1) {
1025 next;
1028 if ($l < $e->[0]) {
1029 push(@t, '<ol>');
1031 elsif ($l > $e->[0]) {
1032 push(@t, '</ol>');
1035 $l = $e->[0];
1037 push(@t, sprintf("<li><a href = '#%s'>%s</a></li>",
1038 $gh->__mkanchor($e->[1]), $e->[1]));
1041 while (--$l) {
1042 push(@t, '</ol>');
1045 push(@t, "</div>");
1047 return @t;
1050 ###########################################################
1051 # troff Driver
1053 package Grutatxt::troff;
1055 @ISA = ("Grutatxt");
1057 =head2 troff Driver
1059 The troff driver uses the B<-me> macros and B<tbl>. A
1060 good way to post-process this output (to PostScript in
1061 the example) could be by using
1063 groff -t -me -Tps
1065 The additional parameters for a new Grutatxt object are:
1067 =over 4
1069 =item I<normal-size>
1071 The point size of normal text. By default is 10.
1073 =item I<heading-sizes>
1075 This argument must be a reference to an array containing
1076 the size in points of the 3 different heading levels. By
1077 default, level sizes are [ 20, 18, 15 ].
1079 =item I<table-type>
1081 The type of table to be rendered by B<tbl>. Can be
1082 I<allbox> (all lines rendered; this is the default value),
1083 I<box> (only outlined) or I<doublebox> (only outlined by
1084 a double line).
1086 =back
1088 =cut
1090 sub new
1092 my ($class, %args) = @_;
1093 my ($gh);
1095 bless(\%args,$class);
1096 $gh = \%args;
1098 $gh->{'-process-urls'} = 0;
1100 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
1101 $gh->{'normal-size'} ||= 10;
1102 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
1104 return $gh;
1108 sub _prefix
1110 my ($gh) = @_;
1112 $gh->_push(".nr pp $gh->{'normal-size'}");
1113 $gh->_push(".nh");
1117 sub _inline
1119 my ($gh,$l) = @_;
1121 # accept only troff inlines
1122 if ($l =~ /^<<\s*troff$/i) {
1123 $gh->{'-inline'} = 'troff';
1124 return;
1127 if ($l =~ /^>>$/) {
1128 delete $gh->{'-inline'};
1129 return;
1132 if ($gh->{'-inline'} eq 'troff') {
1133 $gh->_push($l);
1138 sub _escape
1140 my ($gh,$l) = @_;
1142 $l =~ s/\\/\\\\/g;
1143 $l =~ s/^'/\\&'/;
1145 return $l;
1149 sub _empty_line
1151 my ($gh) = @_;
1153 return '.lp';
1157 sub _strong
1159 my ($gh, $str) = @_;
1160 return "\\fB$str\\fP";
1164 sub _em
1166 my ($gh, $str) = @_;
1167 return "\\fI$str\\fP";
1171 sub _code
1173 my ($gh, $str) = @_;
1174 return "\\fI$str\\fP";
1178 sub _funcname
1180 my ($gh, $str) = @_;
1181 return "\\fB$str\\fP";
1185 sub _varname
1187 my ($gh, $str) = @_;
1188 return "\\fI$str\\fP";
1192 sub _new_mode
1194 my ($gh, $mode, $params) = @_;
1196 if ($mode ne $gh->{'-mode'}) {
1197 my $tag;
1199 # flush previous list
1200 if ($gh->{'-mode'} eq 'pre') {
1201 $gh->_push('.)l');
1203 elsif ($gh->{'-mode'} eq 'table') {
1204 chomp($gh->{'-table-head'});
1205 $gh->{'-table-head'} =~ s/\s+$//;
1206 $gh->_push($gh->{'-table-head'} . '.');
1207 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1209 elsif ($gh->{'-mode'} eq 'blockquote') {
1210 $gh->_push('.)q');
1213 # send new one
1214 if ($mode eq 'pre') {
1215 $gh->_push('.(l L');
1217 elsif ($mode eq 'blockquote') {
1218 $gh->_push('.(q');
1221 $gh->{'-mode'} = $mode;
1226 sub _dl
1228 my ($gh, $str) = @_;
1230 $gh->_new_mode('dl');
1231 return ".ip \"$str\"\n";
1235 sub _ul
1237 my ($gh) = @_;
1239 $gh->_new_mode('ul');
1240 return ".bu\n";
1244 sub _ol
1246 my ($gh) = @_;
1248 $gh->_new_mode('ol');
1249 return ".np\n";
1253 sub _blockquote
1255 my ($gh) = @_;
1257 $gh->_new_mode('blockquote');
1258 return "\"";
1262 sub _hr
1264 my ($gh) = @_;
1266 return '.hl';
1270 sub _heading
1272 my ($gh, $level, $l) = @_;
1274 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1276 return $l;
1280 sub _table
1282 my ($gh, $str) = @_;
1284 if ($gh->{'-mode'} eq 'table') {
1285 my ($h, $b);
1286 my (@spans) = $gh->_calc_col_span($str);
1288 # build columns
1289 $h = '';
1290 $b = '';
1291 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1292 my ($i);
1294 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1295 $h .= 'cB ';
1297 else {
1298 $h .= 'l ';
1301 # add span columns
1302 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1304 $b .= '#' if $n;
1306 $i = ${$gh->{'-table'}}[$n];
1307 $i =~ s/^\s+//;
1308 $i =~ s/\s+$//;
1309 $i =~ s/(\s)+/$1/g;
1310 $b .= $i;
1313 # add a separator
1314 $b .= "\n_" if $gh->{'table-headers'} and
1315 $gh->{'-tbl-row'} == 1 and
1316 $gh->{'table-type'} ne "allbox";
1318 $gh->{'-table-head'} .= "$h\n";
1319 $gh->{'-table-body'} .= "$b\n";
1321 @{$gh->{'-table'}} = ();
1322 $gh->{'-tbl-row'}++;
1324 else {
1325 # new table
1326 $gh->_new_mode('table');
1328 @{$gh->{'-table'}} = ();
1329 $gh->{'-tbl-row'} = 1;
1331 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1332 $gh->{'-table-body'} = '';
1335 $str = '';
1336 return $str;
1340 sub _postfix
1342 my ($gh) = @_;
1344 # add to top headings and footers
1345 unshift(@{$gh->{'o'}},".ef '\%' ''");
1346 unshift(@{$gh->{'o'}},".of '' '\%'");
1347 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1348 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1352 ###########################################################
1353 # man Driver
1355 package Grutatxt::man;
1357 @ISA = ("Grutatxt::troff", "Grutatxt");
1359 =head2 man Driver
1361 The man driver is used to generate Unix-like man pages. Note that
1362 all headings have the same level with this output driver.
1364 The additional parameters for a new Grutatxt object are:
1366 =over 4
1368 =item I<section>
1370 The man page section (see man documentation). By default is 1.
1372 =item I<page-name>
1374 The name of the page. This is usually the name of the program
1375 or function the man page is documenting and will be shown in the
1376 page header. By default is the empty string.
1378 =back
1380 =cut
1382 sub new
1384 my ($class, %args) = @_;
1385 my ($gh);
1387 bless(\%args,$class);
1388 $gh = \%args;
1390 $gh->{'-process-urls'} = 0;
1392 $gh->{'section'} ||= 1;
1393 $gh->{'page-name'} ||= "";
1395 return $gh;
1399 sub _prefix
1401 my ($gh) = @_;
1403 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1407 sub _inline
1409 my ($gh, $l) = @_;
1411 # accept only man markup inlines
1412 if ($l =~ /^<<\s*man$/i) {
1413 $gh->{'-inline'} = 'man';
1414 return;
1417 if ($l =~ /^>>$/) {
1418 delete $gh->{'-inline'};
1419 return;
1422 if ($gh->{'-inline'} eq 'man') {
1423 $gh->_push($l);
1428 sub _empty_line
1430 my ($gh) = @_;
1432 return ' ';
1436 sub _new_mode
1438 my ($gh,$mode,$params) = @_;
1440 if ($mode ne $gh->{'-mode'}) {
1441 my $tag;
1443 # flush previous list
1444 if ($gh->{'-mode'} eq 'pre' or
1445 $gh->{'-mode'} eq 'table') {
1446 $gh->_push('.fi');
1449 if ($gh->{'-mode'} eq 'blockquote') {
1450 $gh->_push('.RE');
1453 if ($gh->{'-mode'} eq 'ul') {
1454 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1457 if ($gh->{'-mode'} eq 'ol') {
1458 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1461 # send new one
1462 if ($mode eq 'pre' or $mode eq 'table') {
1463 $gh->_push('.nf');
1466 if ($mode eq 'blockquote') {
1467 $gh->_push('.RS 4');
1470 $gh->{'-mode'} = $mode;
1475 sub _dl
1477 my ($gh, $str) = @_;
1479 $gh->_new_mode('dl');
1480 return ".TP\n.B \"$str\"\n";
1484 sub _ul
1486 my ($gh, $levels) = @_;
1487 my ($ret) = '';
1489 if ($levels > 0) {
1490 $ret = ".RS 4\n";
1492 elsif ($levels < 0) {
1493 $ret = ".RE\n" x abs($levels);
1496 $gh->_new_mode('ul');
1497 return $ret . ".TP 4\n\\(bu\n";
1501 sub _ol
1503 my ($gh, $levels) = @_;
1504 my $l = @{$gh->{'-ol-levels'}};
1505 my $ret = '';
1507 $gh->{'-ol-level'} += $levels;
1509 if ($levels > 0) {
1510 $ret = ".RS 4\n";
1512 $l[$gh->{'-ol-level'}] = 1;
1514 elsif ($levels < 0) {
1515 $ret = ".RE\n" x abs($levels);
1518 $gh->_new_mode('ol');
1519 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1521 return $ret;
1525 sub _hr
1527 my ($gh) = @_;
1529 return '';
1533 sub _heading
1535 my ($gh, $level, $l) = @_;
1537 # all headers are the same depth in man pages
1538 return ".SH \"" . uc($l) . "\"";
1542 sub _table
1544 my ($gh, $str) = @_;
1546 if ($gh->{'-mode'} eq 'table') {
1547 foreach my $r (@{$gh->{'-table-raw'}}) {
1548 $gh->_push("|$r|");
1551 else {
1552 $gh->_new_mode('table');
1555 @{$gh->{'-table'}} = ();
1556 @{$gh->{'-table-raw'}} = ();
1558 $gh->_push($str);
1560 return '';
1564 sub _postfix
1569 ###########################################################
1570 # latex Driver
1572 package Grutatxt::latex;
1574 @ISA = ("Grutatxt");
1576 =head2 LaTeX Driver
1578 The additional parameters for a new Grutatxt object are:
1580 =over 4
1582 =item I<docclass>
1584 The LaTeX document class. By default is 'report'. You can also use
1585 'article' or 'book' (consult your LaTeX documentation for details).
1587 =item I<papersize>
1589 The paper size to be used in the document. By default is 'a4paper'.
1591 =item I<encoding>
1593 The character encoding used in the document. By default is 'latin1'.
1595 =back
1597 Note that you can't nest further than 4 levels in LaTeX; if you do,
1598 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1600 =cut
1602 sub new
1604 my ($class, %args) = @_;
1605 my ($gh);
1607 bless(\%args,$class);
1608 $gh = \%args;
1610 $gh->{'-process-urls'} = 0;
1612 $gh->{'-docclass'} ||= 'report';
1613 $gh->{'-papersize'} ||= 'a4paper';
1614 $gh->{'-encoding'} ||= 'latin1';
1616 return $gh;
1620 sub _prefix
1622 my ($gh) = @_;
1624 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1625 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1627 $gh->_push("\\begin{document}");
1631 sub _inline
1633 my ($gh, $l) = @_;
1635 # accept only latex inlines
1636 if ($l =~ /^<<\s*latex$/i) {
1637 $gh->{'-inline'} = 'latex';
1638 return;
1641 if ($l =~ /^>>$/) {
1642 delete $gh->{'-inline'};
1643 return;
1646 if ($gh->{'-inline'} eq 'latex') {
1647 $gh->_push($l);
1652 sub _escape
1654 my ($gh, $l) = @_;
1656 $l =~ s/ _ / \\_ /g;
1657 $l =~ s/ ~ / \\~ /g;
1658 $l =~ s/ & / \\& /g;
1660 return $l;
1664 sub _escape_post
1666 my ($gh, $l) = @_;
1668 $l =~ s/ # / \\# /g;
1669 $l =~ s/^\\n$//g;
1670 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1672 return $l;
1676 sub _empty_line
1678 my ($gh) = @_;
1680 return "\\n";
1684 sub _strong
1686 my ($gh, $str) = @_;
1687 return "\\textbf{$str}";
1691 sub _em
1693 my ($gh, $str) = @_;
1694 return "\\emph{$str}";
1698 sub _code
1700 my ($gh, $str) = @_;
1701 return "{\\tt $str}";
1705 sub _funcname
1707 my ($gh, $str) = @_;
1708 return "{\\tt $str}";
1712 sub _varname
1714 my ($gh, $str) = @_;
1716 $str =~ s/^\$/\\\$/;
1718 return "{\\tt $str}";
1722 sub _new_mode
1724 my ($gh, $mode, $params) = @_;
1726 # mode equivalences
1727 my %latex_modes = (
1728 'pre' => 'verbatim',
1729 'blockquote' => 'quote',
1730 'table' => 'tabular',
1731 'dl' => 'description',
1732 'ul' => 'itemize',
1733 'ol' => 'enumerate'
1736 if ($mode ne $gh->{'-mode'}) {
1737 # close previous mode
1738 if ($gh->{'-mode'} eq 'ul') {
1739 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1741 elsif ($gh->{'-mode'} eq 'ol') {
1742 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1744 elsif ($gh->{'-mode'} eq 'table') {
1745 $gh->_push("\\end{tabular}\n");
1747 else {
1748 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1749 if $gh->{'-mode'};
1752 # send new one
1753 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1754 if $mode;
1756 $gh->{'-mode'} = $mode;
1758 $gh->{'-ul-levels'} = undef;
1759 $gh->{'-ol-levels'} = undef;
1764 sub _dl
1766 my ($gh, $str) = @_;
1768 $gh->_new_mode('dl');
1769 return "\\item[$str]\n";
1773 sub _ul
1775 my ($gh, $levels) = @_;
1776 my ($ret);
1778 $ret = '';
1780 if ($levels > 0) {
1781 $ret .= "\\begin{itemize}\n";
1783 elsif ($levels < 0) {
1784 $ret .= "\\end{itemize}\n" x abs($levels);
1787 $gh->{'-mode'} = 'ul';
1789 $ret .= "\\item\n";
1791 return $ret;
1795 sub _ol
1797 my ($gh, $levels) = @_;
1798 my ($ret);
1800 $ret = '';
1802 if ($levels > 0) {
1803 $ret .= "\\begin{enumerate}\n";
1805 elsif ($levels < 0) {
1806 $ret .= "\\end{enumerate}\n" x abs($levels);
1809 $gh->{'-mode'} = 'ol';
1811 $ret .= "\\item\n";
1813 return $ret;
1817 sub _blockquote
1819 my ($gh) = @_;
1821 $gh->_new_mode('blockquote');
1822 return "``";
1826 sub _hr
1828 my ($gh) = @_;
1830 return "------------\n";
1834 sub _heading
1836 my ($gh, $level, $l) = @_;
1838 my @latex_headings = ( "\\section*{", "\\subsection*{",
1839 "\\subsubsection*{");
1841 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1843 return $l;
1847 sub _table
1849 my ($gh,$str) = @_;
1851 if ($gh->{'-mode'} eq 'table') {
1852 my ($class) = '';
1853 my (@spans) = $gh->_calc_col_span($str);
1854 my (@cols);
1856 $str = '';
1858 # build columns
1859 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1860 my ($i, $s);
1862 $i = ${$gh->{'-table'}}[$n];
1863 $i = "&nbsp;" if $i =~ /^\s*$/;
1865 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1867 # multispan columns
1868 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1869 if $spans[$n] > 1;
1871 $i =~ s/\s{2,}/ /g;
1872 $i =~ s/^\s+//;
1873 $i =~ s/\s+$//;
1875 push(@cols, $i);
1878 $str .= join('&', @cols) . "\\\\\n\\hline";
1880 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1882 @{$gh->{'-table'}} = ();
1883 $gh->{'-tbl-row'}++;
1885 else {
1886 # new table
1888 # count the number of columns
1889 $str =~ s/[^\+]//g;
1890 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1892 $gh->_push();
1893 $gh->_new_mode('table', $params);
1895 @{$gh->{'-table'}} = ();
1896 $gh->{'-tbl-row'} = 1;
1897 $str = '';
1900 return $str;
1904 sub _postfix
1906 my ($gh) = @_;
1908 $gh->_push("\\end{document}");
1912 ###########################################################
1913 # RTF Driver
1915 package Grutatxt::rtf;
1917 @ISA = ("Grutatxt");
1919 =head2 RTF Driver
1921 The additional parameters for a new Grutatxt object are:
1923 =over 4
1925 =item I<normal-size>
1927 The point size of normal text. By default is 20.
1929 =item I<heading-sizes>
1931 This argument must be a reference to an array containing
1932 the size in points of the 3 different heading levels. By
1933 default, level sizes are [ 34, 30, 28 ].
1935 =back
1937 =cut
1939 sub new
1941 my ($class, %args) = @_;
1942 my ($gh);
1944 bless(\%args, $class);
1945 $gh = \%args;
1947 $gh->{'-process-urls'} = 0;
1949 $gh->{'heading-sizes'} ||= [ 34, 30, 28 ];
1950 $gh->{'normal-size'} ||= 20;
1952 return $gh;
1956 sub _prefix
1958 my $gh = shift;
1960 $gh->_push('{\rtf1\ansi {\plain \fs' . $gh->{'normal-size'} . ' \sa227');
1964 sub _empty_line
1966 my $gh = shift;
1968 return '\par';
1972 sub _heading
1974 my ($gh, $level, $l) = @_;
1976 return '{\b \fs' . $gh->{'heading-sizes'}->[$level] . ' ' . $l . '}';
1980 sub _strong
1982 my ($gh, $str) = @_;
1983 return "{\\b $str}";
1987 sub _em
1989 my ($gh, $str) = @_;
1990 return "{\\i $str}";
1994 sub _code
1996 my ($gh, $str) = @_;
1997 return "{\\tt $str}";
2001 sub _ul
2003 my ($gh, $levels) = @_;
2005 $gh->_new_mode('ul');
2006 return "{{\\bullet \\li" . $levels . ' ';
2010 sub _dl
2012 my ($gh, $str) = @_;
2014 $gh->_new_mode('dl');
2015 return "{{\\b $str \\par} {\\li566 ";
2019 sub _new_mode
2021 my ($gh, $mode, $params) = @_;
2023 if ($mode ne $gh->{'-mode'}) {
2024 if ($gh->{'-mode'} =~ /^(dl|ul)$/) {
2025 $gh->_push('}}');
2028 $gh->{'-mode'} = $mode;
2030 $gh->{'-ul-levels'} = undef;
2031 $gh->{'-ol-levels'} = undef;
2033 else {
2034 if ($mode =~ /^(dl|ul)$/) {
2035 $gh->_push('}\par}');
2041 sub _postfix
2043 my $gh = shift;
2045 @{$gh->{o}} = map { $_ . ' '; } @{$gh->{o}};
2047 $gh->_push('}}');
2051 =head1 AUTHOR
2053 Angel Ortega angel@triptico.com
2055 =cut