Updated TODO.
[grutatxt.git] / Grutatxt.pm
blobe93dd8ab584bbc2e7279acaff6a2d117c7563225
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2008 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.15-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 =for html <->
67 A comprehensive description of the markup is defined in
68 the README file, included with the Grutatxt package (it is
69 written in Grutatxt format itself, so it can be converted
70 using the I<grutatxt> tool to any of the supported formats).
71 The latest version (and more information) can be retrieved
72 from the Grutatxt home page at:
74 http://www.triptico.com/software/grutatxt.html
76 =head1 FUNCTIONS AND METHODS
78 =head2 B<new>
80 $grutatxt = new Grutatxt([ "mode" => $mode, ]
81 [ "title" => \$title, ]
82 [ "marks" => \@marks, ]
83 [ "index" => \@index, ]
84 [ "abstract" => \$abstract, ]
85 [ "strip-parens" => $bool, ]
86 [ "strip-dollars" => $bool, ]
87 [ %driver_specific_arguments ] );
89 Creates a new Grutatxt object instance. All parameters are
90 optional.
92 =over 4
94 =item I<mode>
96 Output format. Can be HTML, troff or man. HTML is used if not specified.
98 =item I<title>
100 If I<title> is specified as a reference to scalar, the first
101 level 1 heading found in the text is stored inside it.
103 =item I<marks>
105 Marks in the Grutatxt markup are created by inserting the
106 string <-> alone in a line. If I<marks> is specified as a
107 reference to array, it will be filled with the subscripts
108 (relative to the output array) of the lines where the marks
109 are found in the text.
111 =item I<index>
113 If I<index> is specified as a reference to array, it will
114 be filled with strings in the format
116 level,heading
118 This information can be used to build a table of contents
119 of the processed text.
121 =item I<strip-parens>
123 Function names in the Grutatxt markup are strings of
124 alphanumeric characters immediately followed by a pair
125 of open and close parentheses. If this boolean value is
126 set, function names found in the processed text will have
127 their parentheses deleted.
129 =item I<strip-dollars>
131 Variable names in the Grutatxt markup are strings of
132 alphanumeric characters preceded by a dollar sign.
133 If this boolean value is set, variable names found in
134 the processed text will have the dollar sign deleted.
136 =item I<abstract>
138 The I<abstract> of a Grutatxt document is the fragment of text
139 from the beginning of the document to the end of the first
140 paragraph after the title. If I<abstract> is specified as a
141 reference to scalar, it will contain (after each call to the
142 B<process()> method) the subscript of the element of the output
143 array that marks the end of the subject.
145 =back
147 =cut
149 sub new
151 my ($class, %args) = @_;
152 my ($gh);
154 $args{'mode'} ||= 'HTML';
156 $class .= "::" . $args{'mode'};
158 $gh = new $class(%args);
160 return $gh;
164 sub escape
165 # escapes special characters, ignoring passthrough code
167 my ($gh, $l) = @_;
169 # splits between << and >>
170 my (@l) = split(/(<<|>>)/, $l);
172 @l = map {
173 my $l = $_;
175 # escape only text outside << and >>
176 unless ($l eq '<<' .. $l eq '>>') {
177 $l = $gh->_escape($l);
180 $_ = $l;
181 } @l;
183 # join again, stripping << and >>
184 $l = join('', grep(!/^(<<|>>)$/, @l));
186 return $l;
190 =head2 B<process>
192 @output = $grutatxt->process($text);
194 Processes a text in Grutatxt format. The result is returned
195 as an array of lines.
197 =cut
199 sub process
201 my ($gh, $content) = @_;
202 my ($p);
204 # clean output
205 @{$gh->{'o'}} = ();
207 # clean title and paragraph numbers
208 $gh->{'-title'} = '';
209 $gh->{'-p'} = 0;
211 # clean marks
212 @{$gh->{'marks'}} = () if ref($gh->{'marks'});
214 # clean index
215 @{$gh->{'index'}} = () if ref($gh->{'index'});
217 # reset abstract line
218 ${$gh->{'abstract'}} = 0 if ref($gh->{'abstract'});
220 # insert prefix
221 $gh->_prefix();
223 $gh->{'-mode'} = undef;
225 foreach my $l (split(/\n/,$content)) {
226 # inline data (passthrough)
227 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
228 $gh->_inline($l);
229 next;
232 # marks
233 if ($l =~ /^\s*<\->\s*$/) {
234 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
235 if ref($gh->{'marks'});
237 next;
240 # escape possibly dangerous characters
241 $l = $gh->escape($l);
243 # empty lines
244 $l =~ s/^\r$//ge;
245 if ($l =~ s/^$/$gh->_empty_line()/ge) {
246 # mark the abstract end
247 if ($gh->{'-title'}) {
248 $gh->{'-p'}++;
250 # mark abstract if it's the
251 # second paragraph from the title
252 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
253 if $gh->{'-p'} == 2;
257 # line-mutating process
258 my $ol = $l;
260 if ($gh->{'-process-urls'}) {
261 # URLs followed by a parenthesized phrase
262 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
263 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
264 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
265 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
266 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
268 # URLs without phrase
269 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
270 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
271 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
272 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
274 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
275 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
276 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
277 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
280 # change '''text''' and *text* into strong emphasis
281 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
282 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
283 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
285 # change ''text'' and _text_ into emphasis
286 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
287 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
288 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
290 # change `text' into code
291 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
293 # enclose function names
294 if ($gh->{'strip-parens'}) {
295 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
297 else {
298 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
301 # enclose variable names
302 if ($gh->{'strip-dollars'}) {
303 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
305 else {
306 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
310 # main switch
313 # definition list
314 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
315 $gh->{'-mode-elems'} ++;
318 # unsorted list
319 elsif ($gh->{'-mode'} ne 'pre' and
320 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
321 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
322 $gh->{'-mode-elems'} ++;
325 # sorted list
326 elsif ($gh->{'-mode'} ne 'pre' and
327 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
328 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
329 $gh->{'-mode-elems'} ++;
332 # quoted block
333 elsif ($l =~ s/^\s\"/$gh->_blockquote()/e) {
336 # table rows
337 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
338 $gh->{'-mode-elems'} ++;
341 # table heading / end of row
342 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
345 # preformatted text
346 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
347 if ($gh->{'-mode'} eq 'pre') {
348 # set line back to original
349 $l = $ol;
353 # anything else
354 else {
355 # back to normal mode
356 $gh->_new_mode(undef);
359 # 1 level heading
360 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
362 # 2 level heading
363 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
365 # 3 level heading
366 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
368 # change ------ into hr
369 $l =~ s/^----*$/$gh->_hr()/e;
371 # push finally
372 $gh->_push($l) if $l;
375 # flush
376 $gh->_new_mode(undef);
378 # postfix
379 $gh->_postfix();
381 # set title
382 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
384 # set abstract, if not set
385 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
386 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
388 # travel all lines again, post-escaping
389 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
391 return @{$gh->{'o'}};
395 =head2 B<process_file>
397 @output = $grutatxt->process_file($filename);
399 Processes a file in Grutatxt format.
401 =cut
403 sub process_file
405 my ($gh, $file) = @_;
407 open F, $file or return(undef);
409 my ($content) = join('',<F>);
410 close F;
412 return $gh->process($content);
416 sub _push
418 my ($gh, $l) = @_;
420 push(@{$gh->{'o'}},$l);
424 sub _process_heading
426 my ($gh, $level, $hd) = @_;
427 my ($l);
429 $l = pop(@{$gh->{'o'}});
431 if ($l eq $gh->_empty_line()) {
432 $gh->_push($l);
433 return $hd;
436 # store title
437 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
439 # store index
440 if (ref($gh->{'index'})) {
441 push(@{$gh->{'index'}},"$level,$l");
444 return $gh->_heading($level,$l);
448 sub _calc_col_span
450 my ($gh, $l) = @_;
451 my (@spans);
453 # strip first + and all -
454 $l =~ s/^\+//;
455 $l =~ s/-//g;
457 my ($t) = 1; @spans = ();
458 for (my $n = 0; $n < length($l); $n++) {
459 if (substr($l, $n, 1) eq '+') {
460 push(@spans, $t);
461 $t = 1;
463 else {
464 # it's a colspan mark:
465 # increment
466 $t++;
470 return @spans;
474 sub _table_row
476 my ($gh, $str) = @_;
478 my @s = split(/\|/,$str);
480 for (my $n = 0; $n < scalar(@s); $n++) {
481 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
484 push(@{$gh->{'-table-raw'}}, $str);
486 return '';
490 sub _pre
492 my ($gh, $l) = @_;
494 # if any other mode is active, add to it
495 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
496 $l =~ s/^\s+//;
498 my ($a) = pop(@{$gh->{'o'}})." ".$l;
499 $gh->_push($a);
500 $l = '';
502 else {
503 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
504 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
506 $gh->_new_mode('pre');
509 return $l;
513 sub _multilevel_list
515 my ($gh, $str, $ind) = @_;
516 my (@l,$level);
518 @l = @{$gh->{$str}};
519 $ind = length($ind);
520 $level = 0;
522 if ($l[-1] < $ind) {
523 # if last level is less indented, increase
524 # nesting level
525 push(@l, $ind);
526 $level++;
528 elsif ($l[-1] > $ind) {
529 # if last level is more indented, decrease
530 # levels until the same is found (or back to
531 # the beginning if not)
532 while (pop(@l)) {
533 $level--;
534 last if $l[-1] == $ind;
538 $gh->{$str} = \@l;
540 return $level;
544 sub _unsorted_list
546 my ($gh, $ind) = @_;
548 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
552 sub _ordered_list
554 my ($gh, $ind) = @_;
556 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
560 # empty stubs for falling through the superclass
562 sub _inline { my ($gh, $l) = @_; $l; }
563 sub _escape { my ($gh, $l) = @_; $l; }
564 sub _escape_post { my ($gh, $l) = @_; $l; }
565 sub _empty_line { my ($gh) = @_; ''; }
566 sub _url { my ($gh, $url, $label) = @_; ''; }
567 sub _strong { my ($gh, $str) = @_; $str; }
568 sub _em { my ($gh, $str) = @_; $str; }
569 sub _code { my ($gh, $str) = @_; $str; }
570 sub _funcname { my ($gh, $str) = @_; $str; }
571 sub _varname { my ($gh, $str) = @_; $str; }
572 sub _new_mode { my ($gh, $mode) = @_; }
573 sub _dl { my ($gh, $str) = @_; $str; }
574 sub _ul { my ($gh, $level) = @_; ''; }
575 sub _ol { my ($gh, $level) = @_; ''; }
576 sub _blockquote { my ($gh, $str) = @_; $str; }
577 sub _hr { my ($gh) = @_; ''; }
578 sub _heading { my ($gh, $level, $l) = @_; $l; }
579 sub _table { my ($gh, $str) = @_; $str; }
580 sub _prefix { my ($gh) = @_; }
581 sub _postfix { my ($gh) = @_; }
583 ###########################################################
585 =head1 DRIVER SPECIFIC INFORMATION
587 =cut
589 ###########################################################
590 # HTML Driver
592 package Grutatxt::HTML;
594 @ISA = ("Grutatxt");
596 =head2 HTML Driver
598 The additional parameters for a new Grutatxt object are:
600 =over 4
602 =item I<table-headers>
604 If this boolean value is set, the first row in tables
605 is assumed to be the heading and rendered using 'th'
606 instead of 'td' tags.
608 =item I<center-tables>
610 If this boolean value is set, tables are centered.
612 =item I<expand-tables>
614 If this boolean value is set, tables are expanded (width 100%).
616 =item I<dl-as-dl>
618 If this boolean value is set, definition lists will be
619 rendered using 'dl', 'dt' and 'dd' instead of tables.
621 =item I<header-offset>
623 Offset to be summed to the heading level when rendering
624 'h?' tags (default is 0).
626 =item I<class-oddeven>
628 If this boolean value is set, tables will be rendered
629 with an "oddeven" CSS class, and rows alternately classed
630 as "even" or "odd". If it's not set, no CSS class info
631 is added to tables.
633 =item I<url-label-max>
635 If an URL without label is given (that is, the URL itself
636 is used as the label), it's trimmed to have as much
637 characters as this value says. By default it's 80.
639 =back
641 =cut
643 sub new
645 my ($class, %args) = @_;
646 my ($gh);
648 bless(\%args, $class);
649 $gh = \%args;
651 $gh->{'-process-urls'} = 1;
652 $gh->{'url-label-max'} ||= 80;
654 return $gh;
658 sub _inline
660 my ($gh, $l) = @_;
662 # accept unnamed and HTML inlines
663 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
664 $gh->{'-inline'} = 'HTML';
665 return;
668 if ($l =~ /^>>$/) {
669 delete $gh->{'-inline'};
670 return;
673 if ($gh->{'-inline'} eq 'HTML') {
674 $gh->_push($l);
679 sub _escape
681 my ($gh, $l) = @_;
683 $l =~ s/&/&amp;/g;
684 $l =~ s/</&lt;/g;
685 $l =~ s/>/&gt;/g;
687 return $l;
691 sub _empty_line
693 my ($gh) = @_;
695 return('<p>');
699 sub _url
701 my ($gh, $url, $label) = @_;
703 if (!$label) {
704 $label = $url;
706 if (length($label) > $gh->{'url-label-max'}) {
707 $label = substr($label, 0,
708 $gh->{'url-label-max'}) . '...';
712 return "<a href = \"$url\">$label</a>";
716 sub _strong
718 my ($gh, $str) = @_;
719 return "<strong>$str</strong>";
723 sub _em
725 my ($gh, $str) = @_;
726 return "<em>$str</em>";
730 sub _code
732 my ($gh, $str) = @_;
733 return "<code class = 'literal'>$str</code>";
737 sub _funcname
739 my ($gh, $str) = @_;
740 return "<code class = 'funcname'>$str</code>";
744 sub _varname
746 my ($gh, $str) = @_;
747 return "<code class = 'var'>$str</code>";
751 sub _new_mode
753 my ($gh, $mode, $params) = @_;
755 if ($mode ne $gh->{'-mode'}) {
756 my $tag;
758 # clean list levels
759 if ($gh->{'-mode'} eq 'ul') {
760 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
762 elsif ($gh->{'-mode'} eq 'ol') {
763 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
765 elsif ($gh->{'-mode'}) {
766 $gh->_push("</$gh->{'-mode'}>");
769 # send new one
770 $tag = $params ? "<$mode $params>" : "<$mode>";
771 $gh->_push($tag) if $mode;
773 $gh->{'-mode'} = $mode;
774 $gh->{'-mode-elems'} = 0;
776 # clean previous lists
777 $gh->{'-ul-levels'} = undef;
778 $gh->{'-ol-levels'} = undef;
783 sub _dl
785 my ($gh, $str) = @_;
786 my ($ret) = '';
788 if ($gh->{'dl-as-dl'}) {
789 $gh->_new_mode('dl');
790 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
792 else {
793 $gh->_new_mode('table');
794 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
797 return $ret;
801 sub _ul
803 my ($gh, $levels) = @_;
804 my ($ret);
806 $ret = '';
808 if ($levels > 0) {
809 $ret .= '<ul>';
811 elsif ($levels < 0) {
812 $ret .= '</li></ul>' x abs($levels);
815 if ($gh->{'-mode'} ne 'ul') {
816 $gh->{'-mode'} = 'ul';
818 else {
819 $ret .= '</li>' if $levels <= 0;
822 $ret .= '<li>';
824 return $ret;
828 sub _ol
830 my ($gh, $levels) = @_;
831 my ($ret);
833 $ret = '';
835 if ($levels > 0) {
836 $ret .= '<ol>';
838 elsif ($levels < 0) {
839 $ret .= '</li></ol>' x abs($levels);
842 if ($gh->{'-mode'} ne 'ol') {
843 $gh->{'-mode'} = 'ol';
845 else {
846 $ret .= '</li>' if $levels <= 0;
849 $ret .= '<li>';
851 return $ret;
855 sub _blockquote
857 my ($gh) = @_;
859 $gh->_new_mode('blockquote');
860 return "\"";
864 sub _hr
866 my ($gh) = @_;
868 return "<hr size = '1' noshade = 'noshade'>";
872 sub _heading
874 my ($gh, $level, $l) = @_;
876 # creates a valid anchor
877 my ($a) = lc($l);
879 $a =~ s/[\"\'\/]//g;
880 $a =~ s/\s/_/g;
881 $a =~ s/<[^>]+>//g;
883 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
884 $a, $level+$gh->{'header-offset'},
885 $l, $level+$gh->{'header-offset'});
887 return $l;
891 sub _table
893 my ($gh, $str) = @_;
895 if ($gh->{'-mode'} eq 'table') {
896 my ($class) = '';
897 my (@spans) = $gh->_calc_col_span($str);
899 # calculate CSS class, if any
900 if ($gh->{'class-oddeven'}) {
901 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
904 $str = "<tr $class>";
906 # build columns
907 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
908 my ($i,$s);
910 $i = ${$gh->{'-table'}}[$n];
911 $i = "&nbsp;" if $i =~ /^\s*$/;
913 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
915 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
916 $str .= "<th $class $s>$i</th>";
918 else {
919 $str .= "<td $class $s>$i</td>";
923 $str .= '</tr>';
925 @{$gh->{'-table'}} = ();
926 $gh->{'-tbl-row'}++;
928 else {
929 # new table
930 my ($params);
932 $params = "border = '1'";
933 $params .= " width = '100\%'" if $gh->{'expand-tables'};
934 $params .= " align = 'center'" if $gh->{'center-tables'};
935 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
937 $gh->_new_mode('table', $params);
939 @{$gh->{'-table'}} = ();
940 $gh->{'-tbl-row'} = 1;
941 $str = '';
944 return $str;
948 ###########################################################
949 # troff Driver
951 package Grutatxt::troff;
953 @ISA = ("Grutatxt");
955 =head2 troff Driver
957 The troff driver uses the B<-me> macros and B<tbl>. A
958 good way to post-process this output (to PostScript in
959 the example) could be by using
961 groff -t -me -Tps
963 The additional parameters for a new Grutatxt object are:
965 =over 4
967 =item I<normal-size>
969 The point size of normal text. By default is 10.
971 =item I<heading-sizes>
973 This argument must be a reference to an array containing
974 the size in points of the 3 different heading levels. By
975 default, level sizes are [ 20, 18, 15 ].
977 =item I<table-type>
979 The type of table to be rendered by B<tbl>. Can be
980 I<allbox> (all lines rendered; this is the default value),
981 I<box> (only outlined) or I<doublebox> (only outlined by
982 a double line).
984 =back
986 =cut
988 sub new
990 my ($class, %args) = @_;
991 my ($gh);
993 bless(\%args,$class);
994 $gh = \%args;
996 $gh->{'-process-urls'} = 0;
998 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
999 $gh->{'normal-size'} ||= 10;
1000 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
1002 return $gh;
1006 sub _prefix
1008 my ($gh) = @_;
1010 $gh->_push(".nr pp $gh->{'normal-size'}");
1011 $gh->_push(".nh");
1015 sub _inline
1017 my ($gh,$l) = @_;
1019 # accept only troff inlines
1020 if ($l =~ /^<<\s*troff$/i) {
1021 $gh->{'-inline'} = 'troff';
1022 return;
1025 if ($l =~ /^>>$/) {
1026 delete $gh->{'-inline'};
1027 return;
1030 if ($gh->{'-inline'} eq 'troff') {
1031 $gh->_push($l);
1036 sub _escape
1038 my ($gh,$l) = @_;
1040 $l =~ s/\\/\\\\/g;
1041 $l =~ s/^'/\\&'/;
1043 return $l;
1047 sub _empty_line
1049 my ($gh) = @_;
1051 return '.lp';
1055 sub _strong
1057 my ($gh, $str) = @_;
1058 return "\\fB$str\\fP";
1062 sub _em
1064 my ($gh, $str) = @_;
1065 return "\\fI$str\\fP";
1069 sub _code
1071 my ($gh, $str) = @_;
1072 return "\\fI$str\\fP";
1076 sub _funcname
1078 my ($gh, $str) = @_;
1079 return "\\fB$str\\fP";
1083 sub _varname
1085 my ($gh, $str) = @_;
1086 return "\\fI$str\\fP";
1090 sub _new_mode
1092 my ($gh, $mode, $params) = @_;
1094 if ($mode ne $gh->{'-mode'}) {
1095 my $tag;
1097 # flush previous list
1098 if ($gh->{'-mode'} eq 'pre') {
1099 $gh->_push('.)l');
1101 elsif ($gh->{'-mode'} eq 'table') {
1102 chomp($gh->{'-table-head'});
1103 $gh->{'-table-head'} =~ s/\s+$//;
1104 $gh->_push($gh->{'-table-head'} . '.');
1105 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1107 elsif ($gh->{'-mode'} eq 'blockquote') {
1108 $gh->_push('.)q');
1111 # send new one
1112 if ($mode eq 'pre') {
1113 $gh->_push('.(l L');
1115 elsif ($mode eq 'blockquote') {
1116 $gh->_push('.(q');
1119 $gh->{'-mode'} = $mode;
1124 sub _dl
1126 my ($gh, $str) = @_;
1128 $gh->_new_mode('dl');
1129 return ".ip \"$str\"\n";
1133 sub _ul
1135 my ($gh) = @_;
1137 $gh->_new_mode('ul');
1138 return ".bu\n";
1142 sub _ol
1144 my ($gh) = @_;
1146 $gh->_new_mode('ol');
1147 return ".np\n";
1151 sub _blockquote
1153 my ($gh) = @_;
1155 $gh->_new_mode('blockquote');
1156 return "\"";
1160 sub _hr
1162 my ($gh) = @_;
1164 return '.hl';
1168 sub _heading
1170 my ($gh, $level, $l) = @_;
1172 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1174 return $l;
1178 sub _table
1180 my ($gh, $str) = @_;
1182 if ($gh->{'-mode'} eq 'table') {
1183 my ($h, $b);
1184 my (@spans) = $gh->_calc_col_span($str);
1186 # build columns
1187 $h = '';
1188 $b = '';
1189 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1190 my ($i);
1192 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1193 $h .= 'cB ';
1195 else {
1196 $h .= 'l ';
1199 # add span columns
1200 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1202 $b .= '#' if $n;
1204 $i = ${$gh->{'-table'}}[$n];
1205 $i =~ s/^\s+//;
1206 $i =~ s/\s+$//;
1207 $i =~ s/(\s)+/$1/g;
1208 $b .= $i;
1211 # add a separator
1212 $b .= "\n_" if $gh->{'table-headers'} and
1213 $gh->{'-tbl-row'} == 1 and
1214 $gh->{'table-type'} ne "allbox";
1216 $gh->{'-table-head'} .= "$h\n";
1217 $gh->{'-table-body'} .= "$b\n";
1219 @{$gh->{'-table'}} = ();
1220 $gh->{'-tbl-row'}++;
1222 else {
1223 # new table
1224 $gh->_new_mode('table');
1226 @{$gh->{'-table'}} = ();
1227 $gh->{'-tbl-row'} = 1;
1229 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1230 $gh->{'-table-body'} = '';
1233 $str = '';
1234 return $str;
1238 sub _postfix
1240 my ($gh) = @_;
1242 # add to top headings and footers
1243 unshift(@{$gh->{'o'}},".ef '\%' ''");
1244 unshift(@{$gh->{'o'}},".of '' '\%'");
1245 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1246 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1250 ###########################################################
1251 # man Driver
1253 package Grutatxt::man;
1255 @ISA = ("Grutatxt::troff", "Grutatxt");
1257 =head2 man Driver
1259 The man driver is used to generate Unix-like man pages. Note that
1260 all headings have the same level with this output driver.
1262 The additional parameters for a new Grutatxt object are:
1264 =over 4
1266 =item I<section>
1268 The man page section (see man documentation). By default is 1.
1270 =item I<page-name>
1272 The name of the page. This is usually the name of the program
1273 or function the man page is documenting and will be shown in the
1274 page header. By default is the empty string.
1276 =back
1278 =cut
1280 sub new
1282 my ($class, %args) = @_;
1283 my ($gh);
1285 bless(\%args,$class);
1286 $gh = \%args;
1288 $gh->{'-process-urls'} = 0;
1290 $gh->{'section'} ||= 1;
1291 $gh->{'page-name'} ||= "";
1293 return $gh;
1297 sub _prefix
1299 my ($gh) = @_;
1301 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1305 sub _inline
1307 my ($gh, $l) = @_;
1309 # accept only man markup inlines
1310 if ($l =~ /^<<\s*man$/i) {
1311 $gh->{'-inline'} = 'man';
1312 return;
1315 if ($l =~ /^>>$/) {
1316 delete $gh->{'-inline'};
1317 return;
1320 if ($gh->{'-inline'} eq 'man') {
1321 $gh->_push($l);
1326 sub _empty_line
1328 my ($gh) = @_;
1330 return ' ';
1334 sub _new_mode
1336 my ($gh,$mode,$params) = @_;
1338 if ($mode ne $gh->{'-mode'}) {
1339 my $tag;
1341 # flush previous list
1342 if ($gh->{'-mode'} eq 'pre' or
1343 $gh->{'-mode'} eq 'table') {
1344 $gh->_push('.fi');
1347 if ($gh->{'-mode'} eq 'blockquote') {
1348 $gh->_push('.RE');
1351 if ($gh->{'-mode'} eq 'ul') {
1352 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1355 if ($gh->{'-mode'} eq 'ol') {
1356 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1359 # send new one
1360 if ($mode eq 'pre' or $mode eq 'table') {
1361 $gh->_push('.nf');
1364 if ($mode eq 'blockquote') {
1365 $gh->_push('.RS 4');
1368 $gh->{'-mode'} = $mode;
1373 sub _dl
1375 my ($gh, $str) = @_;
1377 $gh->_new_mode('dl');
1378 return ".TP\n.B \"$str\"\n";
1382 sub _ul
1384 my ($gh, $levels) = @_;
1385 my ($ret) = '';
1387 if ($levels > 0) {
1388 $ret = ".RS 4\n";
1390 elsif ($levels < 0) {
1391 $ret = ".RE\n" x abs($levels);
1394 $gh->_new_mode('ul');
1395 return $ret . ".TP 4\n\\(bu\n";
1399 sub _ol
1401 my ($gh, $levels) = @_;
1402 my $l = @{$gh->{'-ol-levels'}};
1403 my $ret = '';
1405 $gh->{'-ol-level'} += $levels;
1407 if ($levels > 0) {
1408 $ret = ".RS 4\n";
1410 $l[$gh->{'-ol-level'}] = 1;
1412 elsif ($levels < 0) {
1413 $ret = ".RE\n" x abs($levels);
1416 $gh->_new_mode('ol');
1417 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1419 return $ret;
1423 sub _hr
1425 my ($gh) = @_;
1427 return '';
1431 sub _heading
1433 my ($gh, $level, $l) = @_;
1435 # all headers are the same depth in man pages
1436 return ".SH \"" . uc($l) . "\"";
1440 sub _table
1442 my ($gh, $str) = @_;
1444 if ($gh->{'-mode'} eq 'table') {
1445 foreach my $r (@{$gh->{'-table-raw'}}) {
1446 $gh->_push("|$r|");
1449 else {
1450 $gh->_new_mode('table');
1453 @{$gh->{'-table'}} = ();
1454 @{$gh->{'-table-raw'}} = ();
1456 $gh->_push($str);
1458 return '';
1462 sub _postfix
1467 ###########################################################
1468 # latex Driver
1470 package Grutatxt::latex;
1472 @ISA = ("Grutatxt");
1474 =head2 LaTeX Driver
1476 The additional parameters for a new Grutatxt object are:
1478 =over 4
1480 =item I<docclass>
1482 The LaTeX document class. By default is 'report'. You can also use
1483 'article' or 'book' (consult your LaTeX documentation for details).
1485 =item I<papersize>
1487 The paper size to be used in the document. By default is 'a4paper'.
1489 =item I<encoding>
1491 The character encoding used in the document. By default is 'latin1'.
1493 =back
1495 Note that you can't nest further than 4 levels in LaTeX; if you do,
1496 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1498 =cut
1500 sub new
1502 my ($class, %args) = @_;
1503 my ($gh);
1505 bless(\%args,$class);
1506 $gh = \%args;
1508 $gh->{'-process-urls'} = 0;
1510 $gh->{'-docclass'} ||= 'report';
1511 $gh->{'-papersize'} ||= 'a4paper';
1512 $gh->{'-encoding'} ||= 'latin1';
1514 return $gh;
1518 sub _prefix
1520 my ($gh) = @_;
1522 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1523 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1525 $gh->_push("\\begin{document}");
1529 sub _inline
1531 my ($gh, $l) = @_;
1533 # accept only latex inlines
1534 if ($l =~ /^<<\s*latex$/i) {
1535 $gh->{'-inline'} = 'latex';
1536 return;
1539 if ($l =~ /^>>$/) {
1540 delete $gh->{'-inline'};
1541 return;
1544 if ($gh->{'-inline'} eq 'latex') {
1545 $gh->_push($l);
1550 sub _escape
1552 my ($gh, $l) = @_;
1554 $l =~ s/ _ / \\_ /g;
1555 $l =~ s/ ~ / \\~ /g;
1556 $l =~ s/ & / \\& /g;
1558 return $l;
1562 sub _escape_post
1564 my ($gh, $l) = @_;
1566 $l =~ s/ # / \\# /g;
1567 $l =~ s/^\\n$//g;
1568 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1570 return $l;
1574 sub _empty_line
1576 my ($gh) = @_;
1578 return "\\n";
1582 sub _strong
1584 my ($gh, $str) = @_;
1585 return "\\textbf{$str}";
1589 sub _em
1591 my ($gh, $str) = @_;
1592 return "\\emph{$str}";
1596 sub _code
1598 my ($gh, $str) = @_;
1599 return "{\\tt $str}";
1603 sub _funcname
1605 my ($gh, $str) = @_;
1606 return "{\\tt $str}";
1610 sub _varname
1612 my ($gh, $str) = @_;
1614 $str =~ s/^\$/\\\$/;
1616 return "{\\tt $str}";
1620 sub _new_mode
1622 my ($gh, $mode, $params) = @_;
1624 # mode equivalences
1625 my %latex_modes = (
1626 'pre' => 'verbatim',
1627 'blockquote' => 'quote',
1628 'table' => 'tabular',
1629 'dl' => 'description',
1630 'ul' => 'itemize',
1631 'ol' => 'enumerate'
1634 if ($mode ne $gh->{'-mode'}) {
1635 # close previous mode
1636 if ($gh->{'-mode'} eq 'ul') {
1637 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1639 elsif ($gh->{'-mode'} eq 'ol') {
1640 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1642 elsif ($gh->{'-mode'} eq 'table') {
1643 $gh->_push("\\end{tabular}\n");
1645 else {
1646 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1647 if $gh->{'-mode'};
1650 # send new one
1651 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1652 if $mode;
1654 $gh->{'-mode'} = $mode;
1656 $gh->{'-ul-levels'} = undef;
1657 $gh->{'-ol-levels'} = undef;
1662 sub _dl
1664 my ($gh, $str) = @_;
1666 $gh->_new_mode('dl');
1667 return "\\item[$str]\n";
1671 sub _ul
1673 my ($gh, $levels) = @_;
1674 my ($ret);
1676 $ret = '';
1678 if ($levels > 0) {
1679 $ret .= "\\begin{itemize}\n";
1681 elsif ($levels < 0) {
1682 $ret .= "\\end{itemize}\n" x abs($levels);
1685 $gh->{'-mode'} = 'ul';
1687 $ret .= "\\item\n";
1689 return $ret;
1693 sub _ol
1695 my ($gh, $levels) = @_;
1696 my ($ret);
1698 $ret = '';
1700 if ($levels > 0) {
1701 $ret .= "\\begin{enumerate}\n";
1703 elsif ($levels < 0) {
1704 $ret .= "\\end{enumerate}\n" x abs($levels);
1707 $gh->{'-mode'} = 'ol';
1709 $ret .= "\\item\n";
1711 return $ret;
1715 sub _blockquote
1717 my ($gh) = @_;
1719 $gh->_new_mode('blockquote');
1720 return "``";
1724 sub _hr
1726 my ($gh) = @_;
1728 return "------------\n";
1732 sub _heading
1734 my ($gh, $level, $l) = @_;
1736 my @latex_headings = ( "\\section*{", "\\subsection*{",
1737 "\\subsubsection*{");
1739 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1741 return $l;
1745 sub _table
1747 my ($gh,$str) = @_;
1749 if ($gh->{'-mode'} eq 'table') {
1750 my ($class) = '';
1751 my (@spans) = $gh->_calc_col_span($str);
1752 my (@cols);
1754 $str = '';
1756 # build columns
1757 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1758 my ($i, $s);
1760 $i = ${$gh->{'-table'}}[$n];
1761 $i = "&nbsp;" if $i =~ /^\s*$/;
1763 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1765 # multispan columns
1766 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1767 if $spans[$n] > 1;
1769 $i =~ s/\s{2,}/ /g;
1770 $i =~ s/^\s+//;
1771 $i =~ s/\s+$//;
1773 push(@cols, $i);
1776 $str .= join('&', @cols) . "\\\\\n\\hline";
1778 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1780 @{$gh->{'-table'}} = ();
1781 $gh->{'-tbl-row'}++;
1783 else {
1784 # new table
1786 # count the number of columns
1787 $str =~ s/[^\+]//g;
1788 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1790 $gh->_push();
1791 $gh->_new_mode('table', $params);
1793 @{$gh->{'-table'}} = ();
1794 $gh->{'-tbl-row'} = 1;
1795 $str = '';
1798 return $str;
1802 sub _postfix
1804 my ($gh) = @_;
1806 $gh->_push("\\end{document}");
1810 =head1 AUTHOR
1812 Angel Ortega angel@triptico.com
1814 =cut