Added a <->.
[grutatxt.git] / Grutatxt.pm
blobd55641d56d0db9a3de2ff914c2b09f59caead035
1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2008 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
25 package Grutatxt;
27 use locale;
29 $VERSION = '2.0.15-dev';
31 =pod
33 =head1 NAME
35 Grutatxt - Text to HTML (and other formats) converter
37 =head1 SYNOPSIS
39 use Grutatxt;
41 # create a new Grutatxt converter object
42 $grutatxt = new Grutatxt();
44 # process a Grutatxt format string
45 @output = $grutatxt->process($text);
47 # idem for a file
48 @output2 = $grutatxt->process_file($file);
50 =head1 DESCRIPTION
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML, troff or man.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
63 from it.
65 =for html <->
67 A comprehensive description of the markup is defined in
68 the README file, included with the Grutatxt package (it is
69 written in Grutatxt format itself, so it can be converted
70 using the I<grutatxt> tool to any of the supported formats).
71 The latest version (and more information) can be retrieved
72 from the Grutatxt home page at:
74 http://www.triptico.com/software/grutatxt.html
76 =head1 FUNCTIONS AND METHODS
78 =head2 B<new>
80 $grutatxt = new Grutatxt([ "mode" => $mode, ]
81 [ "title" => \$title, ]
82 [ "marks" => \@marks, ]
83 [ "index" => \@index, ]
84 [ "abstract" => \$abstract, ]
85 [ "strip-parens" => $bool, ]
86 [ "strip-dollars" => $bool, ]
87 [ %driver_specific_arguments ] );
89 Creates a new Grutatxt object instance. All parameters are
90 optional.
92 =over 4
94 =item I<mode>
96 Output format. Can be HTML, troff or man. HTML is used if not specified.
98 =item I<title>
100 If I<title> is specified as a reference to scalar, the first
101 level 1 heading found in the text is stored inside it.
103 =item I<marks>
105 Marks in the Grutatxt markup are created by inserting the
106 string <-> alone in a line. If I<marks> is specified as a
107 reference to array, it will be filled with the subscripts
108 (relative to the output array) of the lines where the marks
109 are found in the text.
111 =item I<index>
113 If I<index> is specified as a reference to array, it will
114 be filled with strings in the format
116 level,heading
118 This information can be used to build a table of contents
119 of the processed text.
121 =item I<strip-parens>
123 Function names in the Grutatxt markup are strings of
124 alphanumeric characters immediately followed by a pair
125 of open and close parentheses. If this boolean value is
126 set, function names found in the processed text will have
127 their parentheses deleted.
129 =item I<strip-dollars>
131 Variable names in the Grutatxt markup are strings of
132 alphanumeric characters preceded by a dollar sign.
133 If this boolean value is set, variable names found in
134 the processed text will have the dollar sign deleted.
136 =item I<abstract>
138 The I<abstract> of a Grutatxt document is the fragment of text
139 from the beginning of the document to the end of the first
140 paragraph after the title. If I<abstract> is specified as a
141 reference to scalar, it will contain (after each call to the
142 B<process()> method) the subscript of the element of the output
143 array that marks the end of the subject.
145 =back
147 =cut
149 sub new
151 my ($class, %args) = @_;
152 my ($gh);
154 $args{'mode'} ||= 'HTML';
156 $class .= "::" . $args{'mode'};
158 $gh = new $class(%args);
160 return $gh;
164 sub escape
165 # escapes special characters, ignoring passthrough code
167 my ($gh, $l) = @_;
169 # splits between << and >>
170 my (@l) = split(/(<<|>>)/, $l);
172 @l = map {
173 my $l = $_;
175 # escape only text outside << and >>
176 unless ($l eq '<<' .. $l eq '>>') {
177 $l = $gh->_escape($l);
180 $_ = $l;
181 } @l;
183 # join again, stripping << and >>
184 $l = join('', grep(!/^(<<|>>)$/, @l));
186 return $l;
190 =head2 B<process>
192 @output = $grutatxt->process($text);
194 Processes a text in Grutatxt format. The result is returned
195 as an array of lines.
197 =cut
199 sub process
201 my ($gh, $content) = @_;
202 my ($p);
204 # clean output
205 @{$gh->{'o'}} = ();
207 # clean title and paragraph numbers
208 $gh->{'-title'} = '';
209 $gh->{'-p'} = 0;
211 # clean marks
212 @{$gh->{'marks'}} = () if ref($gh->{'marks'});
214 # clean index
215 @{$gh->{'index'}} = () if ref($gh->{'index'});
217 # reset abstract line
218 ${$gh->{'abstract'}} = 0 if ref($gh->{'abstract'});
220 # insert prefix
221 $gh->_prefix();
223 $gh->{'-mode'} = undef;
225 foreach my $l (split(/\n/,$content)) {
226 # inline data (passthrough)
227 if ($l =~ /^<<$/ .. $l =~ /^>>$/) {
228 $gh->_inline($l);
229 next;
232 # marks
233 if ($l =~ /^\s*<\->\s*$/) {
234 push(@{$gh->{'marks'}},scalar(@{$gh->{'o'}}))
235 if ref($gh->{'marks'});
237 next;
240 # escape possibly dangerous characters
241 $l = $gh->escape($l);
243 # empty lines
244 $l =~ s/^\r$//ge;
245 if ($l =~ s/^$/$gh->_empty_line()/ge) {
246 # mark the abstract end
247 if ($gh->{'-title'}) {
248 $gh->{'-p'}++;
250 # mark abstract if it's the
251 # second paragraph from the title
252 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})-1
253 if $gh->{'-p'} == 2;
257 if ($gh->{'-process-urls'}) {
258 # URLs followed by a parenthesized phrase
259 $l =~ s/(https?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
260 $l =~ s/(ftps?:\/\/\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
261 $l =~ s/(file:\/?\S+)\s+\(([^\)]+)\)/$gh->_url($1,$2)/ge;
262 $l =~ s|(\s+)\./(\S+)\s+\(([^\)]+)\)|$1.$gh->_url($2,$3)|ge;
263 $l =~ s|^\./(\S+)\s+\(([^\)]+)\)|$gh->_url($1,$2)|ge;
265 # URLs without phrase
266 $l =~ s/([^=][^\"])(https?:\/\/\S+)/$1.$gh->_url($2)/ge;
267 $l =~ s/([^=][^\"])(ftps?:\/\/\S+)/$1.$gh->_url($2)/ge;
268 $l =~ s/([^=][^\"])(file:\/?\S+)/$1.$gh->_url($2)/ge;
269 $l =~ s|(\s+)\./(\S+)|$1.$gh->_url($2)|ge;
271 $l =~ s/^(https?:\/\/\S+)/$gh->_url($1)/ge;
272 $l =~ s/^(ftps?:\/\/\S+)/$gh->_url($1)/ge;
273 $l =~ s/^(file:\/?\S+)/$gh->_url($1)/ge;
274 $l =~ s|^\./(\S+)|$gh->_url($1)|ge;
277 # change '''text''' and *text* into strong emphasis
278 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
279 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
280 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
282 # change ''text'' and _text_ into emphasis
283 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
284 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
285 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
287 # change `text' into code
288 $l =~ s/`([^\']*)\'/$gh->_code($1)/ge;
290 # enclose function names
291 if ($gh->{'strip-parens'}) {
292 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
294 else {
295 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
298 # enclose variable names
299 if ($gh->{'strip-dollars'}) {
300 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
302 else {
303 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
307 # main switch
310 # definition list
311 if ($l =~ /^\s\*\s+/ && $l =~ s/^\s\*\s+([^:\.,;]+)\:\s+/$gh->_dl($1)/e) {
312 $gh->{'-mode-elems'} ++;
315 # unsorted list
316 elsif ($gh->{'-mode'} ne 'pre' and
317 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
318 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e)) {
319 $gh->{'-mode-elems'} ++;
322 # sorted list
323 elsif ($gh->{'-mode'} ne 'pre' and
324 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
325 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e)) {
326 $gh->{'-mode-elems'} ++;
329 # quoted block
330 elsif ($l =~ s/^\s\"/$gh->_blockquote()/e) {
333 # table rows
334 elsif ($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e) {
335 $gh->{'-mode-elems'} ++;
338 # table heading / end of row
339 elsif ($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e) {
342 # preformatted text
343 elsif ($l =~ s/^(\s.*)$/$gh->_pre($1)/e) {
346 # anything else
347 else {
348 # back to normal mode
349 $gh->_new_mode(undef);
352 # 1 level heading
353 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
355 # 2 level heading
356 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
358 # 3 level heading
359 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
361 # change ------ into hr
362 $l =~ s/^----*$/$gh->_hr()/e;
364 # push finally
365 $gh->_push($l) if $l;
368 # flush
369 $gh->_new_mode(undef);
371 # postfix
372 $gh->_postfix();
374 # set title
375 ${$gh->{'title'}} = $gh->{'-title'} if ref($gh->{'title'});
377 # set abstract, if not set
378 ${$gh->{'abstract'}} = scalar(@{$gh->{'o'}})
379 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
381 # travel all lines again, post-escaping
382 @{$gh->{'o'}} = map { $_ = $gh->_escape_post($_); } @{$gh->{'o'}};
384 return @{$gh->{'o'}};
388 =head2 B<process_file>
390 @output = $grutatxt->process_file($filename);
392 Processes a file in Grutatxt format.
394 =cut
396 sub process_file
398 my ($gh, $file) = @_;
400 open F, $file or return(undef);
402 my ($content) = join('',<F>);
403 close F;
405 return $gh->process($content);
409 sub _push
411 my ($gh, $l) = @_;
413 push(@{$gh->{'o'}},$l);
417 sub _process_heading
419 my ($gh, $level, $hd) = @_;
420 my ($l);
422 $l = pop(@{$gh->{'o'}});
424 if ($l eq $gh->_empty_line()) {
425 $gh->_push($l);
426 return $hd;
429 # store title
430 $gh->{'-title'} = $l if $level == 1 and not $gh->{'-title'};
432 # store index
433 if (ref($gh->{'index'})) {
434 push(@{$gh->{'index'}},"$level,$l");
437 return $gh->_heading($level,$l);
441 sub _calc_col_span
443 my ($gh, $l) = @_;
444 my (@spans);
446 # strip first + and all -
447 $l =~ s/^\+//;
448 $l =~ s/-//g;
450 my ($t) = 1; @spans = ();
451 for (my $n = 0; $n < length($l); $n++) {
452 if (substr($l, $n, 1) eq '+') {
453 push(@spans, $t);
454 $t = 1;
456 else {
457 # it's a colspan mark:
458 # increment
459 $t++;
463 return @spans;
467 sub _table_row
469 my ($gh, $str) = @_;
471 my @s = split(/\|/,$str);
473 for (my $n = 0; $n < scalar(@s); $n++) {
474 ${$gh->{'-table'}}[$n] .= ' ' . $s[$n];
477 push(@{$gh->{'-table-raw'}}, $str);
479 return '';
483 sub _pre
485 my ($gh, $l) = @_;
487 # if any other mode is active, add to it
488 if ($gh->{'-mode'} and $gh->{'-mode'} ne 'pre') {
489 $l =~ s/^\s+//;
491 my ($a) = pop(@{$gh->{'o'}})." ".$l;
492 $gh->_push($a);
493 $l = '';
495 else {
496 # tabs to spaces if a non-zero tabsize is given (only in LaTex)
497 $l =~ s/\t/' ' x $gh->{'tabsize'}/ge if $gh->{'tabsize'} > 0;
499 $gh->_new_mode('pre');
502 return $l;
506 sub _multilevel_list
508 my ($gh, $str, $ind) = @_;
509 my (@l,$level);
511 @l = @{$gh->{$str}};
512 $ind = length($ind);
513 $level = 0;
515 if ($l[-1] < $ind) {
516 # if last level is less indented, increase
517 # nesting level
518 push(@l, $ind);
519 $level++;
521 elsif ($l[-1] > $ind) {
522 # if last level is more indented, decrease
523 # levels until the same is found (or back to
524 # the beginning if not)
525 while (pop(@l)) {
526 $level--;
527 last if $l[-1] == $ind;
531 $gh->{$str} = \@l;
533 return $level;
537 sub _unsorted_list
539 my ($gh, $ind) = @_;
541 return $gh->_ul($gh->_multilevel_list('-ul-levels', $ind));
545 sub _ordered_list
547 my ($gh, $ind) = @_;
549 return $gh->_ol($gh->_multilevel_list('-ol-levels', $ind));
553 # empty stubs for falling through the superclass
555 sub _inline { my ($gh, $l) = @_; $l; }
556 sub _escape { my ($gh, $l) = @_; $l; }
557 sub _escape_post { my ($gh, $l) = @_; $l; }
558 sub _empty_line { my ($gh) = @_; ''; }
559 sub _url { my ($gh, $url, $label) = @_; ''; }
560 sub _strong { my ($gh, $str) = @_; $str; }
561 sub _em { my ($gh, $str) = @_; $str; }
562 sub _code { my ($gh, $str) = @_; $str; }
563 sub _funcname { my ($gh, $str) = @_; $str; }
564 sub _varname { my ($gh, $str) = @_; $str; }
565 sub _new_mode { my ($gh, $mode) = @_; }
566 sub _dl { my ($gh, $str) = @_; $str; }
567 sub _ul { my ($gh, $level) = @_; ''; }
568 sub _ol { my ($gh, $level) = @_; ''; }
569 sub _blockquote { my ($gh, $str) = @_; $str; }
570 sub _hr { my ($gh) = @_; ''; }
571 sub _heading { my ($gh, $level, $l) = @_; $l; }
572 sub _table { my ($gh, $str) = @_; $str; }
573 sub _prefix { my ($gh) = @_; }
574 sub _postfix { my ($gh) = @_; }
576 ###########################################################
578 =head1 DRIVER SPECIFIC INFORMATION
580 =cut
582 ###########################################################
583 # HTML Driver
585 package Grutatxt::HTML;
587 @ISA = ("Grutatxt");
589 =head2 HTML Driver
591 The additional parameters for a new Grutatxt object are:
593 =over 4
595 =item I<table-headers>
597 If this boolean value is set, the first row in tables
598 is assumed to be the heading and rendered using 'th'
599 instead of 'td' tags.
601 =item I<center-tables>
603 If this boolean value is set, tables are centered.
605 =item I<expand-tables>
607 If this boolean value is set, tables are expanded (width 100%).
609 =item I<dl-as-dl>
611 If this boolean value is set, definition lists will be
612 rendered using 'dl', 'dt' and 'dd' instead of tables.
614 =item I<header-offset>
616 Offset to be summed to the heading level when rendering
617 'h?' tags (default is 0).
619 =item I<class-oddeven>
621 If this boolean value is set, tables will be rendered
622 with an "oddeven" CSS class, and rows alternately classed
623 as "even" or "odd". If it's not set, no CSS class info
624 is added to tables.
626 =item I<url-label-max>
628 If an URL without label is given (that is, the URL itself
629 is used as the label), it's trimmed to have as much
630 characters as this value says. By default it's 80.
632 =back
634 =cut
636 sub new
638 my ($class, %args) = @_;
639 my ($gh);
641 bless(\%args, $class);
642 $gh = \%args;
644 $gh->{'-process-urls'} = 1;
645 $gh->{'url-label-max'} ||= 80;
647 return $gh;
651 sub _inline
653 my ($gh, $l) = @_;
655 # accept unnamed and HTML inlines
656 if ($l =~ /^<<$/ or $l =~ /^<<\s*html$/i) {
657 $gh->{'-inline'} = 'HTML';
658 return;
661 if ($l =~ /^>>$/) {
662 delete $gh->{'-inline'};
663 return;
666 if ($gh->{'-inline'} eq 'HTML') {
667 $gh->_push($l);
672 sub _escape
674 my ($gh, $l) = @_;
676 $l =~ s/&/&amp;/g;
677 $l =~ s/</&lt;/g;
678 $l =~ s/>/&gt;/g;
680 return $l;
684 sub _empty_line
686 my ($gh) = @_;
688 return('<p>');
692 sub _url
694 my ($gh, $url, $label) = @_;
696 if (!$label) {
697 $label = $url;
699 if (length($label) > $gh->{'url-label-max'}) {
700 $label = substr($label, 0,
701 $gh->{'url-label-max'}) . '...';
705 return "<a href = \"$url\">$label</a>";
709 sub _strong
711 my ($gh, $str) = @_;
712 return "<strong>$str</strong>";
716 sub _em
718 my ($gh, $str) = @_;
719 return "<em>$str</em>";
723 sub _code
725 my ($gh, $str) = @_;
726 return "<code class = 'literal'>$str</code>";
730 sub _funcname
732 my ($gh, $str) = @_;
733 return "<code class = 'funcname'>$str</code>";
737 sub _varname
739 my ($gh, $str) = @_;
740 return "<code class = 'var'>$str</code>";
744 sub _new_mode
746 my ($gh, $mode, $params) = @_;
748 if ($mode ne $gh->{'-mode'}) {
749 my $tag;
751 # clean list levels
752 if ($gh->{'-mode'} eq 'ul') {
753 $gh->_push('</li>' . '</ul>' x scalar(@{$gh->{'-ul-levels'}}));
755 elsif ($gh->{'-mode'} eq 'ol') {
756 $gh->_push('</li>' . '</ol>' x scalar(@{$gh->{'-ol-levels'}}));
758 elsif ($gh->{'-mode'}) {
759 $gh->_push("</$gh->{'-mode'}>");
762 # send new one
763 $tag = $params ? "<$mode $params>" : "<$mode>";
764 $gh->_push($tag) if $mode;
766 $gh->{'-mode'} = $mode;
767 $gh->{'-mode-elems'} = 0;
769 # clean previous lists
770 $gh->{'-ul-levels'} = undef;
771 $gh->{'-ol-levels'} = undef;
776 sub _dl
778 my ($gh, $str) = @_;
779 my ($ret) = '';
781 if ($gh->{'dl-as-dl'}) {
782 $gh->_new_mode('dl');
783 $ret .= "<dt><strong class = 'term'>$str</strong><dd>";
785 else {
786 $gh->_new_mode('table');
787 $ret .= "<tr><td valign = 'top'><strong class = 'term'>$1</strong>&nbsp;&nbsp;</td><td valign = 'top'>";
790 return $ret;
794 sub _ul
796 my ($gh, $levels) = @_;
797 my ($ret);
799 $ret = '';
801 if ($levels > 0) {
802 $ret .= '<ul>';
804 elsif ($levels < 0) {
805 $ret .= '</li></ul>' x abs($levels);
808 if ($gh->{'-mode'} ne 'ul') {
809 $gh->{'-mode'} = 'ul';
811 else {
812 $ret .= '</li>' if $levels <= 0;
815 $ret .= '<li>';
817 return $ret;
821 sub _ol
823 my ($gh, $levels) = @_;
824 my ($ret);
826 $ret = '';
828 if ($levels > 0) {
829 $ret .= '<ol>';
831 elsif ($levels < 0) {
832 $ret .= '</li></ol>' x abs($levels);
835 if ($gh->{'-mode'} ne 'ol') {
836 $gh->{'-mode'} = 'ol';
838 else {
839 $ret .= '</li>' if $levels <= 0;
842 $ret .= '<li>';
844 return $ret;
848 sub _blockquote
850 my ($gh) = @_;
852 $gh->_new_mode('blockquote');
853 return "\"";
857 sub _hr
859 my ($gh) = @_;
861 return "<hr size = '1' noshade = 'noshade'>";
865 sub _heading
867 my ($gh, $level, $l) = @_;
869 # creates a valid anchor
870 my ($a) = lc($l);
872 $a =~ s/[\"\'\/]//g;
873 $a =~ s/\s/_/g;
874 $a =~ s/<[^>]+>//g;
876 $l = sprintf("<a name = '%s'></a>\n<h%d class = 'level$level'>%s</h%d>",
877 $a, $level+$gh->{'header-offset'},
878 $l, $level+$gh->{'header-offset'});
880 return $l;
884 sub _table
886 my ($gh, $str) = @_;
888 if ($gh->{'-mode'} eq 'table') {
889 my ($class) = '';
890 my (@spans) = $gh->_calc_col_span($str);
892 # calculate CSS class, if any
893 if ($gh->{'class-oddeven'}) {
894 $class = "class = '" . ($gh->{'-tbl-row'} & 1) ? "odd'" : "even'";
897 $str = "<tr $class>";
899 # build columns
900 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
901 my ($i,$s);
903 $i = ${$gh->{'-table'}}[$n];
904 $i = "&nbsp;" if $i =~ /^\s*$/;
906 $s = " colspan = '$spans[$n]'" if $spans[$n] > 1;
908 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
909 $str .= "<th $class $s>$i</th>";
911 else {
912 $str .= "<td $class $s>$i</td>";
916 $str .= '</tr>';
918 @{$gh->{'-table'}} = ();
919 $gh->{'-tbl-row'}++;
921 else {
922 # new table
923 my ($params);
925 $params = "border = '1'";
926 $params .= " width = '100\%'" if $gh->{'expand-tables'};
927 $params .= " align = 'center'" if $gh->{'center-tables'};
928 $params .= " class = 'oddeven'" if $gh->{'class-oddeven'};
930 $gh->_new_mode('table', $params);
932 @{$gh->{'-table'}} = ();
933 $gh->{'-tbl-row'} = 1;
934 $str = '';
937 return $str;
941 ###########################################################
942 # troff Driver
944 package Grutatxt::troff;
946 @ISA = ("Grutatxt");
948 =head2 troff Driver
950 The troff driver uses the B<-me> macros and B<tbl>. A
951 good way to post-process this output (to PostScript in
952 the example) could be by using
954 groff -t -me -Tps
956 The additional parameters for a new Grutatxt object are:
958 =over 4
960 =item I<normal-size>
962 The point size of normal text. By default is 10.
964 =item I<heading-sizes>
966 This argument must be a reference to an array containing
967 the size in points of the 3 different heading levels. By
968 default, level sizes are [ 20, 18, 15 ].
970 =item I<table-type>
972 The type of table to be rendered by B<tbl>. Can be
973 I<allbox> (all lines rendered; this is the default value),
974 I<box> (only outlined) or I<doublebox> (only outlined by
975 a double line).
977 =back
979 =cut
981 sub new
983 my ($class, %args) = @_;
984 my ($gh);
986 bless(\%args,$class);
987 $gh = \%args;
989 $gh->{'-process-urls'} = 0;
991 $gh->{'heading-sizes'} ||= [ 20, 18, 15 ];
992 $gh->{'normal-size'} ||= 10;
993 $gh->{'table-type'} ||= "allbox"; # box, allbox, doublebox
995 return $gh;
999 sub _prefix
1001 my ($gh) = @_;
1003 $gh->_push(".nr pp $gh->{'normal-size'}");
1004 $gh->_push(".nh");
1008 sub _inline
1010 my ($gh,$l) = @_;
1012 # accept only troff inlines
1013 if ($l =~ /^<<\s*troff$/i) {
1014 $gh->{'-inline'} = 'troff';
1015 return;
1018 if ($l =~ /^>>$/) {
1019 delete $gh->{'-inline'};
1020 return;
1023 if ($gh->{'-inline'} eq 'troff') {
1024 $gh->_push($l);
1029 sub _escape
1031 my ($gh,$l) = @_;
1033 $l =~ s/\\/\\\\/g;
1034 $l =~ s/^'/\\&'/;
1036 return $l;
1040 sub _empty_line
1042 my ($gh) = @_;
1044 return '.lp';
1048 sub _strong
1050 my ($gh, $str) = @_;
1051 return "\\fB$str\\fP";
1055 sub _em
1057 my ($gh, $str) = @_;
1058 return "\\fI$str\\fP";
1062 sub _code
1064 my ($gh, $str) = @_;
1065 return "\\fI$str\\fP";
1069 sub _funcname
1071 my ($gh, $str) = @_;
1072 return "\\fB$str\\fP";
1076 sub _varname
1078 my ($gh, $str) = @_;
1079 return "\\fI$str\\fP";
1083 sub _new_mode
1085 my ($gh, $mode, $params) = @_;
1087 if ($mode ne $gh->{'-mode'}) {
1088 my $tag;
1090 # flush previous list
1091 if ($gh->{'-mode'} eq 'pre') {
1092 $gh->_push('.)l');
1094 elsif ($gh->{'-mode'} eq 'table') {
1095 chomp($gh->{'-table-head'});
1096 $gh->{'-table-head'} =~ s/\s+$//;
1097 $gh->_push($gh->{'-table-head'} . '.');
1098 $gh->_push($gh->{'-table-body'} . '.TE\n.sp 0.6');
1100 elsif ($gh->{'-mode'} eq 'blockquote') {
1101 $gh->_push('.)q');
1104 # send new one
1105 if ($mode eq 'pre') {
1106 $gh->_push('.(l L');
1108 elsif ($mode eq 'blockquote') {
1109 $gh->_push('.(q');
1112 $gh->{'-mode'} = $mode;
1117 sub _dl
1119 my ($gh, $str) = @_;
1121 $gh->_new_mode('dl');
1122 return ".ip \"$str\"\n";
1126 sub _ul
1128 my ($gh) = @_;
1130 $gh->_new_mode('ul');
1131 return ".bu\n";
1135 sub _ol
1137 my ($gh) = @_;
1139 $gh->_new_mode('ol');
1140 return ".np\n";
1144 sub _blockquote
1146 my ($gh) = @_;
1148 $gh->_new_mode('blockquote');
1149 return "\"";
1153 sub _hr
1155 my ($gh) = @_;
1157 return '.hl';
1161 sub _heading
1163 my ($gh, $level, $l) = @_;
1165 $l = '.sz ' . ${$gh->{'heading-sizes'}}[$level - 1] . "\n$l\n.sp 0.6";
1167 return $l;
1171 sub _table
1173 my ($gh, $str) = @_;
1175 if ($gh->{'-mode'} eq 'table') {
1176 my ($h, $b);
1177 my (@spans) = $gh->_calc_col_span($str);
1179 # build columns
1180 $h = '';
1181 $b = '';
1182 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1183 my ($i);
1185 if ($gh->{'table-headers'} and $gh->{'-tbl-row'} == 1) {
1186 $h .= 'cB ';
1188 else {
1189 $h .= 'l ';
1192 # add span columns
1193 $h .= 's ' x ($spans[$n] - 1) if $spans[$n] > 1;
1195 $b .= '#' if $n;
1197 $i = ${$gh->{'-table'}}[$n];
1198 $i =~ s/^\s+//;
1199 $i =~ s/\s+$//;
1200 $i =~ s/(\s)+/$1/g;
1201 $b .= $i;
1204 # add a separator
1205 $b .= "\n_" if $gh->{'table-headers'} and
1206 $gh->{'-tbl-row'} == 1 and
1207 $gh->{'table-type'} ne "allbox";
1209 $gh->{'-table-head'} .= "$h\n";
1210 $gh->{'-table-body'} .= "$b\n";
1212 @{$gh->{'-table'}} = ();
1213 $gh->{'-tbl-row'}++;
1215 else {
1216 # new table
1217 $gh->_new_mode('table');
1219 @{$gh->{'-table'}} = ();
1220 $gh->{'-tbl-row'} = 1;
1222 $gh->{'-table-head'} = ".TS\n$gh->{'table-type'} tab (#);\n";
1223 $gh->{'-table-body'} = '';
1226 $str = '';
1227 return $str;
1231 sub _postfix
1233 my ($gh) = @_;
1235 # add to top headings and footers
1236 unshift(@{$gh->{'o'}},".ef '\%' ''");
1237 unshift(@{$gh->{'o'}},".of '' '\%'");
1238 unshift(@{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1239 unshift(@{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1243 ###########################################################
1244 # man Driver
1246 package Grutatxt::man;
1248 @ISA = ("Grutatxt::troff", "Grutatxt");
1250 =head2 man Driver
1252 The man driver is used to generate Unix-like man pages. Note that
1253 all headings have the same level with this output driver.
1255 The additional parameters for a new Grutatxt object are:
1257 =over 4
1259 =item I<section>
1261 The man page section (see man documentation). By default is 1.
1263 =item I<page-name>
1265 The name of the page. This is usually the name of the program
1266 or function the man page is documenting and will be shown in the
1267 page header. By default is the empty string.
1269 =back
1271 =cut
1273 sub new
1275 my ($class, %args) = @_;
1276 my ($gh);
1278 bless(\%args,$class);
1279 $gh = \%args;
1281 $gh->{'-process-urls'} = 0;
1283 $gh->{'section'} ||= 1;
1284 $gh->{'page-name'} ||= "";
1286 return $gh;
1290 sub _prefix
1292 my ($gh) = @_;
1294 $gh->_push(".TH \"$gh->{'page-name'}\" \"$gh->{'section'}\" \"" . localtime() . "\"");
1298 sub _inline
1300 my ($gh, $l) = @_;
1302 # accept only man markup inlines
1303 if ($l =~ /^<<\s*man$/i) {
1304 $gh->{'-inline'} = 'man';
1305 return;
1308 if ($l =~ /^>>$/) {
1309 delete $gh->{'-inline'};
1310 return;
1313 if ($gh->{'-inline'} eq 'man') {
1314 $gh->_push($l);
1319 sub _empty_line
1321 my ($gh) = @_;
1323 return ' ';
1327 sub _new_mode
1329 my ($gh,$mode,$params) = @_;
1331 if ($mode ne $gh->{'-mode'}) {
1332 my $tag;
1334 # flush previous list
1335 if ($gh->{'-mode'} eq 'pre' or
1336 $gh->{'-mode'} eq 'table') {
1337 $gh->_push('.fi');
1340 if ($gh->{'-mode'} eq 'blockquote') {
1341 $gh->_push('.RE');
1344 if ($gh->{'-mode'} eq 'ul') {
1345 $gh->_push(".RE\n" x scalar(@{$gh->{'-ul-levels'}}));
1348 if ($gh->{'-mode'} eq 'ol') {
1349 $gh->_push(".RE\n" x scalar(@{$gh->{'-ol-levels'}}));
1352 # send new one
1353 if ($mode eq 'pre' or $mode eq 'table') {
1354 $gh->_push('.nf');
1357 if ($mode eq 'blockquote') {
1358 $gh->_push('.RS 4');
1361 $gh->{'-mode'} = $mode;
1366 sub _dl
1368 my ($gh, $str) = @_;
1370 $gh->_new_mode('dl');
1371 return ".TP\n.B \"$str\"\n";
1375 sub _ul
1377 my ($gh, $levels) = @_;
1378 my ($ret) = '';
1380 if ($levels > 0) {
1381 $ret = ".RS 4\n";
1383 elsif ($levels < 0) {
1384 $ret = ".RE\n" x abs($levels);
1387 $gh->_new_mode('ul');
1388 return $ret . ".TP 4\n\\(bu\n";
1392 sub _ol
1394 my ($gh, $levels) = @_;
1395 my $l = @{$gh->{'-ol-levels'}};
1396 my $ret = '';
1398 $gh->{'-ol-level'} += $levels;
1400 if ($levels > 0) {
1401 $ret = ".RS 4\n";
1403 $l[$gh->{'-ol-level'}] = 1;
1405 elsif ($levels < 0) {
1406 $ret = ".RE\n" x abs($levels);
1409 $gh->_new_mode('ol');
1410 $ret .= ".TP 4\n" . $l[$gh->{'-ol-level'}]++ . ".\n";
1412 return $ret;
1416 sub _hr
1418 my ($gh) = @_;
1420 return '';
1424 sub _heading
1426 my ($gh, $level, $l) = @_;
1428 # all headers are the same depth in man pages
1429 return ".SH \"" . uc($l) . "\"";
1433 sub _table
1435 my ($gh, $str) = @_;
1437 if ($gh->{'-mode'} eq 'table') {
1438 foreach my $r (@{$gh->{'-table-raw'}}) {
1439 $gh->_push("|$r|");
1442 else {
1443 $gh->_new_mode('table');
1446 @{$gh->{'-table'}} = ();
1447 @{$gh->{'-table-raw'}} = ();
1449 $gh->_push($str);
1451 return '';
1455 sub _postfix
1460 ###########################################################
1461 # latex Driver
1463 package Grutatxt::latex;
1465 @ISA = ("Grutatxt");
1467 =head2 LaTeX Driver
1469 The additional parameters for a new Grutatxt object are:
1471 =over 4
1473 =item I<docclass>
1475 The LaTeX document class. By default is 'report'. You can also use
1476 'article' or 'book' (consult your LaTeX documentation for details).
1478 =item I<papersize>
1480 The paper size to be used in the document. By default is 'a4paper'.
1482 =item I<encoding>
1484 The character encoding used in the document. By default is 'latin1'.
1486 =back
1488 Note that you can't nest further than 4 levels in LaTeX; if you do,
1489 LaTeX will choke in the generated code with a 'Too deeply nested' error.
1491 =cut
1493 sub new
1495 my ($class, %args) = @_;
1496 my ($gh);
1498 bless(\%args,$class);
1499 $gh = \%args;
1501 $gh->{'-process-urls'} = 0;
1503 $gh->{'-docclass'} ||= 'report';
1504 $gh->{'-papersize'} ||= 'a4paper';
1505 $gh->{'-encoding'} ||= 'latin1';
1507 return $gh;
1511 sub _prefix
1513 my ($gh) = @_;
1515 $gh->_push("\\documentclass[$gh->{'-papersize'}]{$gh->{-docclass}}");
1516 $gh->_push("\\usepackage[$gh->{'-encoding'}]{inputenc}");
1518 $gh->_push("\\begin{document}");
1522 sub _inline
1524 my ($gh, $l) = @_;
1526 # accept only latex inlines
1527 if ($l =~ /^<<\s*latex$/i) {
1528 $gh->{'-inline'} = 'latex';
1529 return;
1532 if ($l =~ /^>>$/) {
1533 delete $gh->{'-inline'};
1534 return;
1537 if ($gh->{'-inline'} eq 'latex') {
1538 $gh->_push($l);
1543 sub _escape
1545 my ($gh, $l) = @_;
1547 $l =~ s/ _ / \\_ /g;
1548 $l =~ s/ ~ / \\~ /g;
1549 $l =~ s/ & / \\& /g;
1551 return $l;
1555 sub _escape_post
1557 my ($gh, $l) = @_;
1559 $l =~ s/ # / \\# /g;
1560 $l =~ s/^\\n$//g;
1561 $l =~ s/([^\s_])_([^\s_])/$1\\_$2/g;
1563 return $l;
1567 sub _empty_line
1569 my ($gh) = @_;
1571 return "\\n";
1575 sub _strong
1577 my ($gh, $str) = @_;
1578 return "\\textbf{$str}";
1582 sub _em
1584 my ($gh, $str) = @_;
1585 return "\\emph{$str}";
1589 sub _code
1591 my ($gh, $str) = @_;
1592 return "{\\tt $str}";
1596 sub _funcname
1598 my ($gh, $str) = @_;
1599 return "{\\tt $str}";
1603 sub _varname
1605 my ($gh, $str) = @_;
1607 $str =~ s/^\$/\\\$/;
1609 return "{\\tt $str}";
1613 sub _new_mode
1615 my ($gh, $mode, $params) = @_;
1617 # mode equivalences
1618 my %latex_modes = (
1619 'pre' => 'verbatim',
1620 'blockquote' => 'quote',
1621 'table' => 'tabular',
1622 'dl' => 'description',
1623 'ul' => 'itemize',
1624 'ol' => 'enumerate'
1627 if ($mode ne $gh->{'-mode'}) {
1628 # close previous mode
1629 if ($gh->{'-mode'} eq 'ul') {
1630 $gh->_push("\\end{itemize}" x scalar(@{$gh->{'-ul-levels'}}));
1632 elsif ($gh->{'-mode'} eq 'ol') {
1633 $gh->_push("\\end{enumerate}" x scalar(@{$gh->{'-ol-levels'}}));
1635 elsif ($gh->{'-mode'} eq 'table') {
1636 $gh->_push("\\end{tabular}\n");
1638 else {
1639 $gh->_push("\\end{" . $latex_modes{$gh->{'-mode'}} . "}")
1640 if $gh->{'-mode'};
1643 # send new one
1644 $gh->_push("\\begin{" . $latex_modes{$mode} . "}" . $params)
1645 if $mode;
1647 $gh->{'-mode'} = $mode;
1649 $gh->{'-ul-levels'} = undef;
1650 $gh->{'-ol-levels'} = undef;
1655 sub _dl
1657 my ($gh, $str) = @_;
1659 $gh->_new_mode('dl');
1660 return "\\item[$str]\n";
1664 sub _ul
1666 my ($gh, $levels) = @_;
1667 my ($ret);
1669 $ret = '';
1671 if ($levels > 0) {
1672 $ret .= "\\begin{itemize}\n";
1674 elsif ($levels < 0) {
1675 $ret .= "\\end{itemize}\n" x abs($levels);
1678 $gh->{'-mode'} = 'ul';
1680 $ret .= "\\item\n";
1682 return $ret;
1686 sub _ol
1688 my ($gh, $levels) = @_;
1689 my ($ret);
1691 $ret = '';
1693 if ($levels > 0) {
1694 $ret .= "\\begin{enumerate}\n";
1696 elsif ($levels < 0) {
1697 $ret .= "\\end{enumerate}\n" x abs($levels);
1700 $gh->{'-mode'} = 'ol';
1702 $ret .= "\\item\n";
1704 return $ret;
1708 sub _blockquote
1710 my ($gh) = @_;
1712 $gh->_new_mode('blockquote');
1713 return "``";
1717 sub _hr
1719 my ($gh) = @_;
1721 return "------------\n";
1725 sub _heading
1727 my ($gh, $level, $l) = @_;
1729 my @latex_headings = ( "\\section*{", "\\subsection*{",
1730 "\\subsubsection*{");
1732 $l = "\n" . $latex_headings[$level - 1] . $l . "}";
1734 return $l;
1738 sub _table
1740 my ($gh,$str) = @_;
1742 if ($gh->{'-mode'} eq 'table') {
1743 my ($class) = '';
1744 my (@spans) = $gh->_calc_col_span($str);
1745 my (@cols);
1747 $str = '';
1749 # build columns
1750 for (my $n = 0; $n < scalar(@{$gh->{'-table'}}); $n++) {
1751 my ($i, $s);
1753 $i = ${$gh->{'-table'}}[$n];
1754 $i = "&nbsp;" if $i =~ /^\s*$/;
1756 # $s = " colspan='$spans[$n]'" if $spans[$n] > 1;
1758 # multispan columns
1759 $i = "\\multicolumn{$spans[$n]}{|l|}{$i}"
1760 if $spans[$n] > 1;
1762 $i =~ s/\s{2,}/ /g;
1763 $i =~ s/^\s+//;
1764 $i =~ s/\s+$//;
1766 push(@cols, $i);
1769 $str .= join('&', @cols) . "\\\\\n\\hline";
1771 # $str .= "\n\\hline" if $gh->{'-tbl-row'} == 1;
1773 @{$gh->{'-table'}} = ();
1774 $gh->{'-tbl-row'}++;
1776 else {
1777 # new table
1779 # count the number of columns
1780 $str =~ s/[^\+]//g;
1781 my $params = "{" . "|l" x (length($str) - 1) . "|}\n\\hline";
1783 $gh->_push();
1784 $gh->_new_mode('table', $params);
1786 @{$gh->{'-table'}} = ();
1787 $gh->{'-tbl-row'} = 1;
1788 $str = '';
1791 return $str;
1795 sub _postfix
1797 my ($gh) = @_;
1799 $gh->_push("\\end{document}");
1803 =head1 AUTHOR
1805 Angel Ortega angel@triptico.com
1807 =cut