1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2002 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
35 Grutatxt - Text to HTML (and other formats) converter
41 # create a new Grutatxt converter object
42 $grutatxt=new Grutatxt();
44 # process a Grutatxt format string
45 @output=$grutatxt->process($text);
48 @output2=$grutatxt->process_file($file);
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML or troff.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
65 A comprehensive description of the markup is given in
66 the README file, included with the Grutatxt package (it is
67 written in Grutatxt format itself, so it can be converted
68 using the I<grutatxt> tool to any of the supported formats).
69 The latest version (and more information) can be retrieved
70 from the Grutatxt home page at:
72 http://www.triptico.com/software/grutatxt.html
74 =head1 FUNCTIONS AND METHODS
78 $grutatxt=new Grutatxt([ "mode" => $mode, ]
79 [ "title" => \$title, ]
80 [ "marks" => \@marks, ]
81 [ "index" => \@index, ]
82 [ "abstract" => \$abstract, ]
83 [ "strip-parens" => $bool, ]
84 [ "strip-dollars" => $bool, ]
85 [ %driver_specific_arguments ] );
87 Creates a new Grutatxt object instance. All parameters are
94 Output format. Can be HTML or troff. HTML is used if not specified.
98 If I<title> is specified as a reference to scalar, the first
99 level 1 heading found in the text is stored inside it.
103 Marks in the Grutatxt markup are created by inserting the
104 string <-> alone on a line. If I<marks> is specified as a
105 reference to array, it will be filled with the subscripts
106 (relative to the output array) of the lines where the marks
107 are found in the text.
111 If I<index> is specified as a reference to array, it will
112 be filled with strings in the format
116 This information can be used to build a table of contents
117 of the processed text.
119 =item I<strip-parens>
121 Function names in the Grutatxt markup are strings of
122 alphanumeric characters immediately followed by a pair
123 of open and close parentheses. If this boolean value is
124 set, function names found in the processed text will have
125 their parentheses deleted.
127 =item I<strip-dollars>
129 Variable names in the Grutatxt markup are strings of
130 alphanumeric characters preceded by a dollar sign.
131 If this boolean value is set, variable names found in
132 the processed text will have the dollar sign deleted.
136 The I<abstract> of a Grutatxt document is the fragment of text
137 from the beginning of the document to the end of the first
138 paragraph after the title. If I<abstract> is specified as a
139 reference to scalar, it will contain (after each call to the
140 B<process()> method) the subscript of the element of the output
141 array that marks the end of the abstract.
149 my ($class,%args)=@_;
152 $args{'mode'}||='HTML';
154 $class.="::".$args{'mode'};
156 $gh=new
$class(%args);
164 @output=$grutatxt->process($text);
166 Processes a text in Grutatxt format. The result is returned
167 as an array of lines.
173 my ($gh,$content)=@_;
179 # clean title and paragraph numbers
184 @
{$gh->{'marks'}}=() if ref($gh->{'marks'});
187 @
{$gh->{'index'}}=() if ref($gh->{'index'});
189 # reset abstract line
190 ${$gh->{'abstract'}}=0 if ref($gh->{'abstract'});
195 $gh->{'-mode'}=undef;
197 foreach my $l (split(/\n/,$content))
199 # inline data (passthrough)
200 if($l =~ /^<<$/ .. $l =~ /^>>$/)
207 if($l =~ /^\s*<\->\s*$/)
209 push(@
{$gh->{'marks'}},scalar(@
{$gh->{'o'}}))
210 if ref($gh->{'marks'});
215 # escape possibly dangerous characters
220 if($l =~ s/^$/$gh->_empty_line()/ge)
222 # mark the abstract end
227 # mark abstract if it's the
228 # second paragraph from the title
229 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})-1
234 if($gh->{'-process-urls'})
236 # URLs followed by a parenthesized phrase
237 $l =~ s/(http:\/\/\S
+)\s
+\
(([^\
)]+)\
)/$gh->_url($1,$2)/ge
;
239 # URLs without phrase
240 $l =~ s/([^=][^\"])(http:\/\/\S
+)/$1.$gh->_url($2)/ge
;
241 $l =~ s/^(http:\/\/\S
+)/$gh->_url($1)/ge
;
244 # change '''text''' and *text* into strong emphasis
245 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
246 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
247 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
249 # change ''text'' and _text_ into emphasis
250 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
251 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
252 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
254 # enclose function names
255 if($gh->{'strip-parens'})
257 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
261 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
264 # enclose variable names
265 if($gh->{'strip-dollars'})
267 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
271 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
279 if($l =~ s/^\s\*\s+([\w\s\-]+)\:\s+/$gh->_dl($1)/e)
284 elsif($gh->{'-mode'} ne "pre" and
285 ($l =~ s/^(\s+)\*\s+/$gh->_unsorted_list($1)/e or
286 $l =~ s/^(\s+)\-\s+/$gh->_unsorted_list($1)/e))
291 elsif($gh->{'-mode'} ne "pre" and
292 ($l =~ s/^(\s+)\#\s+/$gh->_ordered_list($1)/e or
293 $l =~ s/^(\s+)1\s+/$gh->_ordered_list($1)/e))
298 elsif($l =~ s/^\s\"/$gh->_blockquote()/e)
303 elsif($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e)
307 # table heading / end of row
308 elsif($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e)
313 elsif($l =~ s/^(\s.*)$/$gh->_pre($1)/e)
320 # back to normal mode
321 $gh->_new_mode(undef);
325 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
328 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
331 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
333 # change ------ into hr
334 $l =~ s/^----*$/$gh->_hr()/e;
337 $gh->_push($l) if $l;
341 $gh->_new_mode(undef);
347 ${$gh->{'title'}}=$gh->{'-title'} if ref($gh->{'title'});
349 # set abstract, if not set
350 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})
351 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
353 return(@
{$gh->{'o'}});
357 =head2 B<process_file>
359 @output=$grutatxt->process_file($filename);
361 Processes a file in Grutatxt format.
369 open F
, $file or return(undef);
371 my ($content)=join('',<F
>);
374 return($gh->process($content));
382 push(@
{$gh->{'o'}},$l);
388 my ($gh,$level,$hd)=@_;
391 $l=pop(@
{$gh->{'o'}});
393 if($l eq $gh->_empty_line())
400 $gh->{'-title'}=$l if $level==1 and not $gh->{'-title'};
403 if(ref($gh->{'index'}))
405 push(@
{$gh->{'index'}},"$level,$l");
408 return($gh->_heading($level,$l));
417 # strip first + and all -
421 my ($t)=1; @spans=();
422 for(my $n=0;$n < length($l);$n++)
424 if(substr($l,$n,1) eq '+')
431 # it's a colspan mark:
445 my @s=split(/\|/,$str);
447 for(my $n=0;$n < scalar(@s);$n++)
449 ${$gh->{'-table'}}[$n].=' '.$s[$n];
460 # if any other mode is active, add to it
461 if($gh->{'-mode'} and $gh->{'-mode'} ne "pre")
465 my ($a)=pop(@
{$gh->{'o'}})." ".$l;
471 $gh->_new_mode("pre");
480 my ($gh, $str, $ind)=@_;
489 # if last level is less indented, increase
496 # if last level is more indented, decrease
497 # levels until the same is found (or back to
498 # the beginning if not)
502 last if $l[-1] == $ind;
516 return($gh->_ul($gh->_multilevel_list('-ul-levels', $ind)));
524 return($gh->_ol($gh->_multilevel_list('-ol-levels', $ind)));
528 # empty stubs for falling through the superclass
# Default (no-op) implementations of the driver hooks. Each one only
# unpacks its arguments and then yields either one of them unchanged or
# an empty string, so the base class adds no markup of its own.
# NOTE(review): presumably the driver packages declared later in this
# file (Grutatxt::HTML, Grutatxt::troff) override these -- confirm.

# _inline($l): yields the line $l unchanged.
530 sub _inline
{ my ($gh,$l)=@_; $l; }
# _escape($l): yields the line $l unchanged (no escaping is done here).
531 sub _escape
{ my ($gh,$l)=@_; $l; }
# _empty_line(): yields the empty string.
532 sub _empty_line
{ my ($gh)=@_; ""; }
# _url($url,$label): ignores both arguments and yields the empty string.
533 sub _url
{ my ($gh,$url,$label)=@_; ""; }
# _strong($str): yields $str unchanged.
534 sub _strong
{ my ($gh,$str)=@_; $str; }
# _em($str): yields $str unchanged.
535 sub _em
{ my ($gh,$str)=@_; $str; }
# _funcname($str): yields $str unchanged.
536 sub _funcname
{ my ($gh,$str)=@_; $str; }
# _varname($str): yields $str unchanged.
537 sub _varname
{ my ($gh,$str)=@_; $str; }
# _new_mode($mode): only unpacks its arguments; no mode state is
# touched in this stub.
538 sub _new_mode
{ my ($gh,$mode)=@
; }
# _dl($str): yields the definition term $str unchanged.
539 sub _dl
{ my ($gh,$str)=@_; $str; }
# _ul($level): ignores $level and yields the empty string.
540 sub _ul
{ my ($gh,$level)=@_; ""; }
# _ol($level): ignores $level and yields the empty string.
541 sub _ol
{ my ($gh,$level)=@_; ""; }
# _blockquote($str): yields $str unchanged.
542 sub _blockquote
{ my ($gh,$str)=@_; $str; }
# _hr(): yields the empty string.
543 sub _hr
{ my ($gh)=@_; "" }
# _heading($level,$l): ignores $level and yields the heading text $l
# unchanged.
544 sub _heading
{ my ($gh,$level,$l)=@_; $l; }
# _table($str): yields $str unchanged.
545 sub _table
{ my ($gh,$str)=@_; $str; }
# _prefix(): only unpacks its argument; nothing else is done.
546 sub _prefix
{ my ($gh)=@_; }
# _postfix(): only unpacks its argument; nothing else is done.
547 sub _postfix
{ my ($gh)=@_; }
549 ###########################################################
551 =head1 DRIVER SPECIFIC INFORMATION
555 ###########################################################
558 package Grutatxt
::HTML
;
564 The additional parameters for a new Grutatxt object are:
568 =item I<table-headers>
570 If this boolean value is set, the first row in tables
571 is assumed to be the heading and rendered using <th>
572 instead of <td> tags.
574 =item I<center-tables>
576 If this boolean value is set, tables are centered.
578 =item I<expand-tables>
580 If this boolean value is set, tables are expanded (width 100%).
584 If this boolean value is set, definition lists will be
585 rendered using <dl>, <dt> and <dd> instead of tables.
587 =item I<header-offset>
589 Offset to be added to the heading level when rendering
590 <h?> tags (default is 0).
592 =item I<class-oddeven>
594 If this boolean value is set, tables will be rendered
595 with an "oddeven" CSS class, and rows alternately classed
596 as "even" or "odd". If it's not set, no CSS class info
605 my ($class,%args)=@_;
608 bless(\
%args,$class);
611 $gh->{'-process-urls'}=1;
621 # accept unnamed and HTML inlines
622 if($l =~ /^<<$/ or $l =~ /^<<\s*html$/i)
624 $gh->{'-inline'}="HTML";
630 delete $gh->{'-inline'};
634 if($gh->{'-inline'} eq "HTML")
663 my ($gh,$url,$label)=@_;
665 $label=$url unless $label;
667 return("<a href=\"$url\">$label</a>");
674 return("<strong class=strong>$str</strong>");
681 return("<em class=em>$str</em>");
688 return("<code class=funcname>$str</code>");
695 return("<code class=var>$str</code>");
701 my ($gh,$mode,$params)=@_;
703 if($mode ne $gh->{'-mode'})
707 # flush previous mode
710 if($gh->{'-mode'} eq "ul")
712 $gh->_push("</ul>" x
scalar(@
{$gh->{'-ul-levels'}}));
714 elsif($gh->{'-mode'} eq "ol")
716 $gh->_push("</ol>" x
scalar(@
{$gh->{'-ol-levels'}}));
718 elsif($gh->{'-mode'})
720 $gh->_push("</$gh->{'-mode'}>");
724 $tag=$params ?
"<$mode $params>" : "<$mode>";
725 $gh->_push($tag) if $mode;
727 $gh->{'-mode'}=$mode;
729 # clean previous lists
730 $gh->{'-ul-levels'} = undef;
731 $gh->{'-ol-levels'} = undef;
740 if($gh->{'dl-as-dl'})
742 $gh->_new_mode("dl");
743 return("<dt><strong class=term>$str</strong><dd>");
747 $gh->_new_mode("table");
748 return("<tr><td valign=top><strong class=term>$1</strong> </td><td valign=top>");
755 my ($gh, $levels)=@_;
766 $ret="</ul>" x
abs($levels);
779 my ($gh, $levels)=@_;
790 $ret="</ol>" x
abs($levels);
805 $gh->_new_mode("blockquote");
814 return("<hr size=1 noshade>");
820 my ($gh,$level,$l)=@_;
822 # substitute anchor spaces with underscores
823 my ($a)=lc($l); $a =~ s/\s/_/g;
825 $l=sprintf("<a name=\"%s\"></a>\n<h%d class=level$level>%s</h%d>",
826 $a, $level+$gh->{'header-offset'},
827 $l, $level+$gh->{'header-offset'});
837 if($gh->{'-mode'} eq "table")
840 my (@spans)=$gh->_calc_col_span($str);
842 # calculate CSS class, if any
843 if($gh->{'class-oddeven'})
845 $class=($gh->{'-tbl-row'} & 1) ?
"odd" : "even";
851 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
855 $i=${$gh->{'-table'}}[$n];
856 $i=" " if $i =~ /^\s*$/;
858 $s=" colspan=$spans[$n]" if $spans[$n] > 1;
860 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
862 $str.="<th $class $s>$i</th>";
866 $str.="<td $class $s>$i</td>";
870 @
{$gh->{'-table'}}=();
879 $params.=" width='100\%'" if $gh->{'expand-tables'};
880 $params.=" align=center" if $gh->{'center-tables'};
881 $params.=" class=oddeven" if $gh->{'class-oddeven'};
883 $gh->_new_mode("table", $params);
885 @
{$gh->{'-table'}}=();
894 ###########################################################
897 package Grutatxt
::troff
;
903 The troff driver uses the B<-me> macros and B<tbl>. A
904 good way to post-process this output (to PostScript in
905 the example) could be by using
909 The additional parameters for a new Grutatxt object are:
915 The point size of normal text. The default is 10.
917 =item I<heading-sizes>
919 This argument must be a reference to an array containing
920 the size in points of the 3 different heading levels. By
921 default, level sizes are [ 20, 18, 15 ].
925 The type of table to be rendered by B<tbl>. Can be
926 I<allbox> (all lines rendered; this is the default value),
927 I<box> (only outlined) or I<doublebox> (only outlined by
936 my ($class,%args)=@_;
939 bless(\
%args,$class);
942 $gh->{'-process-urls'}=0;
944 $gh->{'heading-sizes'}||=[ 20, 18, 15 ];
945 $gh->{'normal-size'}||=10;
946 $gh->{'table-type'}||="allbox"; # box, allbox, doublebox
956 $gh->_push(".nr pp $gh->{'normal-size'}");
965 # accept only troff inlines
966 if($l =~ /^<<\s*troff$/i)
968 $gh->{'-inline'}="troff";
974 delete $gh->{'-inline'};
978 if($gh->{'-inline'} eq "troff")
1007 return("\\fB$str\\fP");
1014 return("\\fI$str\\fP");
1021 return("\\fB$str\\fP");
1028 return("\\fI$str\\fP");
1034 my ($gh,$mode,$params)=@_;
1036 if($mode ne $gh->{'-mode'})
1040 # flush previous mode
1041 if($gh->{'-mode'} eq "pre")
1045 elsif($gh->{'-mode'} eq "table")
1047 chomp($gh->{'-table-head'});
1048 $gh->{'-table-head'} =~ s/\s+$//;
1049 $gh->_push($gh->{'-table-head'}.".");
1050 $gh->_push($gh->{'-table-body'}.".TE\n.sp 0.6");
1052 elsif($gh->{'-mode'} eq "blockquote")
1060 $gh->_push(".(l L");
1062 elsif($mode eq "blockquote")
1067 $gh->{'-mode'}=$mode;
1076 $gh->_new_mode("dl");
1077 return(".ip \"$str\"\n");
1085 $gh->_new_mode("ul");
1094 $gh->_new_mode("ol");
1103 $gh->_new_mode("blockquote");
1118 my ($gh,$level,$l)=@_;
1120 $l=".sz ".${$gh->{'heading-sizes'}}[$level - 1]."\n$l\n.sp 0.6";
1130 if($gh->{'-mode'} eq "table")
1133 my (@spans)=$gh->_calc_col_span($str);
1138 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
1142 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
1152 $h.="s " x
($spans[$n] - 1) if $spans[$n] > 1;
1156 $i=${$gh->{'-table'}}[$n];
1164 $b.="\n_" if $gh->{'table-headers'} and
1165 $gh->{'-tbl-row'}==1 and
1166 $gh->{'table-type'} ne "allbox";
1168 $gh->{'-table-head'}.="$h\n";
1169 $gh->{'-table-body'}.="$b\n";
1171 @
{$gh->{'-table'}}=();
1172 $gh->{'-tbl-row'}++;
1177 $gh->_new_mode("table");
1179 @
{$gh->{'-table'}}=();
1180 $gh->{'-tbl-row'}=1;
1182 $gh->{'-table-head'}=".TS\n$gh->{'table-type'} tab (#);\n";
1183 $gh->{'-table-body'}="";
1195 # add to top headings and footers
1196 unshift(@
{$gh->{'o'}},".ef '\%' ''");
1197 unshift(@
{$gh->{'o'}},".of '' '\%'");
1198 unshift(@
{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1199 unshift(@
{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1205 Angel Ortega angel@triptico.com