1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2002 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
35 Grutatxt - Text to HTML (and other formats) converter
41 # create a new Grutatxt converter object
42 $grutatxt=new Grutatxt();
44 # process a Grutatxt format string
45 @output=$grutatxt->process($text);
48 @output2=$grutatxt->process_file($file);
52 Grutatxt is a module to process text documents in
53 a special markup format (also called Grutatxt), very
54 similar to plain ASCII text. These documents can be
55 converted to HTML or troff.
57 The markup is designed to be fairly intuitive and
58 straightforward and can include headings, bold and italic
59 text effects, bulleted, numbered and definition lists, URLs,
60 function and variable names, preformatted text, horizontal
61 separators and tables. Special marks can be inserted in the
62 text and a heading-based structural index can be obtained
65 A comprehensive description of the markup is defined in
66 the README file, included with the Grutatxt package (it is
67 written in Grutatxt format itself, so it can be converted
68 using the I<grutatxt> tool to any of the supported formats).
69 The latest version (and more information) can be retrieved
70 from the Grutatxt home page at:
72 http://www.triptico.com/software/grutatxt.html
74 =head1 FUNCTIONS AND METHODS
78 $grutatxt=new Grutatxt([ "mode" => $mode, ]
79 [ "title" => \$title, ]
80 [ "marks" => \@marks, ]
81 [ "index" => \@index, ]
82 [ "abstract" => \$abstract, ]
83 [ "strip-parens" => $bool, ]
84 [ "strip-dollars" => $bool, ]
85 [ %driver_specific_arguments ] );
87 Creates a new Grutatxt object instance. All parameters are
94 Output format. Can be HTML or troff. HTML is used if not specified.
98 If I<title> is specified as a reference to scalar, the first
99 level 1 heading found in the text is stored inside it.
103 Marks in the Grutatxt markup are created by inserting the
104 string <-> alone in a line. If I<marks> is specified as a
105 reference to array, it will be filled with the subscripts
106 (relative to the output array) of the lines where the marks
107 are found in the text.
111 If I<index> is specified as a reference to array, it will
112 be filled with strings in the format
116 This information can be used to build a table of contents
117 of the processed text.
119 =item I<strip-parens>
121 Function names in the Grutatxt markup are strings of
122 alphanumeric characters immediately followed by a pair
123 of open and close parentheses. If this boolean value is
124 set, function names found in the processed text will have
125 their parentheses deleted.
127 =item I<strip-dollars>
129 Variable names in the Grutatxt markup are strings of
130 alphanumeric characters preceded by a dollar sign.
131 If this boolean value is set, variable names found in
132 the processed text will have the dollar sign deleted.
136 The I<abstract> of a Grutatxt document is the fragment of text
137 from the beginning of the document to the end of the first
138 paragraph after the title. If I<abstract> is specified as a
139 reference to scalar, it will contain (after each call to the
140 B<process()> method) the subscript of the element of the output
141 array that marks the end of the abstract.
149 my ($class,%args)=@_;
152 $args{'mode'}||='HTML';
154 $class.="::".$args{'mode'};
156 $gh=new
$class(%args);
164 @output=$grutatxt->process($text);
166 Processes a text in Grutatxt format. The result is returned
167 as an array of lines.
173 my ($gh,$content)=@_;
179 # clean title and paragraph numbers
184 @
{$gh->{'marks'}}=() if ref($gh->{'marks'});
187 @
{$gh->{'index'}}=() if ref($gh->{'index'});
189 # reset abstract line
190 ${$gh->{'abstract'}}=0 if ref($gh->{'abstract'});
195 $gh->{'-mode'}=undef;
197 foreach my $l (split(/\n/,$content))
199 # inline data (passthrough)
200 if($l =~ /^<<$/ .. $l =~ /^>>$/)
207 if($l =~ /^\s*<\->\s*$/)
209 push(@
{$gh->{'marks'}},scalar(@
{$gh->{'o'}}))
210 if ref($gh->{'marks'});
215 # escape possibly dangerous characters
220 if($l =~ s/^$/$gh->_empty_line()/ge)
222 # mark the abstract end
227 # mark abstract if it's the
228 # second paragraph from the title
229 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})-1
234 if($gh->{'-process-urls'})
236 # URLs followed by a parenthesized phrase
237 $l =~ s/(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)\s*\(([^\)]+)\)/$gh->_url($1,$2)/ge;
239 # URLs without phrase
240 $l =~ s/([^=][^\"])(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)/$1.$gh->_url($2,$2)/ge;
241 $l =~ s/^(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)/$gh->_url($1,$1)/ge;
244 # change '''text''' and *text* into strong emphasis
245 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
246 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
247 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
249 # change ''text'' and _text_ into emphasis
250 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
251 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
252 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
254 # enclose function names
255 if($gh->{'strip-parens'})
257 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
261 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
264 # enclose variable names
265 if($gh->{'strip-dollars'})
267 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
271 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
277 if($l =~ s/^\s\*\s+([\w\s\-]+)\:\s+/$gh->_dl($1)/e)
282 elsif($l =~ s/^\s\*\s+/$gh->_ul()/e or
283 $l =~ s/^\s\-\s+/$gh->_ul()/e)
288 elsif($l =~ s/^\s\#\s+/$gh->_ol()/e or
289 $l =~ s/^\s1\s+/$gh->_ol()/e)
294 elsif($l =~ s/^\s\"/$gh->_blockquote()/e)
299 elsif($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e)
303 # table heading / end of row
304 elsif($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e)
309 elsif($l =~ s/^(\s.*)$/$gh->_pre($1)/e)
316 # back to normal mode
317 $gh->_new_mode(undef);
321 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
324 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
327 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
329 # change ------ into hr
330 $l =~ s/^----*$/$gh->_hr()/e;
333 $gh->_push($l) if $l;
337 $gh->_new_mode(undef);
343 ${$gh->{'title'}}=$gh->{'-title'} if ref($gh->{'title'});
345 # set abstract, if not set
346 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})
347 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
349 return(@
{$gh->{'o'}});
353 =head2 B<process_file>
355 @output=$grutatxt->process_file($filename);
357 Processes a file in Grutatxt format.
365 open F
, $file or return(undef);
367 my ($content)=join('',<F
>);
370 return($gh->process($content));
378 push(@
{$gh->{'o'}},$l);
384 my ($gh,$level,$hd)=@_;
387 $l=pop(@
{$gh->{'o'}});
389 if($l eq $gh->_empty_line())
396 $gh->{'-title'}=$l if $level==1 and not $gh->{'-title'};
399 if(ref($gh->{'index'}))
401 push(@
{$gh->{'index'}},"$level,$l");
404 return($gh->_heading($level,$l));
413 # strip first + and all -
417 my ($t)=1; @spans=();
418 for(my $n=0;$n < length($l);$n++)
420 if(substr($l,$n,1) eq '+')
427 # it's a colspan mark:
441 my @s=split(/\|/,$str);
443 for(my $n=0;$n < scalar(@s);$n++)
445 ${$gh->{'-table'}}[$n].=' '.$s[$n];
456 # if any other mode is active, add to it
457 if($gh->{'-mode'} and $gh->{'-mode'} ne "pre")
461 my ($a)=pop(@
{$gh->{'o'}})." ".$l;
467 $gh->_new_mode("pre");
473 # empty stubs for falling through the superclass
# Default _inline stub: hands the line it receives straight back,
# without altering it.
#   $line - the text line passed in by the caller
# Returns $line unchanged.
sub _inline
{
    my $self = shift;
    my ($line) = @_;

    return $line;
}
# Default _escape stub: performs no escaping at all.
#   $line - the text line passed in by the caller
# Returns $line unchanged.
sub _escape
{
    my $self = shift;
    my ($line) = @_;

    return $line;
}
# Default _empty_line stub: the replacement text for an empty input
# line is the empty string.
sub _empty_line
{
    my ($self) = @_;

    return "";
}
# Default _url stub: ignores both the URL and its label and yields
# the empty string.
#   $url   - the URL text (unused here)
#   $label - the label for the URL (unused here)
sub _url
{
    my ($self, $url, $label) = @_;

    return "";
}
# Default _strong stub: applies no markup.
#   $text - the text to be rendered with strong emphasis
# Returns $text unchanged.
sub _strong
{
    my $self = shift;
    my ($text) = @_;

    return $text;
}
# Default _em stub: applies no markup.
#   $text - the text to be rendered with emphasis
# Returns $text unchanged.
sub _em
{
    my $self = shift;
    my ($text) = @_;

    return $text;
}
# Default _funcname stub: applies no markup.
#   $name - the function name text handed in by the caller
# Returns $name unchanged.
sub _funcname
{
    my $self = shift;
    my ($name) = @_;

    return $name;
}
# Default _varname stub: applies no markup.
#   $name - the variable name text handed in by the caller
# Returns $name unchanged.
sub _varname
{
    my $self = shift;
    my ($name) = @_;

    return $name;
}
# Default _new_mode stub: accepts a mode change and does nothing
# with it.
#   $mode - the mode being switched to (may be undef)
#
# The arguments are unpacked from @_; the previous text read them
# from a garbled "@" / ";" pair, so neither the invocant nor the
# mode was ever received.
sub _new_mode
{
    # Kept as the final statement so the value the sub yields is the
    # one produced by this list assignment.
    my ($gh, $mode) = @_;
}
# Default _dl stub: applies no markup to a definition list term.
#   $term - the term text handed in by the caller
# Returns $term unchanged.
sub _dl
{
    my $self = shift;
    my ($term) = @_;

    return $term;
}
# Default _ul stub: returns its second argument unchanged, which is
# undef when the method is called without one.
sub _ul
{
    my $self = shift;
    my ($text) = @_;

    return $text;
}
# Default _ol stub: returns its second argument unchanged, which is
# undef when the method is called without one.
sub _ol
{
    my $self = shift;
    my ($text) = @_;

    return $text;
}
# Default _blockquote stub: returns its second argument unchanged,
# which is undef when the method is called without one.
sub _blockquote
{
    my $self = shift;
    my ($text) = @_;

    return $text;
}
# Default _hr stub: a horizontal rule renders as the empty string.
sub _hr
{
    my ($self) = @_;

    return "";
}
# Default _heading stub: applies no markup to a heading.
#   $level - the heading level (unused here)
#   $text  - the heading text
# Returns $text unchanged.
sub _heading
{
    my ($self, $level, $text) = @_;

    return $text;
}
# Default _table stub: applies no markup.
#   $text - the table separator text handed in by the caller
# Returns $text unchanged.
sub _table
{
    my $self = shift;
    my ($text) = @_;

    return $text;
}
# Default _prefix stub: does nothing beyond unpacking its invocant.
sub _prefix
{
    # The list assignment stays as the final statement so the value
    # the sub yields is exactly what it was before.
    my ($self) = @_;
}
# Default _postfix stub: does nothing beyond unpacking its invocant.
sub _postfix
{
    # The list assignment stays as the final statement so the value
    # the sub yields is exactly what it was before.
    my ($self) = @_;
}
494 ###########################################################
496 =head1 DRIVER SPECIFIC INFORMATION
500 ###########################################################
503 package Grutatxt
::HTML
;
509 The additional parameters for a new Grutatxt object are:
513 =item I<table-headers>
515 If this boolean value is set, the first row in tables
516 is assumed to be the heading and rendered using <th>
517 instead of <td> tags.
519 =item I<center-tables>
521 If this boolean value is set, tables are centered.
523 =item I<expand-tables>
525 If this boolean value is set, tables are expanded (width 100%).
529 If this boolean value is set, definition lists will be
530 rendered using <dl>, <dt> and <dd> instead of tables.
532 =item I<header-offset>
534 Offset to be added to the heading level when rendering
535 <h?> tags (default is 0).
537 =item I<class-oddeven>
539 If this boolean value is set, tables will be rendered
540 with an "oddeven" CSS class, and rows alternately classed
541 as "even" or "odd". If it's not set, no CSS class info
550 my ($class,%args)=@_;
553 bless(\
%args,$class);
556 $gh->{'-process-urls'}=1;
566 # accept unnamed and HTML inlines
567 if($l =~ /^<<$/ or $l =~ /^<<\s*html$/i)
569 $gh->{'-inline'}="HTML";
575 delete $gh->{'-inline'};
579 if($gh->{'-inline'} eq "HTML")
608 my ($gh,$url,$label)=@_;
610 $label=$url unless $label;
612 return("<a href=\"$url\">$label</a>");
619 return("<strong class=strong>$str</strong>");
626 return("<em class=em>$str</em>");
633 return("<code class=funcname>$str</code>");
640 return("<code class=var>$str</code>");
646 my ($gh,$mode,$params)=@_;
648 if($mode ne $gh->{'-mode'})
652 # flush previous list
653 $gh->_push("</$gh->{'-mode'}>")
657 $tag=$params ?
"<$mode $params>" : "<$mode>";
658 $gh->_push($tag) if $mode;
660 $gh->{'-mode'}=$mode;
669 if($gh->{'dl-as-dl'})
671 $gh->_new_mode("dl");
672 return("<dt><strong class=term>$str</strong><dd>");
676 $gh->_new_mode("table");
677 return("<tr><td valign=top><strong class=term>$1</strong> </td><td valign=top>");
686 $gh->_new_mode("ul");
695 $gh->_new_mode("ol");
704 $gh->_new_mode("blockquote");
713 return("<hr size=1 noshade>");
719 my ($gh,$level,$l)=@_;
721 # substitute anchor spaces with underscores
722 my ($a)=lc($l); $a =~ s/\s/_/g;
724 $l=sprintf("<a name=\"%s\"></a>\n<h%d class=level$level>%s</h%d>",
725 $a, $level+$gh->{'header-offset'},
726 $l, $level+$gh->{'header-offset'});
736 if($gh->{'-mode'} eq "table")
739 my (@spans)=$gh->_calc_col_span($str);
741 # calculate CSS class, if any
742 if($gh->{'class-oddeven'})
744 $class=($gh->{'-tbl-row'} & 1) ?
"odd" : "even";
750 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
754 $i=${$gh->{'-table'}}[$n];
755 $i=" " if $i =~ /^\s*$/;
757 $s=" colspan=$spans[$n]" if $spans[$n] > 1;
759 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
761 $str.="<th $class $s>$i</th>";
765 $str.="<td $class $s>$i</td>";
769 @
{$gh->{'-table'}}=();
778 $params.=" width='100\%'" if $gh->{'expand-tables'};
779 $params.=" align=center" if $gh->{'center-tables'};
780 $params.=" class=oddeven" if $gh->{'class-oddeven'};
782 $gh->_new_mode("table", $params);
784 @
{$gh->{'-table'}}=();
793 ###########################################################
796 package Grutatxt
::troff
;
802 The troff driver uses the B<-me> macros and B<tbl>. A
803 good way to post-process this output (to PostScript in
804 the example) could be by using
808 The additional parameters for a new Grutatxt object are:
814 The point size of normal text. The default is 10.
816 =item I<heading-sizes>
818 This argument must be a reference to an array containing
819 the size in points of the 3 different heading levels. By
820 default, level sizes are [ 20, 18, 15 ].
824 The type of table to be rendered by B<tbl>. Can be
825 I<allbox> (all lines rendered; this is the default value),
826 I<box> (only outlined) or I<doublebox> (only outlined by
835 my ($class,%args)=@_;
838 bless(\
%args,$class);
841 $gh->{'-process-urls'}=0;
843 $gh->{'heading-sizes'}||=[ 20, 18, 15 ];
844 $gh->{'normal-size'}||=10;
845 $gh->{'table-type'}||="allbox"; # box, allbox, doublebox
855 $gh->_push(".nr pp $gh->{'normal-size'}");
864 # accept only troff inlines
865 if($l =~ /^<<\s*troff$/i)
867 $gh->{'-inline'}="troff";
873 delete $gh->{'-inline'};
877 if($gh->{'-inline'} eq "troff")
906 return("\\fB$str\\fP");
913 return("\\fI$str\\fP");
920 return("\\fB$str\\fP");
927 return("\\fI$str\\fP");
933 my ($gh,$mode,$params)=@_;
935 if($mode ne $gh->{'-mode'})
939 # flush previous list
940 if($gh->{'-mode'} eq "pre")
944 elsif($gh->{'-mode'} eq "table")
946 chomp($gh->{'-table-head'});
947 $gh->{'-table-head'} =~ s/\s+$//;
948 $gh->_push($gh->{'-table-head'}.".");
949 $gh->_push($gh->{'-table-body'}.".TE\n.sp 0.6");
951 elsif($gh->{'-mode'} eq "blockquote")
961 elsif($mode eq "blockquote")
966 $gh->{'-mode'}=$mode;
975 $gh->_new_mode("dl");
976 return(".ip \"$str\"\n");
984 $gh->_new_mode("ul");
993 $gh->_new_mode("ol");
1002 $gh->_new_mode("blockquote");
1017 my ($gh,$level,$l)=@_;
1019 $l=".sz ".${$gh->{'heading-sizes'}}[$level - 1]."\n$l\n.sp 0.6";
1029 if($gh->{'-mode'} eq "table")
1032 my (@spans)=$gh->_calc_col_span($str);
1037 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
1041 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
1051 $h.="s " x
($spans[$n] - 1) if $spans[$n] > 1;
1055 $i=${$gh->{'-table'}}[$n];
1063 $b.="\n_" if $gh->{'table-headers'} and
1064 $gh->{'-tbl-row'}==1 and
1065 $gh->{'table-type'} ne "allbox";
1067 $gh->{'-table-head'}.="$h\n";
1068 $gh->{'-table-body'}.="$b\n";
1070 @
{$gh->{'-table'}}=();
1071 $gh->{'-tbl-row'}++;
1076 $gh->_new_mode("table");
1078 @
{$gh->{'-table'}}=();
1079 $gh->{'-tbl-row'}=1;
1081 $gh->{'-table-head'}=".TS\n$gh->{'table-type'} tab (#);\n";
1082 $gh->{'-table-body'}="";
1094 # add to top headings and footers
1095 unshift(@
{$gh->{'o'}},".ef '\%' ''");
1096 unshift(@
{$gh->{'o'}},".of '' '\%'");
1097 unshift(@
{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1098 unshift(@
{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1104 Angel Ortega angel@triptico.com