1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2002 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
33 Grutatxt - Text to HTML (and other formats) converter
39 # create a new Grutatxt converter object
40 $grutatxt=new Grutatxt();
42 # process a Grutatxt format string
43 @output=$grutatxt->process($text);
46 @output2=$grutatxt->process_file($file);
50 Grutatxt is a module to process text documents in
51 a special markup format (also called Grutatxt), very
52 similar to plain ASCII text. These documents can be
53 converted to HTML or troff.
55 The markup is designed to be fairly intuitive and
56 straightforward and can include headings, bold and italic
57 text effects, bulleted, numbered and definition lists, URLs,
58 function and variable names, preformatted text, horizontal
59 separators and tables. Special marks can be inserted in the
60 text and a heading-based structural index can be obtained
63 A comprehensive description of the markup is defined in
64 the README file, included with the Grutatxt package (it is
65 written in Grutatxt format itself, so it can be converted
66 using the I<grutatxt> tool to any of the supported formats).
67 The latest version (and more information) can be retrieved
68 from the Grutatxt home page at:
70 http://www.triptico.com/software/grutatxt.html
72 =head1 FUNCTIONS AND METHODS
76 $grutatxt=new Grutatxt([ "mode" => $mode, ]
77 [ "title" => \$title, ]
78 [ "marks" => \@marks, ]
79 [ "index" => \@index, ]
80 [ "abstract" => \$abstract, ]
81 [ "strip-parens" => $bool, ]
82 [ "strip-dollars" => $bool, ]
83 [ %driver_specific_arguments ] );
85 Creates a new Grutatxt object instance. All parameters are
92 Output format. Can be HTML or troff. HTML is used if not specified.
96 If I<title> is specified as a reference to scalar, the first
97 level 1 heading found in the text is stored inside it.
101 Marks in the Grutatxt markup are created by inserting the
102 string <-> alone on a line. If I<marks> is specified as a
103 reference to array, it will be filled with the subscripts
104 (relative to the output array) of the lines where the marks
105 are found in the text.
109 If I<index> is specified as a reference to array, it will
110 be filled with strings in the format
114 This information can be used to build a table of contents
115 of the processed text.
117 =item I<strip-parens>
119 Function names in the Grutatxt markup are strings of
120 alphanumeric characters immediately followed by a pair
121 of open and close parentheses. If this boolean value is
122 set, function names found in the processed text will have
123 their parentheses deleted.
125 =item I<strip-dollars>
127 Variable names in the Grutatxt markup are strings of
128 alphanumeric characters preceded by a dollar sign.
129 If this boolean value is set, variable names found in
130 the processed text will have the dollar sign deleted.
134 The I<abstract> of a Grutatxt document is the fragment of text
135 from the beginning of the document to the end of the first
136 paragraph after the title. If I<abstract> is specified as a
137 reference to scalar, it will contain (after each call to the
138 B<process()> method) the subscript of the element of the output
139 array that marks the end of the abstract.
147 my ($class,%args)=@_;
150 $args{'mode'}||='HTML';
152 $class.="::".$args{'mode'};
154 $gh=new
$class(%args);
162 @output=$grutatxt->process($text);
164 Processes a text in Grutatxt format. The result is returned
165 as an array of lines.
171 my ($gh,$content)=@_;
177 # clean title and paragraph numbers
182 @
{$gh->{'marks'}}=() if ref($gh->{'marks'});
185 @
{$gh->{'index'}}=() if ref($gh->{'index'});
187 # reset abstract line
188 ${$gh->{'abstract'}}=0 if ref($gh->{'abstract'});
193 $gh->{'-mode'}=undef;
195 foreach my $l (split(/\n/,$content))
197 # inline data (passthrough)
198 if($l =~ /^<<$/ .. $l =~ /^>>$/)
205 if($l =~ /^\s*<\->\s*$/)
207 push(@
{$gh->{'marks'}},scalar(@
{$gh->{'o'}}))
208 if ref($gh->{'marks'});
213 # escape possibly dangerous characters
218 if($l =~ s/^$/$gh->_empty_line()/ge)
220 # mark the abstract end
225 # mark abstract if it's the
226 # second paragraph from the title
227 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})-1
232 if($gh->{'-process-urls'})
234 # URLs followed by a parenthesized phrase
235 $l =~ s/(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)\s*\(([^\)]+)\)/$gh->_url($1,$2)/ge;
237 # URLs without phrase
238 $l =~ s/([^=][^\"])(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)/$1.$gh->_url($2,$2)/ge;
239 $l =~ s/^(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)/$gh->_url($1,$1)/ge;
242 # change '''text''' and *text* into strong emphasis
243 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
244 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
245 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
247 # change ''text'' and _text_ into emphasis
248 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
249 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
250 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
252 # enclose function names
253 if($gh->{'strip-parens'})
255 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
259 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
262 # enclose variable names
263 if($gh->{'strip-dollars'})
265 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
269 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
275 if($l =~ s/^\s\*\s+([\w\s\-]+)\:\s+/$gh->_dl($1)/e)
280 elsif($l =~ s/^\s\*\s+/$gh->_ul()/e or
281 $l =~ s/^\s\-\s+/$gh->_ul()/e)
286 elsif($l =~ s/^\s\#\s+/$gh->_ol()/e or
287 $l =~ s/^\s1\s+/$gh->_ol()/e)
292 elsif($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e)
296 # table heading / end of row
297 elsif($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e)
302 elsif($l =~ s/^(\s.*)$/$gh->_pre($1)/e)
309 # back to normal mode
310 $gh->_new_mode(undef);
314 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
317 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
320 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
322 # change ------ into hr
323 $l =~ s/^----*$/$gh->_hr()/e;
326 $gh->_push($l) if $l;
330 $gh->_new_mode(undef);
336 ${$gh->{'title'}}=$gh->{'-title'} if ref($gh->{'title'});
338 # set abstract, if not set
339 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})
340 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
342 return(@
{$gh->{'o'}});
346 =head2 B<process_file>
348 @output=$grutatxt->process_file($filename);
350 Processes a file in Grutatxt format.
358 open F
, $file or return(undef);
360 my ($content)=join('',<F
>);
363 return($gh->process($content));
371 push(@
{$gh->{'o'}},$l);
377 my ($gh,$level,$hd)=@_;
380 $l=pop(@
{$gh->{'o'}});
382 if($l eq $gh->_empty_line())
389 $gh->{'-title'}=$l if $level==1 and not $gh->{'-title'};
392 if(ref($gh->{'index'}))
394 push(@
{$gh->{'index'}},"$level,$l");
397 return($gh->_heading($level,$l));
406 # strip first + and all -
410 my ($t)=1; @spans=();
411 for(my $n=0;$n < length($l);$n++)
413 if(substr($l,$n,1) eq '+')
420 # it's a colspan mark:
434 my @s=split(/\|/,$str);
436 for(my $n=0;$n < scalar(@s);$n++)
438 ${$gh->{'-table'}}[$n].=' '.$s[$n];
449 # if any other mode is active, add to it
450 if($gh->{'-mode'} and $gh->{'-mode'} ne "pre")
454 my ($a)=pop(@
{$gh->{'o'}})." ".$l;
460 $gh->_new_mode("pre");
466 # Default implementations of the driver hooks called by the converter.
# The original note describes them as stubs "for falling through the
# superclass"; presumably the Grutatxt::HTML and Grutatxt::troff packages
# inherit them -- the @ISA setup is not visible here, TODO confirm.
#
# What each stub does, as written below:
#   - hand the text argument back unchanged:
#       _inline, _escape, _strong, _em, _funcname, _varname,
#       _dl, _ul, _ol, _table, and _heading (returns $l, ignoring $level)
#   - evaluate to the empty string: _empty_line, _url, _hr
#     (_url therefore discards both the URL and its label)
#   - only unpack their arguments and do nothing else:
#       _new_mode, _prefix, _postfix
#
# _ul and _ol are invoked with no arguments by the list-detection
# substitutions in process() (s/.../$gh->_ul()/e), so $str is undefined
# when these base versions run.
#
# NOTE(review): in _new_mode the argument unpack reads "=@" with the ";"
# on the following line, which looks like a damaged "@_" -- confirm
# against the pristine file before relying on it.
468 sub _inline
{ my ($gh,$l)=@_; $l; }
469 sub _escape
{ my ($gh,$l)=@_; $l; }
470 sub _empty_line
{ my ($gh)=@_; ""; }
471 sub _url
{ my ($gh,$url,$label)=@_; ""; }
472 sub _strong
{ my ($gh,$str)=@_; $str; }
473 sub _em
{ my ($gh,$str)=@_; $str; }
474 sub _funcname
{ my ($gh,$str)=@_; $str; }
475 sub _varname
{ my ($gh,$str)=@_; $str; }
476 sub _new_mode
{ my ($gh,$mode)=@
; }
477 sub _dl
{ my ($gh,$str)=@_; $str; }
478 sub _ul
{ my ($gh,$str)=@_; $str; }
479 sub _ol
{ my ($gh,$str)=@_; $str; }
480 sub _hr
{ my ($gh)=@_; "" }
481 sub _heading
{ my ($gh,$level,$l)=@_; $l; }
482 sub _table
{ my ($gh,$str)=@_; $str; }
483 sub _prefix
{ my ($gh)=@_; }
484 sub _postfix
{ my ($gh)=@_; }
486 ###########################################################
488 =head1 DRIVER SPECIFIC INFORMATION
492 ###########################################################
495 package Grutatxt
::HTML
;
501 The additional parameters for a new Grutatxt object are:
505 =item I<table-headers>
507 If this boolean value is set, the first row in tables
508 is assumed to be the heading and rendered using <th>
509 instead of <td> tags.
511 =item I<center-tables>
513 If this boolean value is set, tables are centered.
515 =item I<expand-tables>
517 If this boolean value is set, tables are expanded (width 100%).
521 If this boolean value is set, definition lists will be
522 rendered using <dl>, <dt> and <dd> instead of tables.
524 =item I<header-offset>
526 Offset to be added to the heading level when rendering
527 <h?> tags (default is 0).
529 =item I<class-oddeven>
531 If this boolean value is set, tables will be rendered
532 with an "oddeven" CSS class, and rows alternately classed
533 as "even" or "odd". If it's not set, no CSS class info
542 my ($class,%args)=@_;
545 bless(\
%args,$class);
548 $gh->{'-process-urls'}=1;
558 # accept unnamed and HTML inlines
559 if($l =~ /^<<$/ or $l =~ /^<<\s*html$/i)
561 $gh->{'-inline'}="HTML";
567 delete $gh->{'-inline'};
571 if($gh->{'-inline'} eq "HTML")
600 my ($gh,$url,$label)=@_;
602 $label=$url unless $label;
604 return("<a href=\"$url\">$label</a>");
611 return("<strong class=strong>$str</strong>");
618 return("<em class=em>$str</em>");
625 return("<code class=funcname>$str</code>");
632 return("<code class=var>$str</code>");
638 my ($gh,$mode,$params)=@_;
640 if($mode ne $gh->{'-mode'})
644 # flush previous list
645 $gh->_push("</$gh->{'-mode'}>")
649 $tag=$params ?
"<$mode $params>" : "<$mode>";
650 $gh->_push($tag) if $mode;
652 $gh->{'-mode'}=$mode;
661 if($gh->{'dl-as-dl'})
663 $gh->_new_mode("dl");
664 return("<dt><strong class=term>$str</strong><dd>");
668 $gh->_new_mode("table");
669 return("<tr><td valign=top><strong class=term>$1</strong> </td><td valign=top>");
678 $gh->_new_mode("ul");
687 $gh->_new_mode("ol");
696 return("<hr size=1 noshade>");
702 my ($gh,$level,$l)=@_;
704 # substitute anchor spaces with underscores
705 my ($a)=lc($l); $a =~ s/\s/_/g;
707 $l=sprintf("<a name=\"$a\"></a>\n<h%d class=level$level>$l</h%d>",
708 $level+$gh->{'header-offset'},
709 $level+$gh->{'header-offset'});
719 if($gh->{'-mode'} eq "table")
722 my (@spans)=$gh->_calc_col_span($str);
724 # calculate CSS class, if any
725 if($gh->{'class-oddeven'})
727 $class=($gh->{'-tbl-row'} & 1) ?
"odd" : "even";
733 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
737 $i=${$gh->{'-table'}}[$n];
738 $i=" " if $i =~ /^\s*$/;
740 $s=" colspan=$spans[$n]" if $spans[$n] > 1;
742 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
744 $str.="<th $class $s>$i</th>";
748 $str.="<td $class $s>$i</td>";
752 @
{$gh->{'-table'}}=();
761 $params.=" width='100\%'" if $gh->{'expand-tables'};
762 $params.=" align=center" if $gh->{'center-tables'};
763 $params.=" class=oddeven" if $gh->{'class-oddeven'};
765 $gh->_new_mode("table", $params);
767 @
{$gh->{'-table'}}=();
776 ###########################################################
779 package Grutatxt
::troff
;
785 The troff driver uses the B<-me> macros and B<tbl>. A
786 good way to post-process this output (to PostScript in
787 the example) could be by using
791 The additional parameters for a new Grutatxt object are:
797 The point size of normal text. The default is 10.
799 =item I<heading-sizes>
801 This argument must be a reference to an array containing
802 the size in points of the 3 different heading levels. By
803 default, level sizes are [ 20, 18, 15 ].
807 The type of table to be rendered by B<tbl>. Can be
808 I<allbox> (all lines rendered; this is the default value),
809 I<box> (only outlined) or I<doublebox> (only outlined by
818 my ($class,%args)=@_;
821 bless(\
%args,$class);
824 $gh->{'-process-urls'}=0;
826 $gh->{'heading-sizes'}||=[ 20, 18, 15 ];
827 $gh->{'normal-size'}||=10;
828 $gh->{'table-type'}||="allbox"; # box, allbox, doublebox
838 $gh->_push(".nr pp $gh->{'normal-size'}");
847 # accept only troff inlines
848 if($l =~ /^<<\s*troff$/i)
850 $gh->{'-inline'}="troff";
856 delete $gh->{'-inline'};
860 if($gh->{'-inline'} eq "troff")
889 return("\\fB$str\\fP");
896 return("\\fI$str\\fP");
903 return("\\fB$str\\fP");
910 return("\\fI$str\\fP");
916 my ($gh,$mode,$params)=@_;
918 if($mode ne $gh->{'-mode'})
922 # flush previous list
923 if($gh->{'-mode'} eq "pre")
927 elsif($gh->{'-mode'} eq "table")
929 chomp($gh->{'-table-head'});
930 $gh->{'-table-head'} =~ s/\s+$//;
931 $gh->_push($gh->{'-table-head'}.".");
932 $gh->_push($gh->{'-table-body'}.".TE\n.sp 0.6");
941 $gh->{'-mode'}=$mode;
950 $gh->_new_mode("dl");
951 return(".ip \"$str\"\n");
959 $gh->_new_mode("ul");
968 $gh->_new_mode("ol");
983 my ($gh,$level,$l)=@_;
985 $l=".sz ".${$gh->{'heading-sizes'}}[$level - 1]."\n$l\n.sp 0.6";
995 if($gh->{'-mode'} eq "table")
998 my (@spans)=$gh->_calc_col_span($str);
1003 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
1007 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
1017 $h.="s " x
($spans[$n] - 1) if $spans[$n] > 1;
1021 $i=${$gh->{'-table'}}[$n];
1029 $b.="\n_" if $gh->{'table-headers'} and
1030 $gh->{'-tbl-row'}==1 and
1031 $gh->{'table-type'} ne "allbox";
1033 $gh->{'-table-head'}.="$h\n";
1034 $gh->{'-table-body'}.="$b\n";
1036 @
{$gh->{'-table'}}=();
1037 $gh->{'-tbl-row'}++;
1042 $gh->_new_mode("table");
1044 @
{$gh->{'-table'}}=();
1045 $gh->{'-tbl-row'}=1;
1047 $gh->{'-table-head'}=".TS\n$gh->{'table-type'} tab (#);\n";
1048 $gh->{'-table-body'}="";
1060 # add to top headings and footers
1061 unshift(@
{$gh->{'o'}},".ef '\%' ''");
1062 unshift(@
{$gh->{'o'}},".of '' '\%'");
1063 unshift(@
{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1064 unshift(@
{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1070 Angel Ortega angel@triptico.com