1 #####################################################################
3 # Grutatxt - A text to HTML (and other things) converter
5 # Copyright (C) 2000/2002 Angel Ortega <angel@triptico.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 # http://www.triptico.com
23 #####################################################################
33 Grutatxt - Text to HTML (and other formats) converter
39 # create a new Grutatxt converter object
40 $grutatxt=new Grutatxt();
42 # process a Grutatxt format string
43 @output=$grutatxt->process($text);
46 @output2=$grutatxt->process_file($file);
50 Grutatxt is a module to process text documents in
51 a special markup format (also called Grutatxt), very
52 similar to plain ASCII text. These documents can be
53 converted to HTML or troff.
55 The markup is designed to be fairly intuitive and
56 straightforward and can include headings, bold and italic
57 text effects, bulleted, numbered and definition lists, URLs,
58 function and variable names, preformatted text, horizontal
59 separators and tables. Special marks can be inserted in the
60 text and a heading-based structural index can be obtained
63 A comprehensive description of the markup is defined in
64 the README file, included with the Grutatxt package (it is
65 written in Grutatxt format itself, so it can be converted
66 using the I<grutatxt> tool to any of the supported formats).
67 The latest version (and more information) can be retrieved
68 from the Grutatxt home page at:
70 http://www.triptico.com/software/grutatxt.html
72 =head1 FUNCTIONS AND METHODS
76 $grutatxt=new Grutatxt([ "mode" => $mode, ]
77 [ "title" => \$title, ]
78 [ "marks" => \@marks, ]
79 [ "index" => \@index, ]
80 [ "abstract" => \$abstract, ]
81 [ "strip-parens" => $bool, ]
82 [ "strip-dollars" => $bool, ]
83 [ %driver_specific_arguments ] );
85 Creates a new Grutatxt object instance. All parameters are
92 Output format. Can be HTML or troff. HTML is used if not specified.
96 If I<title> is specified as a reference to scalar, the first
97 level 1 heading found in the text is stored inside it.
101 Marks in the Grutatxt markup are created by inserting the
102 string <-> alone on a line. If I<marks> is specified as a
103 reference to array, it will be filled with the subscripts
104 (relative to the output array) of the lines where the marks
105 are found in the text.
109 If I<index> is specified as a reference to array, it will
110 be filled with strings in the format
114 This information can be used to build a table of contents
115 of the processed text.
117 =item I<strip-parens>
119 Function names in the Grutatxt markup are strings of
120 alphanumeric characters immediately followed by a pair
121 of open and close parentheses. If this boolean value is
122 set, function names found in the processed text will have
123 their parentheses deleted.
125 =item I<strip-dollars>
127 Variable names in the Grutatxt markup are strings of
128 alphanumeric characters preceded by a dollar sign.
129 If this boolean value is set, variable names found in
130 the processed text will have the dollar sign deleted.
134 The I<abstract> of a Grutatxt document is the fragment of text
135 from the beginning of the document to the end of the first
136 paragraph after the title. If I<abstract> is specified as a
137 reference to scalar, it will contain (after each call to the
138 B<process()> method) the subscript of the element of the output
139 array that marks the end of the abstract.
147 my ($class,%args)=@_;
150 $args{'mode'}||='HTML';
152 $class.="::".$args{'mode'};
154 $gh=new
$class(%args);
162 @output=$grutatxt->process($text);
164 Processes a text in Grutatxt format. The result is returned
165 as an array of lines.
171 my ($gh,$content)=@_;
177 # clean title and paragraph numbers
182 @
{$gh->{'marks'}}=() if ref($gh->{'marks'});
185 @
{$gh->{'index'}}=() if ref($gh->{'index'});
187 # reset abstract line
188 ${$gh->{'abstract'}}=0 if ref($gh->{'abstract'});
193 foreach my $l (split(/\n/,$content))
195 # inline data (passthrough)
196 if($l =~ /^<<$/ .. $l =~ /^>>$/)
203 if($l =~ /^\s*<\->\s*$/)
205 push(@
{$gh->{'marks'}},scalar(@
{$gh->{'o'}}))
206 if ref($gh->{'marks'});
211 # escape possibly dangerous characters
216 if($l =~ s/^$/$gh->_empty_line()/ge)
218 # mark the abstract end
223 # mark abstract if it's the
224 # second paragraph from the title
225 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})-1
230 if($gh->{'-process-urls'})
232 # URLs followed by a parenthesized phrase
233 $l =~ s/(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)\s*\(([^\)]+)\)/$gh->_url($1,$2)/ge;
235 # URLs without phrase
236 $l =~ s/([^=][^\"])(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)/$1.$gh->_url($2,$2)/ge;
237 $l =~ s/^(http:\/\/[\w\
/\.\?\&\=\-\%\;]*)/$gh->_url($1,$1)/ge;
240 # change '''text''' and *text* into strong emphasis
241 $l =~ s/\'\'\'([^\'][^\'][^\']*)\'\'\'/$gh->_strong($1)/ge;
242 $l =~ s/\*(\S[^\*]+\S)\*/$gh->_strong($1)/ge;
243 $l =~ s/\*(\S+)\*/$gh->_strong($1)/ge;
245 # change ''text'' and _text_ into emphasis
246 $l =~ s/\'\'([^\'][^\']*)\'\'/$gh->_em($1)/ge;
247 $l =~ s/\b_(\S[^_]*\S)_\b/$gh->_em($1)/ge;
248 $l =~ s/\b_(\S+)_\b/$gh->_em($1)/ge;
250 # enclose function names
251 if($gh->{'strip-parens'})
253 $l =~ s/(\w+)\(\)/$gh->_funcname($1)/ge;
257 $l =~ s/(\w+)\(\)/$gh->_funcname($1."()")/ge;
260 # enclose variable names
261 if($gh->{'strip-dollars'})
263 $l =~ s/\$([\w_\.]+)/$gh->_varname($1)/ge;
267 $l =~ s/(\$[\w_\.]+)/$gh->_varname($1)/ge;
273 if($l =~ s/^\s\*\s+([\w\s\-]+)\:\s+/$gh->_dl($1)/e)
278 elsif($l =~ s/^\s\*\s+/$gh->_ul()/e or
279 $l =~ s/^\s\-\s+/$gh->_ul()/e)
284 elsif($l =~ s/^\s\#\s+/$gh->_ol()/e or
285 $l =~ s/^\s1\s+/$gh->_ol()/e)
290 elsif($l =~ s/^\s*\|(.*)\|\s*$/$gh->_table_row($1)/e)
294 # table heading / end of row
295 elsif($l =~ s/^\s*(\+[-\+\|]+\+)\s*$/$gh->_table($1)/e)
300 elsif($l =~ s/^(\s.*)$/$gh->_pre($1)/e)
307 # back to normal mode
308 $gh->_new_mode(undef);
312 $l =~ s/^(=+)\s*$/$gh->_process_heading(1,$1)/e;
315 $l =~ s/^(-+)\s*$/$gh->_process_heading(2,$1)/e;
318 $l =~ s/^(~+)\s*$/$gh->_process_heading(3,$1)/e;
320 # change ------ into hr
321 $l =~ s/^----*$/$gh->_hr()/e;
324 $gh->_push($l) if $l;
328 $gh->_new_mode(undef);
334 ${$gh->{'title'}}=$gh->{'-title'} if ref($gh->{'title'});
336 # set abstract, if not set
337 ${$gh->{'abstract'}}=scalar(@
{$gh->{'o'}})
338 if ref($gh->{'abstract'}) and not ${$gh->{'abstract'}};
340 return(@
{$gh->{'o'}});
344 =head2 B<process_file>
346 @output=$grutatxt->process_file($filename);
348 Processes a file in Grutatxt format.
356 open F
, $file or return(undef);
358 my ($content)=join('',<F
>);
361 return($gh->process($content));
369 push(@
{$gh->{'o'}},$l);
375 my ($gh,$level,$hd)=@_;
378 $l=pop(@
{$gh->{'o'}});
380 if($l eq $gh->_empty_line())
387 $gh->{'-title'}=$l if $level==1 and not $gh->{'-title'};
390 if(ref($gh->{'index'}))
392 push(@
{$gh->{'index'}},"$level,$l");
395 return($gh->_heading($level,$l));
404 # strip first + and all -
408 my ($t)=1; @spans=();
409 for(my $n=0;$n < length($l);$n++)
411 if(substr($l,$n,1) eq '+')
418 # it's a colspan mark:
432 my @s=split(/\|/,$str);
434 for(my $n=0;$n < scalar(@s);$n++)
436 ${$gh->{'-table'}}[$n].=' '.$s[$n];
447 # if any other mode is active, add to it
448 if($gh->{'mode'} and $gh->{'mode'} ne "pre")
452 my ($a)=pop(@
{$gh->{'o'}})." ".$l;
458 $gh->_new_mode("pre");
464 # empty stubs, for calls that fall through to the superclass
# Base-class stub: hands the line back unchanged.
# NOTE(review): presumably overridden by drivers to handle inline
# (passthrough) data -- the call site is not visible here; confirm.
sub _inline
{
    my $self = shift;
    my ($line) = @_;

    return $line;
}
# Base-class stub: returns the line untouched (no escaping is done).
# NOTE(review): presumably the hook behind the "escape possibly
# dangerous characters" step of process() -- confirm against the caller.
sub _escape
{
    my $self = shift;
    my ($line) = @_;

    return $line;
}
# Base-class stub: the text emitted in place of an empty input line.
# process() substitutes empty lines with this value and
# _process_heading() compares popped output lines against it.
# Returns: the empty string.
sub _empty_line
{
    my $self = shift;

    return "";
}
# Base-class stub for URL rendering.
#   $url   - the matched URL
#   $label - the text to show for it
# Both arguments are ignored and the empty string is returned.
# process() only calls this when the '-process-urls' flag is set.
sub _url
{
    my $self = shift;
    my ($url, $label) = @_;

    return "";
}
# Base-class stub for strong emphasis ('''text''' and *text*):
# returns $str with no markup added.
sub _strong
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub for emphasis (''text'' and _text_):
# returns $str with no markup added.
sub _em
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub for function names: returns $str as given.
# process() passes the name with or without its trailing "()",
# depending on the 'strip-parens' option.
sub _funcname
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub for variable names: returns $str as given.
# process() passes the name with or without its leading "$",
# depending on the 'strip-dollars' option.
sub _varname
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub for switching the current block mode.
#   $mode - name of the mode to enter; process() passes undef to
#           return to normal mode and _pre() passes "pre".
# Does nothing beyond unpacking its arguments. The argument list is
# read from @_ (the underscore was missing, which left the sub
# syntactically invalid).
sub _new_mode
{
    my ($gh, $mode) = @_;
}
# Base-class stub for definition-list entries: returns the term
# ($str) captured by process() with no markup added.
sub _dl
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub for unordered-list items: returns $str as given.
# process() calls this without arguments, so $str (and therefore the
# return value) is undefined in that case.
sub _ul
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub for ordered-list items: returns $str as given.
# process() calls this without arguments, so $str (and therefore the
# return value) is undefined in that case.
sub _ol
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub for horizontal rules: process() replaces a line of
# dashes with this value. Returns: the empty string.
sub _hr
{
    my $self = shift;

    return "";
}
# Base-class stub for headings.
#   $level - heading level passed on by _process_heading()
#   $l     - heading text
# The level is ignored and the heading text is returned unchanged.
sub _heading
{
    my $self = shift;
    my ($level, $text) = @_;

    return $text;
}
# Base-class stub for table borders: returns $str (the "+---+"
# style line matched by process()) unchanged.
sub _table
{
    my $self = shift;
    my ($str) = @_;

    return $str;
}
# Base-class stub: does nothing beyond unpacking the invocant.
# NOTE(review): presumably a hook run before the output is produced;
# the call site is not visible here -- confirm.
# The list assignment is deliberately left as the last statement so
# the (implicit) return value stays what it has always been.
sub _prefix
{
    my ($self) = @_;
}
# Base-class stub: does nothing beyond unpacking the invocant.
# NOTE(review): presumably a hook run after the text is processed
# (the troff driver appears to add headers/footers there); the call
# site is not visible here -- confirm.
# The list assignment is deliberately left as the last statement so
# the (implicit) return value stays what it has always been.
sub _postfix
{
    my ($self) = @_;
}
484 ###########################################################
486 =head1 DRIVER SPECIFIC INFORMATION
490 ###########################################################
493 package Grutatxt
::HTML
;
499 The additional parameters for a new Grutatxt object are:
503 =item I<table-headers>
505 If this boolean value is set, the first row in tables
506 is assumed to be the heading and rendered using <th>
507 instead of <td> tags.
509 =item I<center-tables>
511 If this boolean value is set, tables are centered.
513 =item I<expand-tables>
515 If this boolean value is set, tables are expanded (width 100%).
519 If this boolean value is set, definition lists will be
520 rendered using <dl>, <dt> and <dd> instead of tables.
522 =item I<header-offset>
524 Offset to be added to the heading level when rendering
525 <h?> tags (default is 0).
527 =item I<class-oddeven>
529 If this boolean value is set, tables will be rendered
530 with an "oddeven" CSS class, and rows alternately classed
531 as "even" or "odd". If it's not set, no CSS class info
540 my ($class,%args)=@_;
543 bless(\
%args,$class);
546 $gh->{'-process-urls'}=1;
556 # accept unnamed and HTML inlines
557 if($l =~ /^<<$/ or $l =~ /^<<\s*html$/i)
559 $gh->{'-inline'}="HTML";
565 delete $gh->{'-inline'};
569 if($gh->{'-inline'} eq "HTML")
598 my ($gh,$url,$label)=@_;
600 $label=$url unless $label;
602 return("<a href=\"$url\">$label</a>");
609 return("<strong class=strong>$str</strong>");
616 return("<em class=em>$str</em>");
623 return("<code class=funcname>$str</code>");
630 return("<code class=var>$str</code>");
636 my ($gh,$mode,$params)=@_;
638 if($mode ne $gh->{'mode'})
642 # flush previous list
643 $gh->_push("</$gh->{'mode'}>")
647 $tag=$params ?
"<$mode $params>" : "<$mode>";
648 $gh->_push($tag) if $mode;
659 if($gh->{'dl-as-dl'})
661 $gh->_new_mode("dl");
662 return("<dt><strong class=term>$str</strong><dd>");
666 $gh->_new_mode("table");
667 return("<tr><td valign=top><strong class=term>$1</strong class=strong> </td><td valign=top>");
676 $gh->_new_mode("ul");
685 $gh->_new_mode("ol");
694 return("<hr size=1 noshade>");
700 my ($gh,$level,$l)=@_;
702 # substitute anchor spaces with underscores
703 my ($a)=lc($l); $a =~ s/\s/_/g;
705 $l=sprintf("<a name=\"$a\"></a>\n<h%d class=level$level>$l</h%d>",
706 $level+$gh->{'header-offset'},
707 $level+$gh->{'header-offset'});
717 if($gh->{'mode'} eq "table")
720 my (@spans)=$gh->_calc_col_span($str);
722 # calculate CSS class, if any
723 if($gh->{'class-oddeven'})
725 $class=($gh->{'-tbl-row'} & 1) ?
"odd" : "even";
731 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
735 $i=${$gh->{'-table'}}[$n];
736 $i=" " if $i =~ /^\s*$/;
738 $s=" colspan=$spans[$n]" if $spans[$n] > 1;
740 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
742 $str.="<th $class $s>$i</th>";
746 $str.="<td $class $s>$i</td>";
750 @
{$gh->{'-table'}}=();
759 $params.=" width='100\%'" if $gh->{'expand-tables'};
760 $params.=" align=center" if $gh->{'center-tables'};
761 $params.=" class=oddeven" if $gh->{'class-oddeven'};
763 $gh->_new_mode("table", $params);
765 @
{$gh->{'-table'}}=();
774 ###########################################################
777 package Grutatxt
::troff
;
783 The troff driver uses the B<-me> macros and B<tbl>. A
784 good way to post-process this output (to PostScript in
785 the example) could be by using
789 The additional parameters for a new Grutatxt object are:
795 The point size of normal text. The default is 10.
797 =item I<heading-sizes>
799 This argument must be a reference to an array containing
800 the size in points of the 3 different heading levels. By
801 default, level sizes are [ 20, 18, 15 ].
805 The type of table to be rendered by B<tbl>. Can be
806 I<allbox> (all lines rendered; this is the default value),
807 I<box> (only outlined) or I<doublebox> (only outlined by
816 my ($class,%args)=@_;
819 bless(\
%args,$class);
822 $gh->{'-process-urls'}=0;
824 $gh->{'heading-sizes'}||=[ 20, 18, 15 ];
825 $gh->{'normal-size'}||=10;
826 $gh->{'table-type'}||="allbox"; # box, allbox, doublebox
836 $gh->_push(".nr pp $gh->{'normal-size'}");
845 # accept only troff inlines
846 if($l =~ /^<<\s*troff$/i)
848 $gh->{'-inline'}="troff";
854 delete $gh->{'-inline'};
858 if($gh->{'-inline'} eq "troff")
887 return("\\fB$str\\fP");
894 return("\\fI$str\\fP");
901 return("\\fB$str\\fP");
908 return("\\fI$str\\fP");
914 my ($gh,$mode,$params)=@_;
916 if($mode ne $gh->{'mode'})
920 # flush previous list
921 if($gh->{'mode'} eq "pre")
925 elsif($gh->{'mode'} eq "table")
927 chomp($gh->{'-table-head'});
928 $gh->{'-table-head'} =~ s/\s+$//;
929 $gh->_push($gh->{'-table-head'}.".");
930 $gh->_push($gh->{'-table-body'}.".TE\n.sp 0.6");
948 $gh->_new_mode("dl");
949 return(".ip \"$str\"\n");
957 $gh->_new_mode("ul");
966 $gh->_new_mode("ol");
981 my ($gh,$level,$l)=@_;
983 $l=".sz ".${$gh->{'heading-sizes'}}[$level - 1]."\n$l\n.sp 0.6";
993 if($gh->{'mode'} eq "table")
996 my (@spans)=$gh->_calc_col_span($str);
1001 for(my $n=0;$n < scalar(@
{$gh->{'-table'}});$n++)
1005 if($gh->{'table-headers'} and $gh->{'-tbl-row'}==1)
1015 $h.="s " x
($spans[$n] - 1) if $spans[$n] > 1;
1019 $i=${$gh->{'-table'}}[$n];
1027 $b.="\n_" if $gh->{'table-headers'} and
1028 $gh->{'-tbl-row'}==1 and
1029 $gh->{'table-type'} ne "allbox";
1031 $gh->{'-table-head'}.="$h\n";
1032 $gh->{'-table-body'}.="$b\n";
1034 @
{$gh->{'-table'}}=();
1035 $gh->{'-tbl-row'}++;
1040 $gh->_new_mode("table");
1042 @
{$gh->{'-table'}}=();
1043 $gh->{'-tbl-row'}=1;
1045 $gh->{'-table-head'}=".TS\n$gh->{'table-type'} tab (#);\n";
1046 $gh->{'-table-body'}="";
1058 # add to top headings and footers
1059 unshift(@
{$gh->{'o'}},".ef '\%' ''");
1060 unshift(@
{$gh->{'o'}},".of '' '\%'");
1061 unshift(@
{$gh->{'o'}},".eh '$gh->{'-title'}' ''");
1062 unshift(@
{$gh->{'o'}},".oh '' '$gh->{'-title'}'");
1068 Angel Ortega angel@triptico.com