2 # This file converts simple HTML text into a DocBook XML variant.
3 # The mapping of markup and links is far from perfect, but all we
4 # want is to be able to use the docbook-to-pdf converter and similar
5 # technology available in the world of docbook-to-anything converters.
17 if ($F =~ /^(\w+)=(.*)/)
# Slurp the input file named by $F into $T.  A file that cannot be opened
# is skipped by moving on to the next iteration of the enclosing loop.
# The three-argument open with a lexical handle keeps characters in the
# file name (leading/trailing blanks, "-", "&", "|") from being read as
# part of the open mode, and avoids the global bareword handle.
open my $in, '<', $F or next;
my $T = join("", <$in>);
close $in;
# When no version option was supplied (missing or empty), fall back to
# the output of the external `date` command.
unless (length $o{version}) {
    $o{version} = `date`;
}
# Replace the <!--VERSION--> placeholder with the value of $o{version}.
34 s{<!--VERSION-->}{ $o{version} }gse
;
# The title ends at </h2>; whatever follows on the same line becomes the
# subtitle (the dot does not cross a newline here because /s is not set).
35 s{</h2>(.*)}{</title>\n<subtitle>$1</subtitle>}mg;
# <h2> opens a <sect1> whose id is the current value of $F.
36 s{<h2>}{<sect1 id=\"$F\"><title>}mg;
# <p> and <P> (with or without attributes) become <para>.
37 s{<[Pp]([> ])}{<para$1}mg; s{</[Pp]>}{</para>}mg;
# <pre> becomes <screen>.
38 s{<pre>}{<screen>}mg; s{</pre>}{</screen>}mg;
# <h3> opens a <sect2>.  The </h3> rule then closes the title and puts
# </sect2> after the text that follows, i.e. right before the next <sect2>
# or at the end of the text.  It relies on the <h3> rule having run first.
39 s{<h3>}{<sect2><title>}mg;
40 s{</h3>((?:.(?!<sect2>))*.?)}{</title>$1</sect2>}sg;
# Drop doctype declarations in lower-case and upper-case spelling.
41 s{<!doctype [^<>]*>}{}sg;
42 s{<!DOCTYPE [^<>]*>}{}sg;
# Put quotes around unquoted attribute values: first a percentage given
# to a width attribute, then a plain integer given to any attribute.
43 s{(<\w+\b[^<>]*\swidth=)(\d+\%)}{$1\"$2\"}sg;
44 s{(<\w+\b[^<>]*\s\w+=)(\d+)}{$1\"$2\"}sg;
# Escape bare ampersands so the generated DocBook stays well-formed XML.
# The previous replacement text produced "&;", which is not a valid
# entity reference; the XML escape for a literal ampersand is "&amp;".
#
# A doubled ampersand (e.g. the C operator "&&") is escaped twice.
s{&&}{&amp;&amp;}sg;
# An ampersand followed by a word and then ")" or "," cannot be an entity
# reference (those end in ";"), so it is treated as a literal ampersand.
s{&(\w+[\),])}{&amp;$1}sg;
# <span> becomes <phrase> and <small> becomes <note>; any attribute text
# inside the tag is carried over unchanged.
48 s{(</?)span(\s[^<>]*)?>}{$1."phrase$2>"}sge;
49 s{(</?)small(\s[^<>]*)?>}{$1."note$2>"}sge;
# Attribute-less <b>, <em> and <i> all become <emphasis>.
50 s{(</?)(b|em|i)>}{$1."emphasis>"}sge;
# Attribute-less list tags: li, ul, ol, dl map to their DocBook names.
51 s{(</?)(li)>}{$1."listitem>"}sge;
52 s{(</?)(ul)>}{$1."itemizedlist>"}sge;
53 s{(</?)(ol)>}{$1."orderedlist>"}sge;
54 s{(</?)(dl)>}{$1."variablelist>"}sge;
# <dt>...</dt><dd>...</dd> becomes
# <varlistentry><term>...</term><listitem>...</listitem></varlistentry>.
55 s{<dt\b([^<>]*)>}{"<varlistentry$1><term>"}sge;
56 s{</dt\b([^<>]*)>}{"</term>"}sge;
57 s{<dd\b([^<>]*)>}{"<listitem$1>"}sge;
58 s{</dd\b([^<>]*)>}{"</listitem></varlistentry>"}sge;
# <table> becomes <informaltable> with a fixed two-column <tgroup> and a
# <tbody>; the closing tag closes all three again.
59 s{<table\b([^<>]*)>}{"<informaltable$1><tgroup cols=\"2\"><tbody>"}sge;
60 s{</table\b([^<>]*)>}{"</tbody></tgroup></informaltable>"}sge;
# <tr> becomes <row> and <td> becomes <entry>, attributes carried over.
61 s{(</?)tr(\s[^<>]*)?>}{$1."row$2>"}sge;
62 s{(</?)td(\s[^<>]*)?>}{$1."entry$2>"}sge;
# The pattern matches the opening tags of an outer <informaltable>
# (informaltable, tgroup, tbody, row, entry) that are immediately followed
# by the start of another <informaltable.
# NOTE(review): the replacement part and the modifiers of this
# substitution are not visible in this excerpt - confirm against the
# complete file.
63 s
{<informaltable
\b[^<>]*>\s
*<tgroup
\b[^<>]*>\s
*<tbody
>
64 \s
*<row
\b[^<>]*>\s
*<entry
\b[^<>]*>\s
*<informaltable
\b}
# Collapse a run of closing tags that starts with </informaltable> and
# continues with </entry>, </row>, </tbody>, </tgroup> and a second
# </informaltable> into one single </informaltable>.  The pattern is
# written with /x, so the whitespace inside it is not significant.
66 s
{</informaltable>\s*</entry
>\s
*</row
>
67 \s
*</tbody>\s*</tgroup
>\s
*</informaltable
>}
68 {</informaltable
>}gsx
;
# An <informaltable> tag that carries width=100% additionally gets the
# attribute pgwide=1.
69 s{(<informaltable[^<>]*\swidth=\"100\%\")}{$1 pgwide=\"1\"}gs;
# When the first <entry> of the first <row> in a <tbody> carries
# width=50%, two equal-width <colspec> elements are inserted in front of
# the <tbody>; the matched text itself is kept unchanged.
70 s
{(<tbody
>\s
*<row
[^<>]*>\s
*<entry
[^<>]*\s
)(width
=\"50\
%\")}
71 {<colspec colwidth
=\"1*\" /><colspec colwidth=\"1*\" />\n$1$2}gs
;
# <nobr> wrapped around <tt>, with optional quote characters (' or `)
# sitting between the two tags, becomes a <cmdsynopsis>.  The quote
# characters are kept inside the new element.
s{<nobr>([\'\`]*)<tt>}{<cmdsynopsis>$1}sg;
# Closing counterpart.  The pattern has a single capture group, so the
# quote characters are in $1; the former use of $2 silently dropped them.
s{</tt>([\'\`]*)</nobr>}{$1</cmdsynopsis>}sg;
# A quote character directly inside <nobr> combined with <tt> or <code>
# (either nesting order) turns the tag pair into <cmdsynopsis>; the quote
# character itself is kept.
75 s{<nobr><(?:tt|code)>([\`\"\'])}{<cmdsynopsis>$1}sg;
76 s{<(?:tt|code)><nobr>([\`\"\'])}{<cmdsynopsis>$1}sg;
77 s{([\`\"\'])</(?:tt|code)></nobr>}{$1</cmdsynopsis>}sg;
78 s{([\`\"\'])</nobr></(?:tt|code)>}{$1</cmdsynopsis>}sg;
# Every <tt> and <code> tag still present at this point becomes
# <constant> and <literal> respectively, so these two rules have to run
# after the cmdsynopsis rules.
79 s{(</?)tt>}{$1."constant>"}sge;
80 s{(</?)code>}{$1."literal>"}sge;
# Tag-free text between the end of a tag and a following <br> is wrapped
# in <highlights>; the <br> itself is removed.
81 s{>([^<>]+)<br>}{><highlights>$1</highlights>}sg;
# <date> and </date> are renamed to <sect1info> and </sect1info>.
84 s{(</?)date>}{$1."sect1info>"}sge;
# The first attribute-less <reference> tag gets the fixed id reference;
# there is no /g flag, so later occurrences are left alone.
85 s{<reference>}{<reference id=\"reference\">}s;
# Anchors with an absolute http:, ftp: or mailto: target become
# <ulink url=...> with the same target and the same link text.
87 s
{<a\s
+href
=\"((?
:http
|ftp
|mailto
):[^<>]+)\"\s
*>((?
:.(?
!</a>))*.)</a>}
88 { "<ulink url=\"$1\">$2</ulink>" }sge
;
# Anchors pointing at a fragment inside zziplib.html become an internal
# <link> whose linkend is the fragment name.
89 s
{<a\s
+href
=\"zziplib
.html\#
([\w_
]+)\"\s
*>((?
:.(?
!</a>))*.)</a>}
90 { "<link linkend=\"$1\">$2</link>" }sge
;
# Anchors pointing at zziplib.html itself become an internal <link> to
# the fixed id reference.
91 s
{<a\s
+href
=\"(zziplib
.html
)\"\s
*>((?
:.(?
!</a>))*.)</a>}
92 { "<link linkend=\"reference\">$2</link>" }sge
;
# Anchors pointing at any other local .html file become an internal
# <link>.  The linkend is the target with its last character chopped off
# (name.html turns into name.htm); a warning goes to STDERR when %file
# has no entry under that key.
# NOTE(review): presumably the keys of %file are the .htm input file
# names - confirm against the part of the script that fills %file.
93 s
{<a\s
+href
=\"([\w
-]+[.]html
)\"\s
*>((?
:.(?
!</a>))*.)</a>}
94 { my $K = $1; chop $K;
95 if (not exists $file{$K}) { print STDERR
"bad link $1\n"; }
96 "<link linkend=\"$K\">$2</link>" }sge
;
# Anchors pointing at a local file ending in .h, .c, .am or .txt become
# a <ulink> with a file: URL.
97 s
{<a\s
+href
=\"([\w
-]+[.](?
:h
|c
|am
|txt
))\"\s
*>((?
:.(?
!</a>))*.)</a>}
98 { "<ulink url=\"file:$1\">$2</ulink>" }sge
;
# Anchors whose target consists only of upper-case letters and digits on
# both sides of a single dot also become a <ulink> with a file: URL.
99 s
{<a\s
+href
=\"([A
-Z0
-9]+[.][A
-Z0
-9]+)\"\s
*>((?
:.(?
!</a>))*.)</a>}
100 { "<ulink url=\"file:$1\">$2</ulink>" }sge
;
102 # s{(</?)subtitle>}{$1."para>"}ge;
# If the converted text contains an opening <sect1 tag, append the
# closing </sect1>; then remember the text under the name of the file.
if (/<sect1[> ]/) {
    $_ .= "</sect1>";
}
$file{$F}{text} = $_;
# Emit the document prologue: the DocBook XML 4.1.2 document type
# declaration followed by the opening of the book and its first chapter.
# The name in the DOCTYPE has to be the type of the root element, which
# is <book> here; it used to say "reference", which makes the document
# invalid for any validating parser.
print '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"',$n;
print ' "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">',$n;
print '<book><chapter><title>Documentation</title>',$n;
# Emit </chapter> ahead of any text that contains a <reference tag, then
# emit the converted text of the file followed by two $n separators.
if ($file{$F}{text} =~ /<reference/) {
    print "</chapter>";
}
print $file{$F}{text}, $n, $n;