mktar: Use `wc` instead of `du` in summary message
[sunny256-utils.git] / html2db
blob1b9f655acc129197c24710fe732bd1171f1fe61d
1 #!/usr/bin/env perl
3 #=======================================================================
4 # html2db
5 # File ID: 2aad61e8-f743-11dd-8708-000475e441b9
6 # Simple HTML to DocBook conversion utility.
8 # Character set: UTF-8
9 # ©opyleft 2004– Øyvind A. Holm <sunny@sunbase.org>
10 # License: GNU General Public License version 2 or later, see end of
11 # file for legal stuff.
12 #=======================================================================
14 use strict;
15 use warnings;
16 use Getopt::Long;
# Turn on autoflush for the currently selected output handle.
$| = 1;

our $Debug = 0;
our $VERSION = "0.00";

# Program name as invoked, with any leading directory part removed.
our $progname = $0;
$progname =~ s/^.*\/(.*?)$/$1/;

# Marker comment that conv_elem() and conv_h() insert into the output at
# places where the converted markup should be checked by hand.
my $Warn = "<!-- \@html2db -->";

# Every command line option starts out switched off.
our %Opt = map { $_ => 0 } qw(debug help verbose version);

Getopt::Long::Configure("bundling");
GetOptions(

    "debug" => \$Opt{'debug'},
    "help|h" => \$Opt{'help'},
    "verbose|v+" => \$Opt{'verbose'},
    "version" => \$Opt{'version'},

) || die("$progname: Option error. Use -h for help.\n");

$Debug = 1 if $Opt{'debug'};
usage(0) if $Opt{'help'};

if ($Opt{'version'}) {
    print_version();
    exit(0);
}
# Slurp all input (files named on the command line, or stdin).
my $Orig = join("", <>);
my $Data = $Orig;
my $H1_kludge = "<h1>This is just a placeholder by html2db. It should not be here.</h1>";

# conv_h() only converts a header when another header follows it, so a
# placeholder <h1> is put in front of </body, or appended at the very end
# when there is no </body to anchor on. It is removed again before output.
unless ($Data =~ s#</body\b(.*?)$#$H1_kludge</body$1#s) {
    $Data .= $H1_kludge;
}

$Data =~ s#<!DOCTYPE .*?>#<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4//EN" "http://docbook.org/xml/4.3/docbookx.dtd">#s;

# HTML element => DocBook element, applied in this order. Any values after
# the two names are handed on to conv_elem() as its $Check and $InsPara
# arguments.
my @elem_map = (
    [ "html", "article" ],
    [ "head", "articleinfo" ],
    [ "em", "emphasis" ],
    [ "samp", "computeroutput" ],
    [ "code", "filename", 1 ],
    [ "kbd", "command" ],
    [ "p", "para" ],
    [ "ul", "itemizedlist" ],
    [ "ol", "orderedlist" ],
    [ "li", "listitem", 0, 1 ],
    [ "dl", "variablelist" ],
    [ "dt", "term" ],
    [ "dd", "listitem", 0, 1 ],
    [ "pre", "screen" ],
);
for my $rule (@elem_map) {
    my ($from, $to, @flags) = @{$rule};
    $Data = conv_elem($from, $to, $Data, @flags);
}

# Headers are converted from the deepest level (h6) up to h1.
for my $level (reverse(1 .. 6)) {
    $Data = conv_h($level, $Data);
}

$Data =~ s#<a\s+href="(.*?)">(.+?)</a>#<ulink url="$1">$2</ulink>#gs;

D("\$Data before cleanup \x7B\x7B\x7B\n$Data\n\x7D\x7D\x7D");

# Take the placeholder header out again.
$Data =~ s/$H1_kludge//;

print($Data);
sub conv_elem {
    # Rename every <$From ...>...</$From> element in $Str to <$To ...> {{{
    #
    # Parameters:
    #   $From    - name of the element to replace; it is interpolated into
    #              the search pattern as-is
    #   $To      - name of the replacement element
    #   $Str     - the text to convert
    #   $Check   - optional, default 0; when true every converted element
    #              is prefixed with the $Warn marker
    #   $InsPara - optional, default 0; when true the element content is
    #              wrapped in <para>...</para>
    #
    # Returns the converted text. Attributes on the opening tag are copied
    # to the new element, and an element carrying attributes is always
    # prefixed with $Warn, whatever $Check says.
    #
    # The marker is chosen separately for each match. It used to live in a
    # single variable shared by all matches, so the first element with
    # attributes switched the marker on for every later element as well.
    my ($From, $To, $Str, $Check, $InsPara) = @_;
    defined($Check) || ($Check = 0);
    defined($InsPara) || ($InsPara = 0);
    my $default_chk = $Check ? $Warn : "";
    my ($Par1, $Par2) = $InsPara ? ("<para>", "</para>") : ("", "");

    $Str =~
    s{
        <$From\b(.*?)>(.*?)</$From\b(.*?)>
    }
    {
        my ($Attrib, $Txt) = ($1, $2);
        my $chk_str = $default_chk;
        D("Er i regexp");
        $Attrib =~ s/^\s*(.*?)\s*$/$1/s;
        if (length($Attrib)) {
            $Attrib = " $Attrib";
            $chk_str = $Warn;
        }
        "$chk_str<$To$Attrib>$Par1$Txt$Par2</$To>";
    }gsex;
    return($Str);
    # }}}
} # conv_elem()
sub conv_h {
    # Turn <h$Level> headers and the text below them into <sect$Level> {{{
    #
    # Parameters:
    #   $Level - header level to convert
    #   $Str   - the text to convert
    #
    # Returns the converted text. A section runs from its <h$Level> header
    # up to the next header of the same or a higher level (h$Level .. h1).
    # A header that has no such header after it is left untouched.
    # Attributes on the header tag are copied to the <sect$Level> tag,
    # preceded by the $Warn marker.
    #
    # The terminating header is only looked at with a lookahead, never
    # consumed, so it can itself start the next section during the same
    # pass. Consuming it made the global substitution skip every other
    # section of the same level.
    my ($Level, $Str) = @_;

    # Digits of the header levels that end a section, e.g. "321" for 3.
    my $end_head = join("", reverse(1 .. $Level));

    my $reg_str = <<END;
<(h$Level)\\b(.*?)>
(.*?)
</h$Level\\b(.*?)>
(.*?)
(?=<h[$end_head]\\b.*?>)
END
    D("reg_str = \x7B\x7B\x7B\n$reg_str\n\x7D\x7D\x7D");
    $Str =~
    s{
        $reg_str
    }
    {
        my ($Attrib, $Header, $Txt) = ($2, $3, $5);
        D("conv_h(): Er i regexp");
        $Attrib =~ s/^\s*(.*?)\s*$/$1/s;
        length($Attrib) && ($Attrib = " $Warn $Attrib");
        "<sect$Level$Attrib> <title>$Header</title>\n$Txt\n</sect$Level>\n";
    }gsex;
    return($Str);
    # }}}
} # conv_h()
sub print_version {
    # Write the program name and version to the default output handle {{{
    my $banner = "$progname v$VERSION\n";
    print($banner);
    # }}}
} # print_version()
sub usage {
    # Print the help text and leave the program {{{
    #
    # The only argument is the exit status handed to exit(). When the
    # verbose option is set, a blank line and the version banner are
    # printed ahead of the help text.
    my $Retval = shift;

    if ($Opt{'verbose'}) {
        print("\n");
        print_version();
    }

    my $help_text = <<END;

Usage: $progname [options] [file [files [...]]]

Experimental script for converting XHTML to DocBook.

To get all the headers converted, the files have to be filtered through
the script several times. And there will probably be some <hX> headers
which needs manual conversion. The curse of <h?> elements.

Options:

  -h, --help
    Show this help.
  -v, --verbose
    Increase level of verbosity. Can be repeated.
  --version
    Print version information.
  --debug
    Print debugging messages.

END
    print($help_text);
    exit($Retval);
    # }}}
} # usage()
sub msg {
    # Write a status line to stderr if the verbosity is high enough {{{
    #
    # Arguments: the verbosity level needed for the message to show, and
    # the message text. The line is prefixed with the program name.
    my ($min_level, $text) = @_;

    print(STDERR "$progname: $text\n") if ($Opt{'verbose'} >= $min_level);
    # }}}
} # msg()
sub D {
    # Write a debugging line to stderr when $Debug is set {{{
    #
    # The line shows the caller's file name (directory part removed) and
    # line number, the process ID and the message. Returns an empty string
    # after printing; does nothing when debugging is off.
    return unless $Debug;
    my (undef, $src_file, $src_line) = caller;
    my $message = shift;
    chomp($message);
    # Turn backslashes into slashes first, so the directory part is
    # stripped from backslash-separated paths as well.
    $src_file =~ s#\\#/#g;
    $src_file =~ s#^.*/(.*?)$#$1#;
    print(STDERR "$src_file:$src_line $$ $message\n");
    return("");
    # }}}
} # D()
217 __END__
219 # Plain Old Documentation (POD) {{{
221 =pod
223 =head1 NAME
227 =head1 SYNOPSIS
229 [options] [file [files [...]]]
231 =head1 DESCRIPTION
235 =head1 OPTIONS
237 =over 4
239 =item B<-h>, B<--help>
241 Print a brief help summary.
243 =item B<-v>, B<--verbose>
245 Increase level of verbosity. Can be repeated.
247 =item B<--version>
249 Print version information.
251 =item B<--debug>
253 Print debugging messages.
255 =back
257 =head1 BUGS
261 =head1 AUTHOR
263 Made by Øyvind A. Holm S<E<lt>sunny@sunbase.orgE<gt>>.
265 =head1 COPYRIGHT
267 Copyleft © Øyvind A. Holm E<lt>sunny@sunbase.orgE<gt>
268 This is free software; see the file F<COPYING> for legalese stuff.
270 =head1 LICENCE
272 This program is free software: you can redistribute it and/or modify it
273 under the terms of the GNU General Public License as published by the
274 Free Software Foundation, either version 2 of the License, or (at your
275 option) any later version.
277 This program is distributed in the hope that it will be useful, but
278 WITHOUT ANY WARRANTY; without even the implied warranty of
279 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
280 See the GNU General Public License for more details.
282 You should have received a copy of the GNU General Public License along
283 with this program.
284 If not, see L<http://www.gnu.org/licenses/>.
286 =head1 SEE ALSO
288 =cut
290 # }}}
292 # vim: set fenc=UTF-8 ft=perl fdm=marker ts=4 sw=4 sts=4 et fo+=w :