The union of the color and the node_number in the struct screen_char.
[elinks.git] / doc / tools / help2xml
1 #! /usr/bin/perl
2 # The copyright notice and license are in the POD at the bottom.
4 use strict;
5 use warnings;
6 use Getopt::Long qw(GetOptions :config bundling gnu_compat);
7 use autouse 'Pod::Usage' => qw(pod2usage);
# show_version(): Print the program name followed by the
# "COPYRIGHT AND LICENSE" section of this file's POD, then exit
# successfully (pod2usage is called with -exitval => 0).
sub show_version
{
    # This program has no version number, because it is only useful
    # as part of the ELinks source tree.
    print "help2xml (ELinks)\n";
    pod2usage({-verbose  => 99,
               -sections => "COPYRIGHT AND LICENSE",
               -exitval  => 0});
}
# This script internally stores XML as nested arrays.  Example:
#
#   ['element', ['@id', "foo"], ['@dir', "ltr"], "text", ['subelement'], "more"]
#   <element id="foo" dir="ltr">text<subelement/>more</element>
#
# A node is one of:
# - A string.  This is just text and will be properly escaped when output.
# - A reference to an array where the first array element is a string that
#   does not begin with '@'.  This array represents an XML element.  The
#   other array elements are the attributes and content of the XML element.
#   The current implementation does not require attributes to be listed
#   before content.
# - A reference to an array where the first array element is a string that
#   begins with '@'.  This array represents an attribute of the parent XML
#   element.  The second array element is the value of the attribute; it
#   must be a string.  There should be no other array elements.
#
# So there is no way to represent XML declarations, processing instructions,
# comments, doctypes, or general entity references.
#
# The names of attributes in these nodes should be written in 'single quotes'
# because "@foo" would make Perl interpolate the value of the @foo array.
# The names of elements are also written in single quotes, by convention.
# xml_output($outfh, $node): Write an XML node to a filehandle.
#
# $outfh: A reference to the output filehandle.
#
# $node: An XML node: either a plain string (text), or a reference to an
#   array whose first element is the element name and whose remaining
#   elements are attribute nodes (['@name', $value]) and child nodes, in
#   any order.
#
# Markup-significant characters and every character outside the range
# \0..~ are written as decimal character references, so the output is
# pure ASCII regardless of the encoding layer on $outfh.
#
# return: Unspecified.
sub xml_output
{
    no locale;
    my ($outfh, $node) = @_;

    if (ref($node) ne "ARRAY") {
        # Text node: escape and emit.
        (my $text = $node) =~ s/([&<>]|[^\0-~])/"&#".ord($1).";"/ge;
        print {$outfh} $text;
        return;
    }

    my ($gi, @children) = @$node;
    print {$outfh} "<$gi";

    # Attributes are written as they are met; everything else is kept
    # aside so that it can be emitted after the start tag is closed.
    my @content;
    foreach my $child (@children) {
        if (ref($child) eq "ARRAY" and $child->[0] =~ /^\@(.*)/) {
            my $attrname = $1;
            my $attrval = $child->[1];
            # '<' is escaped too: XML does not allow a literal '<'
            # inside an attribute value.
            $attrval =~ s/([&<"]|[^\0-~])/"&#".ord($1).";"/ge;
            print {$outfh} " $attrname=\"$attrval\"";
        } else {
            push @content, $child;
        }
    }

    if (@content) {
        print {$outfh} ">";
        xml_output($outfh, $_) foreach @content;
        print {$outfh} "</$gi>";
    } else {
        # No content at all: use the empty-element form.
        print {$outfh} "/>";
    }
    return;
}
# xml_node_is_element($node, $gi): Check whether $node is an element
# that has the general identifier $gi.
#
# return: True if $node is an array reference whose first array element
# is string-equal to $gi; false otherwise (in particular for text nodes,
# which are plain strings).
sub xml_node_is_element
{
    my ($node, $gi) = @_;
    return ref($node) eq "ARRAY" && $node->[0] eq $gi;
}
# xml_element_attrs($node): Return the attributes of an XML element as
# a list.  In scalar context, return the number of attributes instead.
#
# $node: A reference to an array that represents an XML element.
#
# return: The attribute nodes (array references whose first element
# begins with '@'), in their original order.
sub xml_element_attrs
{
    no locale;
    my ($node) = @_;
    # $node->[0] is the general identifier of the element, a
    # string, thus it won't match in the grep.
    return grep { ref($_) eq "ARRAY" && $_->[0] =~ /^\@/ } @$node;
}
# xml_element_content($node): Return the content of an XML element as
# a list.  Not recommended for use in scalar context.
#
# $node: A reference to an array that represents an XML element.
#
# return: Every array element after the general identifier that is not
# an attribute node, i.e. the text strings and child elements, in order.
sub xml_element_content
{
    no locale;
    my ($node) = @_;
    return grep { ref($_) ne "ARRAY" || $_->[0] !~ /^\@/ } @$node[1..$#$node];
}
# apply_rules($node, $rules): Apply a list of transformations to an
# XML node.
#
# $node: An XML node: a text string, an element (array reference), or
#   an attribute (array reference whose first element begins with '@').
#
# $rules: A reference to an array of rules.  The function applies the
#   rules in order: the output of a rule can be further transformed with
#   later rules but not with the same rule or earlier rules.  Each rule
#   in the array is a reference to a hash that has at least these keys:
#
#   - FIND: A regular expression.  The function recursively searches for
#     matches in the content of $node, but not in names of elements,
#     names of attributes, or contents of attributes.
#   - REPLACE: A reference to a subroutine that returns a replacement for
#     the match, as a list of nodes.  This subroutine is called with
#     no arguments, but it can use the $1 etc. variables that are set
#     according to the regular expression.
#
#   A FIND that can match the empty string at the very start of a text
#   node makes the function recurse without end, because the text after
#   the match is retried with the same rule.
#
# return: A list of nodes.  Empty strings produced by splitting a text
# node around a match are dropped.
sub apply_rules
{
    my ($node, $rules) = @_;

    if (ref($node) eq "ARRAY") {
        # Attributes are returned untouched.
        return $node if $node->[0] =~ /^\@/;

        # Elements keep their name; every child is transformed.  The
        # children of the element are the attribute nodes too, which
        # the line above passes through unchanged.
        my ($gi, @children) = @$node;
        return [$gi, map { apply_rules($_, $rules) } @children];
    }

    # Text: find the first rule that matches.
    my @rules = @$rules;
    while (@rules) {
        my $rule = shift @rules;
        next unless $node =~ $rule->{FIND};

        # Using $` or $' anywhere in the program slows down all
        # regexp matches.  So get the values via substr instead.
        my $pre = substr($node, 0, $-[0]);
        my $post = substr($node, $+[0]);
        my @replacement = $rule->{REPLACE}->(); # uses $1 etc.

        # The text before the match and the replacement itself are
        # only exposed to the later rules; the text after the match
        # may still contain further matches of the current rule.
        return grep { $_ ne "" }
            (map { apply_rules($_, [@rules]) } $pre, @replacement),
            apply_rules($post, [$rule, @rules]);
    }
    return $node;
}
# html_splice_p(@nodes): If the first node in @nodes is a paragraph,
# replace it with its content.  The idea is to avoid extraneous
# vertical space in 'dd' and 'li' elements.
#
# Only a 'p' element that has no attributes is spliced; a paragraph
# that carries attributes is left alone, as are all later nodes.
#
# return: The new list of nodes.
sub html_splice_p
{
    my @nodes = @_;
    if (@nodes >= 1
        && xml_node_is_element($nodes[0], 'p')
        && !xml_element_attrs($nodes[0])) {
        splice(@nodes, 0, 1, xml_element_content($nodes[0]));
    }
    return @nodes;
}
# Templates that build DocBook XML nodes.  Each value is a subroutine
# that takes strings and/or nodes as arguments and returns the node (or
# list of nodes) to put in the output.  %TemplatesHTML has the same keys.
my %TemplatesDocBook = (
    # named DocBook elements
    APPLICATION  => sub { ['application', @_] },
    COMMAND      => sub { ['command', @_] },
    ENVAR        => sub { ['envar', @_] },
    FILENAME     => sub { ['filename', @_] },
    GUIBUTTON    => sub { ['guibutton', @_] },
    GUILABEL     => sub { ['guilabel', @_] },
    LINK         => sub { my $linkend = shift; ['link', ['@linkend', $linkend], @_] },
    LITERAL      => sub { ['literal', @_] },
    PARAMETER    => sub { ['parameter', @_] },
    SIMPARA      => sub { ['simpara', @_] },
    ULINK        => sub { my $url = shift; ['ulink', ['@url', $url], @_] },
    USERINPUT    => sub { ['userinput', @_] },
    VARIABLELIST => sub { ['variablelist', @_] },

    # not named after DocBook elements, but pretty simple anyway
    CMDOPTTYPE   => sub { ['replaceable', @_] },
    MANLINK      => sub { my ($title, $volnum) = @_;
                          ['citerefentry', ['refentrytitle', $title], ['manvolnum', $volnum]] },
    SGMLATTR     => sub { ['sgmltag', ['@class', "attribute"], @_] },
    SGMLELEMENT  => sub { ['sgmltag', ['@class', "element"], @_] },
    STRONG       => sub { ['emphasis', ['@role', "strong"], @_] },

    # not so simple
    CFGOPTENTRY  => sub { my ($name, $type, $default, @children) = @_;
                          ['varlistentry', ['@id', $name],
                           ['term', ['literal', $name], " ", ['type', $type], " $default"],
                           ['listitem', @children]] },
    CMDOPTINFO   => sub { my ($info) = @_; " $info" },
    CMDOPTNAME   => sub { my $id = shift; ['option', ['@id', $id], @_] },
    CFGOPTTREE   => sub { my ($name, $info, @children) = @_;
                          ['refsect2', ['@id', $name],
                           ['title', ['literal', $name], " ($info)"],
                           "\n", @children] },
    # The last argument is the menu item; all earlier ones are menus.
    GUIMENUCHOICE => sub { my $item = pop; ['menuchoice', map(['guimenu', $_], @_), ['guimenuitem', $item]] },
    ITEMIZELIST  => sub { ['itemizedlist', ['@spacing', "compact"],
                           map { ['listitem', $_], "\n" } @_] },
    USEREXAMPLE  => sub { ['informalexample', ['simpara', ['userinput', @_]]], "\n" },
    # $termchildren is a reference to an array of nodes.
    VARLISTENTRY => sub { my ($termchildren, @itemchildren) = @_;
                          ['varlistentry', ['term', @$termchildren],
                           ['listitem', @itemchildren]] },
);
# Templates that build XHTML nodes.  The keys are the same as in
# %TemplatesDocBook; each value is a subroutine that takes strings
# and/or nodes as arguments and returns the node (or list of nodes)
# to put in the output.
my %TemplatesHTML = (
    # named DocBook elements
    APPLICATION  => sub { ['em', @_] },
    COMMAND      => sub { ['kbd', @_] },
    ENVAR        => sub { ['tt', @_] },
    FILENAME     => sub { ['tt', @_] },
    GUIBUTTON    => sub { "[ ", @_, " ]" },
    GUILABEL     => sub { @_ },
    LINK         => sub { my $linkend = shift; ['a', ['@href', "#$linkend"], @_] },
    LITERAL      => sub { @_ },
    PARAMETER    => sub { ['var', @_] },
    SIMPARA      => sub { ['p', @_] },
    ULINK        => sub { my $url = shift; ['a', ['@href', $url], @_] },
    USERINPUT    => sub { ['kbd', @_] },
    VARIABLELIST => sub { ['dl', @_] },

    # not named after DocBook elements, but pretty simple anyway
    CMDOPTTYPE   => sub { @_ },
    MANLINK      => sub { my ($title, $volnum) = @_;
                          ['b', "$title($volnum)"] },
    SGMLATTR     => sub { ['code', @_] },
    SGMLELEMENT  => sub { ['code', @_] },
    STRONG       => sub { ['strong', @_] },

    # not so simple
    CFGOPTENTRY  => sub { my ($name, $type, $default, @children) = @_;
                          ['dt', ['@id', $name], "$name $type $default"],
                          ['dd', html_splice_p(@children)] },
    # "(alias for NAME)" becomes a link to NAME in elinks.conf.5.html;
    # any other info text is passed through with a leading space.
    CMDOPTINFO   => sub { my ($info) = @_;
                          if ($info =~ /^(\(alias for )([\w.]+)(\))$/) {
                              return " $1", ['a', ['@href', "elinks.conf.5.html#$2"], $2], $3;
                          } else {
                              return " $info";
                          } },
    CMDOPTNAME   => sub { my $id = shift; ['span', ['@id', $id], @_] },
    CFGOPTTREE   => sub { my ($name, $info, @children) = @_;
                          ['h3', ['@id', $name], "$name ($info)"],
                          "\n", @children },
    # \x{2192} is RIGHTWARDS ARROW.
    GUIMENUCHOICE => sub { ['em', join(" \x{2192} ", @_)] },
    ITEMIZELIST  => sub { ['ul', map { ['li', html_splice_p($_)], "\n" } @_] },
    USEREXAMPLE  => sub { ['blockquote', ['p', ['kbd', @_]]], "\n" },
    # $termchildren is a reference to an array of nodes.
    VARLISTENTRY => sub { my ($termchildren, @itemchildren) = @_;
                          ['dt', @$termchildren],
                          ['dd', html_splice_p(@itemchildren)] },
);
# optiondesc($pipe, $rules, $templates): Parse the description of one
# option or option tree and convert it to nodes.
#
# The description consists of consecutive lines that begin with 12
# spaces.  The first such line must already be in the global $_ when
# this function is called; further lines are read from $pipe.  When the
# function returns, $_ holds the first line that is not part of the
# description, or undef at end of input.
#
# $pipe: The filehandle from which the help text is being read.
#
# $rules: A reference to an array of rules in the format expected by
#   apply_rules; they are applied to every node that is generated.
#
# $templates: A reference to a hash of template subroutines; SIMPARA,
#   ITEMIZELIST, USEREXAMPLE, COMMAND, PARAMETER and LITERAL are used.
#
# return: A list of nodes.
sub optiondesc
{
    my ($pipe, $rules, $templates) = @_;
    my @ret;
    my $paragraph_text;

    # Each of these patterns both starts a list and decides whether the
    # next line continues it, so each is spelled only once.
    my $value_item_re  = qr/^ {12}((?:%|[+-]?\d).*)$/;
    my $tabbed_item_re = qr/^ {12}\t(\d.*)$/;
    my $remote_cmd_re  = qr/^ {12}\t(\w+)(\(.*\))\s+:\s+(\S.*)$/;

    # Flush the ordinary paragraph collected so far, if any.
    my $end_paragraph = sub {
        if (defined $paragraph_text) {
            push @ret, $templates->{SIMPARA}($paragraph_text);
            undef $paragraph_text;
        }
    };

    while (defined($_) and /^ {12}/) {
        # Illustrative input for the first branch (12 leading spaces):
        # '            Cookie maximum age (in days):'
        # '            -1 is use cookie's expiration date if any'
        # '            0  is force expiration at the end of session, ignoring cookie's'
        # '               expiration date'
        # '            1+ is use cookie's expiration date, but limit age to the given'
        # '               number of days'
        if ($_ =~ $value_item_re) {
            # A list of values; each item may continue on further
            # lines that are indented more deeply.
            $end_paragraph->();
            my @list_paragraphs;
            do {
                # This deliberately shadows the outer $paragraph_text:
                # it collects the text of one list item only.
                my $paragraph_text = "";
                do {
                    $paragraph_text .= "$1\n";
                    $_ = <$pipe>;
                } while (defined($_) and /^ {12}(\s+\S.*)$/);
                chomp $paragraph_text;
                push @list_paragraphs, $templates->{SIMPARA}($paragraph_text);
            } while (defined($_) and $_ =~ $value_item_re);
            push @ret, $templates->{ITEMIZELIST}(@list_paragraphs);
        } elsif ($_ =~ $tabbed_item_re) {
            # A list of single-line items that begin with a tab and a digit.
            $end_paragraph->();
            my @list_paragraphs;
            do {
                push @list_paragraphs, $templates->{SIMPARA}($1);
                $_ = <$pipe>;
            } while (defined($_) and $_ =~ $tabbed_item_re);
            push @ret, $templates->{ITEMIZELIST}(@list_paragraphs);
        } elsif (/^ {12}\t(-.*)$/) {
            # A tab followed by '-': an example of command-line usage.
            $end_paragraph->();
            push @ret, $templates->{USEREXAMPLE}($1);
            $_ = <$pipe>;
        } elsif ($_ =~ $remote_cmd_re) {
            # A list of "name(parameters) : description" lines.
            $end_paragraph->();
            my @list_paragraphs;
            my @remote_param_rules = (
                { FIND => qr(\b(URL|text)\b),
                  REPLACE => sub { $templates->{PARAMETER}($1) } },
                { FIND => qr(\b(new-tab|new-window|openBrowser)\b),
                  REPLACE => sub { $templates->{LITERAL}($1) } },
            );
            do {
                push @list_paragraphs, $templates->{SIMPARA}(
                    $templates->{COMMAND}($1, apply_rules($2, \@remote_param_rules)),
                    ": $3");
                $_ = <$pipe>;
            } while (defined($_) and $_ =~ $remote_cmd_re);
            push @ret, $templates->{ITEMIZELIST}(@list_paragraphs);
        } elsif (/^ {12}(.*\S.*)$/) {
            # Ordinary text: append to the current paragraph.
            $paragraph_text .= "$1\n";
            $_ = <$pipe>;
        } else {
            # Nothing but whitespace after the indentation: paragraph break.
            $end_paragraph->();
            $_ = <$pipe>;
        }
    }
    $end_paragraph->();
    return map { apply_rules($_, $rules) } @ret;
}
# cmdopt_id($option): Compute the id that is used as the anchor of a
# command-line option.
#
# $option: The name of the option, with or without leading dashes.
#
# return: "cmdopt:" followed by the option name without its leading
# dashes.  Every character other than an ASCII letter, a digit, '-' or
# '.' is replaced with '_' and its decimal code point.
sub cmdopt_id
{
    no locale;
    my ($option) = @_;
    $option =~ s/^-+//;
    # The '-' is last in the class so that it is plainly a literal.
    $option =~ s/([^A-Za-z0-9.-])/sprintf('_%u', ord($1))/ge;
    return "cmdopt:$option";
}
# convert_config($outfh, $elinks, $option, $templates): Run the program
# $elinks with $option as its only argument, parse the help text that it
# prints, build nodes with the subroutines in %$templates, and write the
# nodes to $outfh with xml_output.
#
# Dies if the pipe cannot be opened, if the first line of output is not a
# recognizable "ELinks <version>" line, or if a line is left over that
# none of the parsing branches recognizes.
#
# NOTE(review): this copy of the function has lost its punctuation-only
# lines and carries stray line-number prefixes; the code below is left
# exactly as found.  Restore it from the pristine file before running it.
348 sub convert_config
350 my ($outfh, $elinks, $option, $templates) = @_;
351 local $_;
353 # The rules that apply to most of the output.
354 # See &apply_rules for the format.
355 my @shared_rules = (
356 # files, commands, environment variables
357 { FIND => qr!"vi"!,
358 REPLACE => sub { $templates->{COMMAND}("vi") } },
359 { FIND => qr!\b(xterm)\b!,
360 REPLACE => sub { $templates->{COMMAND}($1) } },
# NOTE(review): the FIND half of the following ENVAR rule is not visible
# in this copy -- confirm the pattern against the pristine file.
362 REPLACE => sub { $templates->{ENVAR}($1) } },
363 { FIND => qr!(~/\.elinks|/dev/urandom|/dev/zero|\bsetup\.h|\bmime\.types)\b!,
364 REPLACE => sub { $templates->{FILENAME}($1) } },
365 { FIND => qr!\b(rename|fsync|strftime)\((\d+)\)!,
366 REPLACE => sub { $templates->{MANLINK}($1, $2) } },
368 # the rest
369 { FIND => qr!\b(http[46]?://[\w./+-]+?)(\.?)$!,
370 REPLACE => sub { $templates->{ULINK}($1, $1), $2 } },
# NOTE(review): the link target below is only the bug number; presumably
# a bug-tracker URL prefix belongs in front of it -- confirm.
371 { FIND => qr!(ELinks bug (\d+))!,
372 REPLACE => sub { $templates->{ULINK}("$2", $1) } },
373 { FIND => qr!\b(ELinks)\b!,
374 REPLACE => sub { $templates->{APPLICATION}($1) } },
# Rules for the descriptions of command-line options; the shared rules
# come last, so the more specific rules here are tried first.
377 my @command_rules = (
378 { FIND => qr!(-default-mime-type text/html)!,
379 REPLACE => sub { $templates->{USERINPUT}($1) } },
381 # This rule cannot be shared because the configuration option
382 # documentation does not have the anchors for the links.
383 { FIND => qr!(-?config-dir|-dump|-default-mime-type|-touch-files|-no-connect|-session-ring)!,
384 REPLACE => sub { $templates->{LINK}(cmdopt_id($1), $1) } },
386 @shared_rules);
# Rules for the descriptions of configuration options and option trees.
388 my @config_rules = (
389 # non-ASCII characters
390 { FIND => qr!<->!, REPLACE => sub { "\x{2194}" } },
391 { FIND => qr!(\s)-(\s)!, REPLACE => sub { "$1\x{2013}$2" } },
392 { FIND => qr!(\s)---?(\s)!, REPLACE => sub { "$1\x{2014}$2" } },
394 # user interface
395 { FIND => qr!(Setup) -> (Terminal options)!,
396 REPLACE => sub { $templates->{GUIMENUCHOICE}($1, $2) } },
397 { FIND => qr!\[ (Save) \]!,
398 REPLACE => sub { $templates->{GUIBUTTON}($1) } },
399 { FIND => qr!\b(Goto URL)\b!,
400 REPLACE => sub { $templates->{GUILABEL}($1) } },
402 # SGML
403 { FIND => qr!\b(ACCESSKEY|TABINDEX)\b!,
404 REPLACE => sub { $templates->{SGMLATTR}($1) } },
405 { FIND => qr!\b(IMG)\b!,
406 REPLACE => sub { $templates->{SGMLELEMENT}($1) } },
407 { FIND => qr!\b(alt)/(title)\b!,
408 REPLACE => sub { $templates->{SGMLATTR}($1), "/", $templates->{SGMLATTR}($2) } },
409 { FIND => qr!\b(alt)( attribute)!,
410 REPLACE => sub { $templates->{SGMLATTR}($1), $2 } },
412 # typography
413 { FIND => qr!\b_(not)_\b!,
414 REPLACE => sub { $templates->{STRONG}($1) } },
416 # This rule cannot be shared because the command-line option
417 # documentation does not have the anchors for the links.
418 { FIND => qr!\b(connection\.try_ipv6|cookies\.save|document\.browse\.minimum_refresh_time|document\.browse\.links\.color_dirs)\b!,
419 REPLACE => sub { $templates->{LINK}($1, $1) } },
421 @shared_rules);
# List-form pipe open: $elinks is run with $option as its only argument.
# NOTE(review): the die carries no message, and $pipe is not explicitly
# closed anywhere in the visible code, so the exit status of $elinks is
# never examined.
423 open my $pipe, "-|", $elinks, $option or die;
# The first line of output must be the version line; only the version
# number is kept.
424 my $version = <$pipe>;
425 chomp $version;
426 $version =~ s/^ELinks ([-.\w]+).*$/$1/ or die "unusual version: $version";
427 my @nodes;
# $_ always holds the next unconsumed line (undef at end of input);
# optiondesc reads from and updates the same $_.
428 $_ = <$pipe>;
429 while (defined($_)) {
430 if (/^$/) {
431 $_ = <$pipe>;
432 } elsif (/^Configuration options:$/) {
433 # The "Generated using" line is here at the top, because
434 # DocBook XML does not allow anything else to follow a
435 # refsect2 within a refsect1.
436 push @nodes, $templates->{SIMPARA}(
437 "Generated using output from ELinks version $version.");
438 $_ = <$pipe>;
439 while (defined($_)) {
440 if (/^$/) {
441 $_ = <$pipe>;
442 } elsif (/^ {2}(\S.*): \(([\w.-]+)\)$/) {
443 # ' Browsing: (document.browse)'
444 my ($tree_info, $tree_name) = ($1, $2);
445 my @tree_nodes;
446 $_ = <$pipe>;
447 push @tree_nodes, optiondesc($pipe, \@config_rules, $templates);
448 my @varlistentries;
449 while (defined($_)) {
450 if (/^$/) {
451 $_ = <$pipe>;
452 } elsif (/^ {4}(\S+) (\S+) (\(.*)$/) {
453 # ' [0|1] (default: 1)'
454 my ($optname, $opttype, $optdefault) = ($1, $2, $3);
455 while ($optdefault =~ /^\([^"()]*"[^"]*$/s) {
456 # a special hack for document.dump.separator,
457 # which has newlines in the default value
458 my $contline = <$pipe>;
459 last unless defined($contline);
460 chomp $contline;
461 $optdefault .= "\n$contline";
463 $_ = <$pipe>;
464 push @varlistentries, $templates->{CFGOPTENTRY}(
465 $optname, $opttype, $optdefault,
466 optiondesc($pipe, \@config_rules, $templates));
467 } else {
468 last;
471 push @tree_nodes, $templates->{VARIABLELIST}(@varlistentries)
472 if @varlistentries;
473 push @nodes, $templates->{CFGOPTTREE}(
474 $tree_name, $tree_info, @tree_nodes);
475 } else {
476 last;
# The "Usage:" line itself is skipped without producing any output.
479 } elsif (/^Usage:/) {
480 $_ = <$pipe>;
481 } elsif (/^Options:$/) {
482 $_ = <$pipe>;
483 my @varlistentries;
# Turns each name in a comma-separated list of option names into a
# CMDOPTNAME node; the commas and spaces between them stay as text.
484 my $name_rules = [
485 { FIND => qr/([^,\s]+)/,
486 REPLACE => sub { $templates->{CMDOPTNAME}(cmdopt_id($1), $1) } },
488 while (defined($_)) {
489 if (/^$/) {
490 $_ = <$pipe>;
491 } elsif (/^ {4}(\S+(?:,\s+\S+)*)(?:\s+([\[<]\S*))?(?:\s+(\(.*\)))?\s*$/) {
492 my @optnames = apply_rules($1, $name_rules);
493 my (@opttype, @optinfo);
494 @opttype = (" ", $templates->{CMDOPTTYPE}($2)) if defined($2);
495 @optinfo = $templates->{CMDOPTINFO}($3) if defined($3);
496 $_ = <$pipe>;
497 push @varlistentries, $templates->{VARLISTENTRY}(
498 [@optnames, @opttype, @optinfo],
499 optiondesc($pipe, \@command_rules, $templates));
500 } else {
501 last;
504 push @nodes, $templates->{VARIABLELIST}(@varlistentries)
505 if @varlistentries;
506 push @nodes, $templates->{SIMPARA}(
507 "Generated using output from ELinks version $version.");
508 } else {
509 last;
# A line that is still pending here was not recognized by any branch above.
512 die "parsing stopped at $.: $_" if defined($_);
513 xml_output($outfh, $_) foreach @nodes;
# Main program.  Parse the command line, choose the elinks help option
# and the set of templates from the name of the output file, and then
# generate the output file.  Exits with status 2 on usage errors.
GetOptions("help" => sub { pod2usage({-verbose => 1, -exitval => 0}) },
           "version" => \&show_version)
    or exit 2;
print(STDERR "$0: wrong number of operands\n"), exit 2 if @ARGV != 2;
my ($ELinks, $Outfname) = @ARGV;

# Only the basename of the output file is examined ([^/]*$).  If it
# contains both words of a pair, the later assignment wins.
my ($Option, $Templates);
$Option = "--config-help" if $Outfname =~ m(config[^/]*$);
$Option = "--long-help" if $Outfname =~ m(command[^/]*$);
$Templates = \%TemplatesDocBook if $Outfname =~ m(xml[^/]*$);
$Templates = \%TemplatesHTML if $Outfname =~ m(html[^/]*$);
unless ($Option and $Templates) {
    print(STDERR "$0: name of output file does not indicate its content: $Outfname\n");
    exit 2;
}
open my $outfh, ">", $Outfname or die "$Outfname: $!\n";
convert_config($outfh, $ELinks, $Option, $Templates);
# Buffered write errors only show up when the file is closed.
close $outfh or die "$Outfname: $!\n";
__END__

=head1 NAME

help2xml - Convert help output from ELinks to DocBook XML or XHTML.

=head1 SYNOPSIS

B<help2xml> F<.../src/elinks> F<.../option-command.frag.xml>

B<help2xml> F<.../src/elinks> F<.../option-config.frag.xml>

B<help2xml> F<.../src/elinks> F<.../option-command.frag.xhtml>

B<help2xml> F<.../src/elinks> F<.../option-config.frag.xhtml>

=head1 DESCRIPTION

B<help2xml> runs B<elinks --long-help> or B<elinks --config-help> to
get the documentation of command-line or configuration options from
the elinks executable, and converts it to a fragment of DocBook XML or
XHTML.  In the build system, these fragments are then included in the
DocBook and XHTML versions of the L<elinks(1)> and L<elinks.conf(5)>
manual pages.

=head1 ARGUMENTS

=over

=item F<.../src/elinks>

The B<elinks> executable file that B<help2xml> runs in order to
get the documentation.

=item F<.../option-command.frag.xml>

=item F<.../option-config.frag.xml>

=item F<.../option-command.frag.xhtml>

=item F<.../option-config.frag.xhtml>

The output file to which B<help2xml> writes the DocBook XML or
XHTML fragment.  The basename of this file must include the word
"command" for command-line options, or "config" for configuration
options.  It must also include "xml" for DocBook XML, or "html" for
XHTML.

=back

=head1 AUTHOR

Kalle Olavi Niemitalo

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2008 Kalle Olavi Niemitalo.

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.