3 # Man page to help file converter
4 # Copyright (C) 1994, 1995, 1998, 2000, 2001, 2002, 2003, 2004, 2005,
6 # The Free Software Foundation, Inc.
8 # Originally written by:
9 # Andrew V. Samoilov, 2002
11 # Andrew Borodin <aborodin@vmail.ru>, 2010
13 # Completely rewritten in Perl by:
14 # Alexandr Prenko, 2010
16 # This program is free software: you can redistribute it and/or modify
17 # it under the terms of the GNU General Public License as published by
18 # the Free Software Foundation, either version 3 of the License, or
19 # (at your option) any later version.
21 # This program is distributed in the hope that it will be useful,
22 # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 # GNU General Public License for more details.
26 # You should have received a copy of the GNU General Public License
27 # along with this program. If not, see <http://www.gnu.org/licenses/>.
30 # \brief Source: man page to help file converter
35 # Perl have no static variables, so this hash emulates them
37 "string_len anchor_flag" => 0,
38 "string_len lc_link_flag" => 0,
39 "handle_link old" => undef
43 my $CHAR_LINK_START = chr(01); # Ctrl-A
44 my $CHAR_LINK_POINTER = chr(02); # Ctrl-B
45 my $CHAR_LINK_END = chr(03); # Ctrl-C
46 my $CHAR_NODE_END = chr(04); # Ctrl-D
47 my $CHAR_ALTERNATE = chr(05); # Ctrl-E
48 my $CHAR_NORMAL = chr(06); # Ctrl-F
49 my $CHAR_VERSION = chr(07); # Ctrl-G
50 my $CHAR_FONT_BOLD = chr(010); # Ctrl-H
51 my $CHAR_FONT_NORMAL = chr(013); # Ctrl-K
52 my $CHAR_FONT_ITALIC = chr(024); # Ctrl-T
55 my $col = 0; # Current output column
56 my $out_row = 1; # Current output row
57 my $in_row = 0; # Current input row
58 my $no_split_flag = 0; # Flag: Don't split section on next ".SH"
59 my $skip_flag = 0; # Flag: Skip this section.
62 # 2 = title skipped, skipping text
63 my $link_flag = 0; # Flag: Next line is a link
64 my $verbatim_flag = 0; # Flag: Copy input to output verbatim
65 my $node = 0; # Flag: This line is an original ".SH"
67 my $c_out; # Output filename
68 my $f_out; # Output file
70 my $c_in; # Current input filename
72 my $indentation; # Indentation level, n spaces
73 my $tp_flag; # Flag: .TP paragraph
74 # 1 = this line is .TP label,
75 # 2 = first line of label description.
82 my ($str, $chars) = @_;
84 if (! defined $chars || $chars eq "")
91 $str = $strtok unless defined $str;
92 return undef unless defined $str;
95 $str =~ s/^[$chars]+//;
96 ($result, $strtok) = split /[$chars]+/, $str, 2;
97 ($result, $strtok) = split /[$chars]+/, $strtok, 2 if defined $result && $result eq "";
98 $strtok = undef if ! defined $strtok || $strtok eq "";
104 "node" => undef, # Section name
105 "lname" => undef, # Translated .SH, undef if not translated
107 "heading_level" => undef
111 my $nodes = struct_node();
112 my $cnode; # Current node
114 # Report error in input
118 warn sprintf "man2hlp: %s in file \"%s\" on line %d\n", $message, $c_in, $in_row;
121 # Do open, exit if it fails
124 my ($mode, $filename) = @_;
127 unless (open $f, $mode, $filename)
129 warn sprintf("man2hlp: Cannot open file \"%s\" ($!)\n", $filename);
135 # Do close, exit if it fails
141 warn "man2hlp: Cannot close file ($!)\n";
154 # Calculate the length of string
158 my $anchor_flag = \$static{"string_len anchor_flag"}; # Flag: Inside hypertext anchor name ho4u_v_Ariom
159 my $lc_link_flag = \$static{"string_len lc_link_flag"}; # Flag: Inside hypertext link target name
160 my $backslash_flag = 0; # Flag: Backslash quoting
161 my $len = 0; # Result: the length of the string
164 foreach my $c (split //, $buffer)
166 if ($c eq $CHAR_LINK_POINTER)
168 $$lc_link_flag = 1; # Link target name starts
170 elsif ($c eq $CHAR_LINK_END)
172 $$lc_link_flag = 0; # Link target name ends
174 elsif ($c eq $CHAR_NODE_END)
176 # Node anchor name starts
178 # Ugly hack to prevent loss of one space
181 # Don't add control characters to the length
182 next if ord($c) >= 0 && ord($c) < 32;
183 # Attempt to handle backslash quoting
184 if ($c eq '\\' && !$backslash_flag)
190 # Increase length if not inside anchor name or link target name
191 $len++ if !$$anchor_flag && !$$lc_link_flag;
192 if ($$anchor_flag && $c eq ']')
194 # Node anchor name ends
205 my $len; # The length of current word
206 my $backslash_flag = 0;
207 my $font_change_flag = 0;
211 return if $skip_flag;
215 # Attempt to handle backslash quoting
216 foreach (split //, $buffer)
218 if ($_ eq '\\' && !$backslash_flag)
230 $buffer = strtok($buffer, " \t\n");
231 # Repeat for each word
232 while (defined $buffer)
237 $len = string_len($buffer);
238 # Words are separated by spaces
246 print $f_out ' ' while $col++ < $indentation;
248 # Attempt to handle backslash quoting
249 foreach (split //, $buffer)
251 # handle quotes: \(lq, \(rq, \(dq
252 if ($quotes_flag != 0)
254 if (($_ eq 'l' || $_ eq 'r' || $_ eq 'd') && $quotes_flag == 1)
256 # continue quotes handling
260 elsif ($_ eq 'q' && $quotes_flag == 2)
262 # finish quotes handling
269 print $f_out '(' . $_;
270 print_error "Syntax error: unsupported \\(" . $_ . " command";
273 # handle \fR, \fB, \fI and \fP commands
274 if ($font_change_flag)
278 print $f_out $CHAR_FONT_BOLD;
282 print $f_out $CHAR_FONT_ITALIC;
284 elsif ($_ eq 'R' || $_ eq 'P')
286 print $f_out $CHAR_FONT_NORMAL;
290 print $f_out 'f' . $_;
291 print_error "Syntax error: unsupported \\f" . $_ . " command";
294 $font_change_flag = 0;
297 if ($_ eq '(' && $backslash_flag)
303 if ($_ eq 'f' && $backslash_flag)
305 $font_change_flag = 1;
309 if ($_ eq '\\' && !$backslash_flag)
315 $font_change_flag = 0;
323 $buffer = strtok(undef, " \t\n");
328 # Like print_string but with printf-like syntax
331 print_string sprintf shift, @_;
334 # Handle NODE and .SH commands. is_sh is 1 for .SH, 0 for NODE
335 # FIXME: Consider to remove first parameter
338 my ($buffer, $is_sh) = @_;
339 my ($len, $heading_level);
341 # If we already skipped a section, don't skip another
342 $skip_flag = 0 if $skip_flag == 2;
344 # Get the command parameters
345 $buffer = strtok(undef, "");
346 if (! defined $buffer)
348 print_error "Syntax error: .SH: no title";
354 $buffer =~ s/^"// and $buffer =~ s/"$//;
355 # Calculate heading level
357 $heading_level++ while substr($buffer, $heading_level, 1) eq ' ';
358 # Heading level must be even
359 if ($heading_level % 2)
361 print_error "Syntax error: .SH: odd heading level";
365 # Don't start a new section
367 print_string $buffer;
374 # Skipping title and marking text for skipping
379 $buffer = substr($buffer, $heading_level);
380 if (! $is_sh || ! $node)
382 # Start a new section, but omit empty section names
385 printf $f_out "%s[%s]", $CHAR_NODE_END, $buffer;
389 # Add section to the linked list
390 if (! defined $cnode)
396 $cnode->{'next'} = struct_node();
397 $cnode = $cnode->{'next'};
399 $cnode->{'node'} = $buffer;
400 $cnode->{'lname'} = undef;
401 $cnode->{'next'} = undef;
402 $cnode->{'heading_level'} = $heading_level;
406 $cnode->{'lname'} = $buffer;
407 print_string $buffer;
411 } # Start new section
416 # Convert character from the macro name to the font marker
421 'R' => $CHAR_FONT_NORMAL,
422 'B' => $CHAR_FONT_BOLD,
423 'I' => $CHAR_FONT_ITALIC
425 return exists $font{$c} ? $font{$c} : chr(0);
429 # Handle alternate font commands (.BR, .IR, .RB, .RI, .BI, .IB)
430 # Return 0 if the command wasn't recognized, 1 otherwise
432 sub handle_alt_font($)
438 return 0 if length($buffer) != 3;
439 return 0 if substr($buffer, 0, 1) ne '.';
442 char_to_font substr($buffer, 1, 1),
443 char_to_font substr($buffer, 2, 1)
446 # Exclude names with unknown characters, .BB, .II and .RR
447 if ($font[0] eq chr(0) || $font[1] eq chr(0) || $font[0] eq $font[1])
452 my $p = strtok(undef, "");
453 return 1 unless defined $p;
457 my @p = split //, $p;
463 $in_quotes = !$in_quotes;
468 if ($p[0] eq ' ' && !$in_quotes)
471 # Don't change font if we are at the end
474 $alt_state = $alt_state ? 0 : 1;
475 $buffer .= $font[$alt_state];
479 shift @p while @p && $p[0] eq ' ';
487 # Turn off attributes if necessary
488 if ($font[$alt_state] ne $CHAR_FONT_NORMAL)
490 $buffer .= $CHAR_FONT_NORMAL;
493 print_string $buffer;
498 # Handle .IP and .TP commands. is_tp is 1 for .TP, 0 for .IP
515 # Handle all the roff dot commands. See man groff_man for details
516 sub handle_command($)
521 # Get the command name
522 $buffer = strtok($buffer, " \t");
524 if ($buffer eq ".SH")
527 handle_node $buffer, 1;
529 elsif ($buffer eq ".\\\"NODE")
531 handle_node $buffer, 0;
533 elsif ($buffer eq ".\\\"DONT_SPLIT\"")
537 elsif ($buffer eq ".\\\"SKIP_SECTION\"")
541 elsif ($buffer eq ".\\\"LINK2\"")
543 # Next two input lines form a link
546 elsif ($buffer eq ".PP" || $buffer eq ".P" || $buffer eq ".LP")
553 elsif ($buffer eq ".nf")
555 # Following input lines are to be handled verbatim
559 elsif ($buffer eq ".I" || $buffer eq ".B" || $buffer eq ".SB")
561 # Bold text or italics text
562 my $backslash_flag = 0;
565 # Causes the text on the same line or the text on the
566 # next line to appear in boldface font, one point
567 # size smaller than the default font.
570 # FIXME: text is optional, so there is no error
572 my $p = strtok(undef, "");
575 print_error "Syntax error: .I | .B | .SB : no text";
579 $buffer = substr($buffer, 1, 1) eq 'I' ? $CHAR_FONT_ITALIC : $CHAR_FONT_BOLD;
581 # Attempt to handle backslash quoting
582 foreach (split //, $p)
584 if ($_ eq '\\' && !$backslash_flag)
592 print_string $buffer . $CHAR_FONT_NORMAL;
594 elsif ($buffer eq ".TP")
598 elsif ($buffer eq ".IP")
602 elsif ($buffer eq ".\\\"TOPICS")
606 print_error "Syntax error: .\\\"TOPICS must be first command";
609 $buffer = strtok(undef, "");
610 if (! defined $buffer)
612 print_error "Syntax error: .\\\"TOPICS: no text";
616 $buffer =~ s/^"// and $buffer =~ s/"$//;
619 elsif ($buffer eq ".br")
623 elsif ($buffer =~ /^\.\\"/)
625 # Comment { Hello from K.O. ;-) }
627 elsif ($buffer eq ".TH")
631 elsif ($buffer eq ".SM")
633 # Causes the text on the same line or the text on the
634 # next line to appear in a font that is one point
635 # size smaller than the default font.
636 $buffer = strtok(undef, "");
637 print_string $buffer if defined $buffer;
639 elsif (handle_alt_font($buffer) == 1)
643 elsif ($buffer eq ".RE")
649 # Other commands are ignored
650 print_error sprintf "Warning: unsupported command %s", $buffer;
658 'linkname' => undef, # Section name
659 'line' => undef, # Input line in ...
665 my $links = struct_links();
672 my $old = \$static{"handle_link old"};
679 # Old format link, not supported
681 elsif ($link_flag == 2)
683 # First part of new format link
684 # Bold text or italics text
685 if (substr($buffer, 0, 2) eq '.I' || substr($buffer, 0, 2) eq '.B')
687 $buffer =~ s/^..[\s\t]*//;
693 elsif ($link_flag == 3)
695 # Second part of new format link
701 # "Layout\&)," -- "Layout" should be highlighted, but not "),"
702 ($$old, $amp_arg) = split /\\&/, $$old, 2;
703 $amp_arg = "" unless defined $amp_arg;
704 printf_string "%s%s%s%s%s%s\n", $CHAR_LINK_START, $$old,
705 $CHAR_LINK_POINTER, $buffer, $CHAR_LINK_END, $amp_arg;
707 # Add to the linked list
708 if (defined $current_link)
710 $current_link->{'next'} = struct_links();
711 $current_link = $current_link->{'next'};
712 $current_link->{'next'} = undef;
716 $current_link = $links;
718 $current_link->{'linkname'} = $buffer;
719 $current_link->{'filename'} = $c_in;
720 $current_link->{'line'} = $in_row;
726 my $len; # Length of input line
727 my $c_man; # Manual filename
728 my $c_tmpl; # Template filename
729 my $f_man; # Manual file
730 my $f_tmpl; # Template file
731 my $buffer; # Full input line
733 my $outfile_buffer; # Large buffer to keep the output file
734 my $cont_start; # Start of [Contents]
735 my $file_end; # Length of the output file
737 # Validity check for arguments
740 warn "Usage: man2hlp file.man template_file helpfile\n";
748 # First stage - process the manual, write to the output file
750 $f_man = fopen_check "<", $c_man;
751 $f_out = fopen_check ">", $c_out;
754 # Repeat for each input line
757 # Remove terminating newline
760 my $input_line; # Input line without initial "\&"
762 if (substr($buffer, 0, 2) eq '\\&')
764 $input_line = substr($buffer, 2);
768 $input_line = $buffer;
772 $len = length($input_line);
776 # Copy the line verbatim
777 if ($input_line eq ".fi")
783 print_string $input_line;
790 handle_link $input_line;
792 elsif (substr($buffer, 0, 1) eq '.')
794 # The line is a roff command
795 handle_command $input_line;
799 #A normal line, just output it
800 print_string $input_line;
802 # .TP label processed as usual line
813 if ($col >= $indentation)
819 print $f_out " " while ++$col < $indentation;
827 # First stage ends here, closing the manual
829 # Second stage - process the template file
830 $f_tmpl = fopen_check "<", $c_tmpl;
833 # Repeat for each input line
838 if (defined $lc_node)
842 $cnode->{'lname'} = $buffer;
843 chomp $cnode->{'lname'};
849 my $char_node_end = index($buffer, $CHAR_NODE_END);
850 $lc_node = $char_node_end < 0 ? undef : substr($buffer, $char_node_end);
852 if (defined $lc_node && substr($lc_node, 1, 1) eq '[')
854 my $p = index($lc_node, ']');
856 if (substr($lc_node, 1, 6) eq '[main]')
862 if (! defined $cnode)
868 $cnode->{'next'} = struct_node();
869 $cnode = $cnode->{'next'};
871 $cnode->{'node'} = substr($lc_node, 2, $p-2);
872 $cnode->{'lname'} = undef;
873 $cnode->{'next'} = undef;
874 $cnode->{'heading_level'} = 0;
887 print $f_out $buffer;
890 $cont_start = tell $f_out;
891 if ($cont_start <= 0)
899 printf $f_out "\004[Contents]\n%s\n\n", $topics;
903 print $f_out "\004[Contents]\n";
906 for ($current_link = $links; defined $current_link && defined $current_link->{'linkname'};)
909 my $next = $current_link->{'next'};
911 if ($current_link->{'linkname'} eq "Contents")
917 for ($cnode = $nodes; defined $cnode && defined $cnode->{'node'}; $cnode = $cnode->{'next'})
919 if ($cnode->{'node'} eq $current_link->{'linkname'})
928 $buffer = sprintf "Stale link \"%s\"", $current_link->{'linkname'};
929 $c_in = $current_link->{'filename'};
930 $in_row = $current_link->{'line'};
934 $current_link = $next;
937 for ($cnode = $nodes; defined $cnode && defined $cnode->{'node'};)
939 my $next = $cnode->{'next'};
940 $lc_node = $cnode->{'node'};
942 if (defined $lc_node && $lc_node ne '') {
943 printf $f_out " %*s\001%s\002%s\003", $cnode->{'heading_level'},
944 "", $cnode->{'lname'} ? $cnode->{'lname'} : $lc_node, $lc_node;
950 $file_end = tell $f_out;
953 if (($file_end <= 0) || ($file_end - $cont_start <= 0))
960 fclose_check $f_tmpl;
961 # Second stage ends here, closing all files, note the end of output
964 # Third stage - swap two parts of the output file.
965 # First, open the output file for reading and load it into the memory.
967 $outfile_buffer = '';
968 $f_out = fopen_check '<', $c_out;
969 $outfile_buffer .= $_ while <$f_out>;
971 # Now the output file is in the memory
973 # Again open output file for writing
974 $f_out = fopen_check '>', $c_out;
976 # Write part after the "Contents" node
977 print $f_out substr($outfile_buffer, $cont_start, $file_end - $cont_start);
979 # Write part before the "Contents" node
980 print $f_out substr($outfile_buffer, 0, $cont_start-1);