Merge branch '207_minor_regex_fixes' into mc-4.6
[midnight-commander.git] / src / man2hlp.c
blob6a922377aeb308773e77c4112b6265b08a90d766
1 /* Man page to help file converter
2 Copyright (C) 1994, 1995, 1998, 2000, 2001, 2002, 2003, 2004, 2005,
3 2007 Free Software Foundation, Inc.
4 2002 Andrew V. Samoilov
5 2002 Pavel Roskin
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 #include <config.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #include <glib.h>
30 #include "help.h"
31 #include "glibcompat.h"
/* Size of the fixed line and message buffers used throughout this file */
#define BUFFER_SIZE 256

/* Position tracking */
static int col = 0;             /* Current output column */
static int out_row = 1;         /* Current output row */
static int in_row = 0;          /* Current input row */

/* Parser state */
static int no_split_flag = 0;   /* Flag: Don't split section on next ".SH" */
static int skip_flag = 0;       /* Flag: Skip this section.
                                   0 = don't skip,
                                   1 = skipping title,
                                   2 = title skipped, skipping text */
static int link_flag = 0;       /* Flag: Next line is a link */
static int verbatim_flag = 0;   /* Flag: Copy input to output verbatim */
static int node = 0;            /* Flag: This line is an original ".SH" */

/* Files */
static const char *c_out = NULL;        /* Output filename */
static FILE *f_out = NULL;              /* Output file */
static const char *c_in = NULL;         /* Current input filename */

/* Paragraph layout */
static int indentation = 0;     /* Indentation level, n spaces */
static int tp_flag = 0;         /* Flag: .TP paragraph
                                   1 = this line is .TP label,
                                   2 = first line of label description. */

/* Text stored by the .\"TOPICS command, NULL if none was given */
static char *topics = NULL;
/* One help section; sections are chained into a singly linked list */
struct node {
    char *node;                 /* Section name */
    char *lname;                /* Translated .SH, NULL if not translated */
    struct node *next;          /* Next section in the list, NULL at the end */
    int heading_level;          /* Heading level of the section title */
};

/* First element of the section list (statically allocated) */
static struct node nodes;

/* Current node; NULL until the first section is added */
static struct node *cnode = NULL;
/* Largest chunk handed to a single fread()/fwrite() call */
#define MAX_STREAM_BLOCK 8192

/*
 * Read exactly LEN bytes from STREAM into DATA, in chunks of at most
 * MAX_STREAM_BLOCK bytes.
 * Returns LEN on success.  Returns 0 when LEN is 0 or when the stream
 * delivers fewer than LEN bytes (a short read is treated as an error).
 */
static size_t
persistent_fread (void *data, size_t len, FILE *stream)
{
    char *cursor = data;
    size_t remaining = len;

    while (remaining != 0) {
        size_t want = remaining;
        size_t got;

        if (want > MAX_STREAM_BLOCK)
            want = MAX_STREAM_BLOCK;

        got = fread (cursor, 1, want, stream);
        if (got == 0)
            return 0;

        cursor += got;
        remaining -= got;
    }

    return len;
}
102 * Write in blocks of reasonable size and make sure we write everything.
103 * Failure to write everything is an error, indicated by returning 0.
105 static size_t
106 persistent_fwrite (const void *data, size_t len, FILE *stream)
108 size_t count;
109 size_t bytes_done = 0;
110 const char *ptr = (const char *) data;
112 if (len <= 0)
113 return 0;
115 while (bytes_done < len) {
116 count = len - bytes_done;
117 if (count > MAX_STREAM_BLOCK)
118 count = MAX_STREAM_BLOCK;
120 count = fwrite (ptr, 1, count, stream);
122 if (count <= 0)
123 return 0;
125 bytes_done += count;
126 ptr += count;
129 return bytes_done;
132 /* Report error in input */
133 static void
134 print_error (const char *message)
136 fprintf (stderr, "man2hlp: %s in file \"%s\" on line %d\n", message,
137 c_in, in_row);
140 /* Do fopen(), exit if it fails */
141 static FILE *
142 fopen_check (const char *filename, const char *flags)
144 char tmp[BUFFER_SIZE];
145 FILE *f;
147 f = fopen (filename, flags);
148 if (f == NULL) {
149 g_snprintf (tmp, sizeof (tmp), "man2hlp: Cannot open file \"%s\"",
150 filename);
151 perror (tmp);
152 exit (3);
155 return f;
/*
 * fclose() wrapper: exit with status 3 if the stream has its error
 * indicator set or if closing it fails.
 */
static void
fclose_check (FILE *f)
{
    const char *failure = NULL;

    /* A pending stream error is reported without attempting the close */
    if (ferror (f))
        failure = "man2hlp: File error";
    else if (fclose (f))
        failure = "man2hlp: Cannot close file";

    if (failure != NULL) {
        perror (failure);
        exit (3);
    }
}
173 /* Change output line */
174 static void
175 newline (void)
177 out_row++;
178 col = 0;
179 fprintf (f_out, "\n");
182 /* Calculate the length of string */
183 static int
184 string_len (const char *buffer)
186 static int anchor_flag = 0; /* Flag: Inside hypertext anchor name */
187 static int link_flag = 0; /* Flag: Inside hypertext link target name */
188 int backslash_flag = 0; /* Flag: Backslash quoting */
189 int c; /* Current character */
190 int len = 0; /* Result: the length of the string */
192 while (*(buffer)) {
193 c = *buffer++;
194 if (c == CHAR_LINK_POINTER)
195 link_flag = 1; /* Link target name starts */
196 else if (c == CHAR_LINK_END)
197 link_flag = 0; /* Link target name ends */
198 else if (c == CHAR_NODE_END) {
199 /* Node anchor name starts */
200 anchor_flag = 1;
201 /* Ugly hack to prevent loss of one space */
202 len++;
204 /* Don't add control characters to the length */
205 if (c >= 0 && c < 32)
206 continue;
207 /* Attempt to handle backslash quoting */
208 if (c == '\\' && !backslash_flag) {
209 backslash_flag = 1;
210 continue;
212 backslash_flag = 0;
213 /* Increase length if not inside anchor name or link target name */
214 if (!anchor_flag && !link_flag)
215 len++;
216 if (anchor_flag && c == ']') {
217 /* Node anchor name ends */
218 anchor_flag = 0;
221 return len;
224 /* Output the string */
225 static void
226 print_string (char *buffer)
228 int len; /* The length of current word */
229 int c; /* Current character */
230 int backslash_flag = 0;
232 /* Skipping lines? */
233 if (skip_flag)
234 return;
235 /* Copying verbatim? */
236 if (verbatim_flag) {
237 /* Attempt to handle backslash quoting */
238 while (*(buffer)) {
239 c = *buffer++;
240 if (c == '\\' && !backslash_flag) {
241 backslash_flag = 1;
242 continue;
244 backslash_flag = 0;
245 fputc (c, f_out);
247 } else {
248 /* Split into words */
249 buffer = strtok (buffer, " \t\n");
250 /* Repeat for each word */
251 while (buffer) {
252 /* Skip empty strings */
253 if (*(buffer)) {
254 len = string_len (buffer);
255 /* Change the line if about to break the right margin */
256 if (col + len >= HELP_TEXT_WIDTH)
257 newline ();
258 /* Words are separated by spaces */
259 if (col > 0) {
260 fputc (' ', f_out);
261 col++;
262 } else if (indentation) {
263 while (col++ < indentation)
264 fputc (' ', f_out);
266 /* Attempt to handle backslash quoting */
267 while (*(buffer)) {
268 c = *buffer++;
269 if (c == '\\' && !backslash_flag) {
270 backslash_flag = 1;
271 continue;
273 backslash_flag = 0;
274 fputc (c, f_out);
276 /* Increase column */
277 col += len;
279 /* Get the next word */
280 buffer = strtok (NULL, " \t\n");
281 } /* while */
285 /* Like print_string but with printf-like syntax */
286 static void
287 printf_string (const char *format, ...)
289 va_list args;
290 char buffer[BUFFER_SIZE];
292 va_start (args, format);
293 g_vsnprintf (buffer, sizeof (buffer), format, args);
294 va_end (args);
295 print_string (buffer);
298 /* Handle NODE and .SH commands. is_sh is 1 for .SH, 0 for NODE */
299 static void
300 handle_node (char *buffer, int is_sh)
302 int len, heading_level;
304 /* If we already skipped a section, don't skip another */
305 if (skip_flag == 2) {
306 skip_flag = 0;
308 /* Get the command parameters */
309 buffer = strtok (NULL, "");
310 if (buffer == NULL) {
311 print_error ("Syntax error: .SH: no title");
312 return;
313 } else {
314 /* Remove quotes */
315 if (buffer[0] == '"') {
316 buffer++;
317 len = strlen (buffer);
318 if (buffer[len - 1] == '"') {
319 len--;
320 buffer[len] = 0;
323 /* Calculate heading level */
324 heading_level = 0;
325 while (buffer[heading_level] == ' ')
326 heading_level++;
327 /* Heading level must be even */
328 if (heading_level & 1)
329 print_error ("Syntax error: .SH: odd heading level");
330 if (no_split_flag) {
331 /* Don't start a new section */
332 newline ();
333 print_string (buffer);
334 newline ();
335 newline ();
336 no_split_flag = 0;
337 } else if (skip_flag) {
338 /* Skipping title and marking text for skipping */
339 skip_flag = 2;
340 } else {
341 buffer += heading_level;
342 if (!is_sh || !node) {
343 /* Start a new section, but omit empty section names */
344 if (*buffer) {
345 fprintf (f_out, "%c[%s]", CHAR_NODE_END, buffer);
346 col++;
347 newline ();
350 /* Add section to the linked list */
351 if (!cnode) {
352 cnode = &nodes;
353 } else {
354 cnode->next = malloc (sizeof (nodes));
355 cnode = cnode->next;
357 cnode->node = strdup (buffer);
358 cnode->lname = NULL;
359 cnode->next = NULL;
360 cnode->heading_level = heading_level;
362 if (is_sh) {
363 /* print_string() strtok()es buffer, so */
364 cnode->lname = strdup (buffer);
365 print_string (buffer);
366 newline ();
367 newline ();
369 } /* Start new section */
370 } /* Has parameters */
371 node = !is_sh;
374 /* Convert character from the macro name to the font marker */
375 static inline char
376 char_to_font (char c)
378 switch (c) {
379 case 'R':
380 return CHAR_FONT_NORMAL;
381 case 'B':
382 return CHAR_FONT_BOLD;
383 case 'I':
384 return CHAR_FONT_ITALIC;
385 default:
386 return 0;
391 * Handle alternate font commands (.BR, .IR, .RB, .RI, .BI, .IB)
392 * Return 0 if the command wasn't recognized, 1 otherwise
394 static int
395 handle_alt_font (char *buffer)
397 char *p;
398 char *w;
399 char font[2];
400 int in_quotes = 0;
401 int alt_state = 0;
403 if (strlen (buffer) != 3)
404 return 0;
406 if (buffer[0] != '.')
407 return 0;
409 font[0] = char_to_font (buffer[1]);
410 font[1] = char_to_font (buffer[2]);
412 /* Exclude names with unknown characters, .BB, .II and .RR */
413 if (font[0] == 0 || font[1] == 0 || font[0] == font[1])
414 return 0;
416 p = strtok (NULL, "");
417 if (p == NULL) {
418 return 1;
421 w = buffer;
422 *w++ = font[0];
424 while (*p) {
426 if (*p == '"') {
427 in_quotes = !in_quotes;
428 p++;
429 continue;
432 if (*p == ' ' && !in_quotes) {
433 p++;
434 /* Don't change font if we are at the end */
435 if (*p != 0) {
436 alt_state = !alt_state;
437 *w++ = font[alt_state];
440 /* Skip more spaces */
441 while (*p == ' ')
442 p++;
444 continue;
447 *w++ = *p++;
450 /* Turn off attributes if necessary */
451 if (font[alt_state] != CHAR_FONT_NORMAL)
452 *w++ = CHAR_FONT_NORMAL;
454 *w = 0;
455 print_string (buffer);
457 return 1;
460 /* Handle .IP and .TP commands. is_tp is 1 for .TP, 0 for .IP */
461 static void
462 handle_tp_ip (int is_tp)
464 if (col > 0)
465 newline ();
466 newline ();
467 if (is_tp) {
468 tp_flag = 1;
469 indentation = 0;
470 } else
471 indentation = 8;
474 /* Handle all the roff dot commands. See man groff_man for details */
475 static void
476 handle_command (char *buffer)
478 int len;
480 /* Get the command name */
481 strtok (buffer, " \t");
483 if (strcmp (buffer, ".SH") == 0) {
484 indentation = 0;
485 handle_node (buffer, 1);
486 } else if (strcmp (buffer, ".\\\"NODE") == 0) {
487 handle_node (buffer, 0);
488 } else if (strcmp (buffer, ".\\\"DONT_SPLIT\"") == 0) {
489 no_split_flag = 1;
490 } else if (strcmp (buffer, ".\\\"SKIP_SECTION\"") == 0) {
491 skip_flag = 1;
492 } else if (strcmp (buffer, ".\\\"LINK2\"") == 0) {
493 /* Next two input lines form a link */
494 link_flag = 2;
495 } else if ((strcmp (buffer, ".PP") == 0)
496 || (strcmp (buffer, ".P") == 0)
497 || (strcmp (buffer, ".LP") == 0)) {
498 indentation = 0;
499 /* End of paragraph */
500 if (col > 0)
501 newline ();
502 newline ();
503 } else if (strcmp (buffer, ".nf") == 0) {
504 /* Following input lines are to be handled verbatim */
505 verbatim_flag = 1;
506 if (col > 0)
507 newline ();
508 } else if (strcmp (buffer, ".I") == 0 || strcmp (buffer, ".B") == 0
509 || strcmp (buffer, ".SB") == 0) {
510 /* Bold text or italics text */
511 char *p;
512 char *w;
513 int backslash_flag = 0;
515 /* .SB [text]
516 * Causes the text on the same line or the text on the
517 * next line to appear in boldface font, one point
518 * size smaller than the default font.
521 /* FIXME: text is optional, so there is no error */
522 p = strtok (NULL, "");
523 if (p == NULL) {
524 print_error ("Syntax error: .I | .B | .SB : no text");
525 return;
528 *buffer = (buffer[1] == 'I') ? CHAR_FONT_ITALIC : CHAR_FONT_BOLD;
530 /* Attempt to handle backslash quoting */
531 for (w = &buffer[1]; *p; p++) {
532 if (*p == '\\' && !backslash_flag) {
533 backslash_flag = 1;
534 continue;
536 backslash_flag = 0;
537 *w++ = *p;
540 *w++ = CHAR_FONT_NORMAL;
541 *w = 0;
542 print_string (buffer);
543 } else if (strcmp (buffer, ".TP") == 0) {
544 handle_tp_ip (1);
545 } else if (strcmp (buffer, ".IP") == 0) {
546 handle_tp_ip (0);
547 } else if (strcmp (buffer, ".\\\"TOPICS") == 0) {
548 if (out_row > 1) {
549 print_error
550 ("Syntax error: .\\\"TOPICS must be first command");
551 return;
553 buffer = strtok (NULL, "");
554 if (buffer == NULL) {
555 print_error ("Syntax error: .\\\"TOPICS: no text");
556 return;
558 /* Remove quotes */
559 if (buffer[0] == '"') {
560 buffer++;
561 len = strlen (buffer);
562 if (buffer[len - 1] == '"') {
563 len--;
564 buffer[len] = 0;
567 topics = strdup (buffer);
568 } else if (strcmp (buffer, ".br") == 0) {
569 if (col)
570 newline ();
571 } else if (strncmp (buffer, ".\\\"", 3) == 0) {
572 /* Comment */
573 } else if (strcmp (buffer, ".TH") == 0) {
574 /* Title header */
575 } else if (strcmp (buffer, ".SM") == 0) {
576 /* Causes the text on the same line or the text on the
577 * next line to appear in a font that is one point
578 * size smaller than the default font. */
579 buffer = strtok (NULL, "");
580 if (buffer)
581 print_string (buffer);
582 } else if (handle_alt_font (buffer) == 1) {
583 return;
584 } else {
585 /* Other commands are ignored */
586 char warn_str[BUFFER_SIZE];
587 g_snprintf (warn_str, sizeof (warn_str),
588 "Warning: unsupported command %s", buffer);
589 print_error (warn_str);
590 return;
/* One hypertext link found in the input, remembered for the stale-link check */
struct links {
    char *linkname;             /* Section name */
    int line;                   /* Input row on which the link was found */
    const char *filename;       /* Input file in which the link was found */
    struct links *next;         /* Next link in the list, NULL at the end */
};

/* First element of the link list (statically allocated) */
static struct links links;

/* Most recently added link; NULL until the first link is recorded */
static struct links *current_link = NULL;
601 static void
602 handle_link (char *buffer)
604 static char old[80];
605 int len;
606 char *amp;
607 const char *amp_arg;
609 switch (link_flag) {
610 case 1:
611 /* Old format link, not supported */
612 break;
613 case 2:
614 /* First part of new format link */
615 /* Bold text or italics text */
616 if (buffer[0] == '.' && (buffer[1] == 'I' || buffer[1] == 'B'))
617 for (buffer += 2; *buffer == ' ' || *buffer == '\t'; buffer++);
618 g_strlcpy (old, buffer, sizeof (old));
619 link_flag = 3;
620 break;
621 case 3:
622 /* Second part of new format link */
623 if (buffer[0] == '.')
624 buffer++;
625 if (buffer[0] == '\\')
626 buffer++;
627 if (buffer[0] == '"')
628 buffer++;
629 len = strlen (buffer);
630 if (len && buffer[len - 1] == '"') {
631 buffer[--len] = 0;
634 /* "Layout\&)," -- "Layout" should be highlighted, but not ")," */
635 amp = strstr (old, "\\&");
636 if (amp) {
637 *amp = 0;
638 amp += 2;
639 amp_arg = amp;
640 } else {
641 amp_arg = "";
644 printf_string ("%c%s%c%s%c%s\n", CHAR_LINK_START, old,
645 CHAR_LINK_POINTER, buffer, CHAR_LINK_END, amp_arg);
646 link_flag = 0;
647 /* Add to the linked list */
648 if (current_link) {
649 current_link->next = malloc (sizeof (links));
650 current_link = current_link->next;
651 current_link->next = NULL;
652 } else {
653 current_link = &links;
655 current_link->linkname = strdup (buffer);
656 current_link->filename = c_in;
657 current_link->line = in_row;
658 break;
663 main (int argc, char **argv)
665 int len; /* Length of input line */
666 const char *c_man; /* Manual filename */
667 const char *c_tmpl; /* Template filename */
668 FILE *f_man; /* Manual file */
669 FILE *f_tmpl; /* Template file */
670 char buffer[BUFFER_SIZE]; /* Full input line */
671 char *node = NULL;
672 char *outfile_buffer; /* Large buffer to keep the output file */
673 long cont_start; /* Start of [Contents] */
674 long file_end; /* Length of the output file */
676 /* Validity check for arguments */
677 if (argc != 4) {
678 fprintf (stderr,
679 "Usage: man2hlp file.man template_file helpfile\n");
680 return 3;
683 c_man = argv[1];
684 c_tmpl = argv[2];
685 c_out = argv[3];
687 /* First stage - process the manual, write to the output file */
688 f_man = fopen_check (c_man, "r");
689 f_out = fopen_check (c_out, "w");
690 c_in = c_man;
692 /* Repeat for each input line */
693 while (fgets (buffer, BUFFER_SIZE, f_man)) {
694 char *input_line; /* Input line without initial "\&" */
696 if (buffer[0] == '\\' && buffer[1] == '&')
697 input_line = buffer + 2;
698 else
699 input_line = buffer;
701 in_row++;
702 len = strlen (input_line);
703 /* Remove terminating newline */
704 if (input_line[len - 1] == '\n') {
705 len--;
706 input_line[len] = 0;
709 if (verbatim_flag) {
710 /* Copy the line verbatim */
711 if (strcmp (input_line, ".fi") == 0) {
712 verbatim_flag = 0;
713 } else {
714 print_string (input_line);
715 newline ();
717 } else if (link_flag) {
718 /* The line is a link */
719 handle_link (input_line);
720 } else if (buffer[0] == '.') {
721 /* The line is a roff command */
722 handle_command (input_line);
723 } else {
724 /* A normal line, just output it */
725 print_string (input_line);
727 /* .TP label processed as usual line */
728 if (tp_flag) {
729 if (tp_flag == 1) {
730 tp_flag = 2;
731 } else {
732 tp_flag = 0;
733 indentation = 8;
734 if (col >= indentation)
735 newline ();
736 else
737 while (++col < indentation)
738 fputc (' ', f_out);
743 newline ();
744 fclose_check (f_man);
745 /* First stage ends here, closing the manual */
747 /* Second stage - process the template file */
748 f_tmpl = fopen_check (c_tmpl, "r");
749 c_in = c_tmpl;
751 /* Repeat for each input line */
752 /* Read a line */
753 while (fgets (buffer, BUFFER_SIZE, f_tmpl)) {
754 if (node) {
755 if (*buffer && *buffer != '\n') {
756 cnode->lname = strdup (buffer);
757 node = strchr (cnode->lname, '\n');
758 if (node)
759 *node = 0;
761 node = NULL;
762 } else {
763 node = strchr (buffer, CHAR_NODE_END);
764 if (node && (node[1] == '[')) {
765 char *p = strchr (node, ']');
766 if (p) {
767 if (strncmp (node + 1, "[main]", 6) == 0) {
768 node = NULL;
769 } else {
770 if (!cnode) {
771 cnode = &nodes;
772 } else {
773 cnode->next = malloc (sizeof (nodes));
774 cnode = cnode->next;
776 cnode->node = strdup (node + 2);
777 cnode->node[p - node - 2] = 0;
778 cnode->lname = NULL;
779 cnode->next = NULL;
780 cnode->heading_level = 0;
782 } else
783 node = NULL;
784 } else
785 node = NULL;
787 fputs (buffer, f_out);
790 cont_start = ftell (f_out);
791 if (cont_start <= 0) {
792 perror (c_out);
793 return 1;
796 if (topics)
797 fprintf (f_out, "\004[Contents]\n%s\n\n", topics);
798 else
799 fprintf (f_out, "\004[Contents]\n");
801 for (current_link = &links; current_link && current_link->linkname;) {
802 int found = 0;
803 struct links *next = current_link->next;
805 if (strcmp (current_link->linkname, "Contents") == 0) {
806 found = 1;
807 } else {
808 for (cnode = &nodes; cnode && cnode->node; cnode = cnode->next) {
809 if (strcmp (cnode->node, current_link->linkname) == 0) {
810 found = 1;
811 break;
815 if (!found) {
816 g_snprintf (buffer, sizeof (buffer), "Stale link \"%s\"",
817 current_link->linkname);
818 c_in = current_link->filename;
819 in_row = current_link->line;
820 print_error (buffer);
822 free (current_link->linkname);
823 if (current_link != &links)
824 free (current_link);
825 current_link = next;
828 for (cnode = &nodes; cnode && cnode->node;) {
829 char *node = cnode->node;
830 struct node *next = cnode->next;
832 if (*node)
833 fprintf (f_out, " %*s\001%s\002%s\003", cnode->heading_level,
834 "", cnode->lname ? cnode->lname : node, node);
835 fprintf (f_out, "\n");
837 free (cnode->node);
838 if (cnode->lname)
839 free (cnode->lname);
840 if (cnode != &nodes)
841 free (cnode);
842 cnode = next;
845 file_end = ftell (f_out);
847 /* Sanity check */
848 if ((file_end <= 0) || (file_end - cont_start <= 0)) {
849 perror (c_out);
850 return 1;
853 fclose_check (f_out);
854 fclose_check (f_tmpl);
855 /* Second stage ends here, closing all files, note the end of output */
858 * Third stage - swap two parts of the output file.
859 * First, open the output file for reading and load it into the memory.
861 f_out = fopen_check (c_out, "r");
863 outfile_buffer = malloc (file_end);
864 if (!outfile_buffer)
865 return 1;
867 if (!persistent_fread (outfile_buffer, file_end, f_out)) {
868 perror (c_out);
869 return 1;
872 fclose_check (f_out);
873 /* Now the output file is in the memory */
875 /* Again open output file for writing */
876 f_out = fopen_check (c_out, "w");
878 /* Write part after the "Contents" node */
879 if (!persistent_fwrite
880 (outfile_buffer + cont_start, file_end - cont_start, f_out)) {
881 perror (c_out);
882 return 1;
885 /* Write part before the "Contents" node */
886 if (!persistent_fwrite (outfile_buffer, cont_start, f_out)) {
887 perror (c_out);
888 return 1;
891 free (outfile_buffer);
892 fclose_check (f_out);
893 /* Closing everything */
895 return 0;