Ticket 419: (search broken in editor/viewer on glib <2.14)
[midnight-commander.git] / src / man2hlp.c
blobbaf79dc197633e12365bc4208ce96b333e667ae8
/* Man page to help file converter
   Copyright (C) 1994, 1995, 1998, 2000, 2001, 2002, 2003, 2004, 2005,
   2007 Free Software Foundation, Inc.
   2002 Andrew V. Samoilov
   2002 Pavel Roskin

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

/** \file man2hlp.c
 *  \brief Source: man page to help file converter
 */

25 #include <config.h>
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include <glib.h>
34 #include "help.h"
35 #include "glibcompat.h"
/* Size of the buffers used for input lines and formatted messages */
#define BUFFER_SIZE 256

/* Converter state shared by the functions in this file */
static int col = 0;             /* Current output column */
static int out_row = 1;         /* Current output row */
static int in_row = 0;          /* Current input row */
static int no_split_flag = 0;   /* Flag: Don't split section on next ".SH" */
static int skip_flag = 0;       /* Flag: Skip this section.
                                   0 = don't skip,
                                   1 = skipping title,
                                   2 = title skipped, skipping text */
static int link_flag = 0;       /* Flag: Next line is a link */
static int verbatim_flag = 0;   /* Flag: Copy input to output verbatim */
static int node = 0;            /* Flag: the previous section command was
                                   NODE rather than ".SH" (handle_node()
                                   stores !is_sh here) */

static const char *c_out;       /* Output filename */
static FILE *f_out;             /* Output file */

static const char *c_in;        /* Current input filename */

static int indentation;         /* Indentation level, n spaces */
static int tp_flag;             /* Flag: .TP paragraph
                                   1 = this line is .TP label,
                                   2 = first line of label description. */
static char *topics = NULL;     /* Text of the TOPICS command, NULL if none */

/* One help section; sections form a singly linked list in creation order */
struct node {
    char *node;                 /* Section name */
    char *lname;                /* Translated .SH, NULL if not translated */
    struct node *next;          /* Next section, NULL at the end of the list */
    int heading_level;          /* Number of leading spaces in the title */
};

static struct node nodes;       /* First list element, statically allocated */
static struct node *cnode;      /* Current node */

/* Largest chunk passed to a single fread()/fwrite() call */
#define MAX_STREAM_BLOCK 8192

/*
 * Read in blocks of reasonable size and make sure we read everything.
 * Failure to read everything is an error, indicated by returning 0.
 *
 * data   - destination buffer, at least len bytes long
 * len    - number of bytes to read; 0 yields 0 without touching stream
 * stream - stream to read from
 *
 * Returns len on success, 0 if the stream ran short or failed.
 */
static size_t
persistent_fread (void *data, size_t len, FILE *stream)
{
    size_t count;
    size_t bytes_done = 0;
    char *ptr = (char *) data;

    if (len == 0)
        return 0;

    while (bytes_done < len) {
        count = len - bytes_done;
        if (count > MAX_STREAM_BLOCK)
            count = MAX_STREAM_BLOCK;

        count = fread (ptr, 1, count, stream);

        /* No progress means EOF or error: report failure */
        if (count == 0)
            return 0;

        bytes_done += count;
        ptr += count;
    }

    return bytes_done;
}

106 * Write in blocks of reasonable size and make sure we write everything.
107 * Failure to write everything is an error, indicated by returning 0.
109 static size_t
110 persistent_fwrite (const void *data, size_t len, FILE *stream)
112 size_t count;
113 size_t bytes_done = 0;
114 const char *ptr = (const char *) data;
116 if (len <= 0)
117 return 0;
119 while (bytes_done < len) {
120 count = len - bytes_done;
121 if (count > MAX_STREAM_BLOCK)
122 count = MAX_STREAM_BLOCK;
124 count = fwrite (ptr, 1, count, stream);
126 if (count <= 0)
127 return 0;
129 bytes_done += count;
130 ptr += count;
133 return bytes_done;
136 /* Report error in input */
137 static void
138 print_error (const char *message)
140 fprintf (stderr, "man2hlp: %s in file \"%s\" on line %d\n", message,
141 c_in, in_row);
144 /* Do fopen(), exit if it fails */
145 static FILE *
146 fopen_check (const char *filename, const char *flags)
148 char tmp[BUFFER_SIZE];
149 FILE *f;
151 f = fopen (filename, flags);
152 if (f == NULL) {
153 g_snprintf (tmp, sizeof (tmp), "man2hlp: Cannot open file \"%s\"",
154 filename);
155 perror (tmp);
156 exit (3);
159 return f;
/*
 * Do fclose(), exit if it fails.
 * The stream's error indicator is checked first, so an earlier failed
 * read or write on f also terminates the program (exit status 3).
 */
static void
fclose_check (FILE *f)
{
    if (ferror (f)) {
        perror ("man2hlp: File error");
        exit (3);
    }

    if (fclose (f)) {
        perror ("man2hlp: Cannot close file");
        exit (3);
    }
}

177 /* Change output line */
178 static void
179 newline (void)
181 out_row++;
182 col = 0;
183 fprintf (f_out, "\n");
186 /* Calculate the length of string */
187 static int
188 string_len (const char *buffer)
190 static int anchor_flag = 0; /* Flag: Inside hypertext anchor name */
191 static int link_flag = 0; /* Flag: Inside hypertext link target name */
192 int backslash_flag = 0; /* Flag: Backslash quoting */
193 int c; /* Current character */
194 int len = 0; /* Result: the length of the string */
196 while (*(buffer)) {
197 c = *buffer++;
198 if (c == CHAR_LINK_POINTER)
199 link_flag = 1; /* Link target name starts */
200 else if (c == CHAR_LINK_END)
201 link_flag = 0; /* Link target name ends */
202 else if (c == CHAR_NODE_END) {
203 /* Node anchor name starts */
204 anchor_flag = 1;
205 /* Ugly hack to prevent loss of one space */
206 len++;
208 /* Don't add control characters to the length */
209 if (c >= 0 && c < 32)
210 continue;
211 /* Attempt to handle backslash quoting */
212 if (c == '\\' && !backslash_flag) {
213 backslash_flag = 1;
214 continue;
216 backslash_flag = 0;
217 /* Increase length if not inside anchor name or link target name */
218 if (!anchor_flag && !link_flag)
219 len++;
220 if (anchor_flag && c == ']') {
221 /* Node anchor name ends */
222 anchor_flag = 0;
225 return len;
228 /* Output the string */
229 static void
230 print_string (char *buffer)
232 int len; /* The length of current word */
233 int c; /* Current character */
234 int backslash_flag = 0;
236 /* Skipping lines? */
237 if (skip_flag)
238 return;
239 /* Copying verbatim? */
240 if (verbatim_flag) {
241 /* Attempt to handle backslash quoting */
242 while (*(buffer)) {
243 c = *buffer++;
244 if (c == '\\' && !backslash_flag) {
245 backslash_flag = 1;
246 continue;
248 backslash_flag = 0;
249 fputc (c, f_out);
251 } else {
252 /* Split into words */
253 buffer = strtok (buffer, " \t\n");
254 /* Repeat for each word */
255 while (buffer) {
256 /* Skip empty strings */
257 if (*(buffer)) {
258 len = string_len (buffer);
259 /* Change the line if about to break the right margin */
260 if (col + len >= HELP_TEXT_WIDTH)
261 newline ();
262 /* Words are separated by spaces */
263 if (col > 0) {
264 fputc (' ', f_out);
265 col++;
266 } else if (indentation) {
267 while (col++ < indentation)
268 fputc (' ', f_out);
270 /* Attempt to handle backslash quoting */
271 while (*(buffer)) {
272 c = *buffer++;
273 if (c == '\\' && !backslash_flag) {
274 backslash_flag = 1;
275 continue;
277 backslash_flag = 0;
278 fputc (c, f_out);
280 /* Increase column */
281 col += len;
283 /* Get the next word */
284 buffer = strtok (NULL, " \t\n");
285 } /* while */
289 /* Like print_string but with printf-like syntax */
290 static void
291 printf_string (const char *format, ...)
293 va_list args;
294 char buffer[BUFFER_SIZE];
296 va_start (args, format);
297 g_vsnprintf (buffer, sizeof (buffer), format, args);
298 va_end (args);
299 print_string (buffer);
302 /* Handle NODE and .SH commands. is_sh is 1 for .SH, 0 for NODE */
303 static void
304 handle_node (char *buffer, int is_sh)
306 int len, heading_level;
308 /* If we already skipped a section, don't skip another */
309 if (skip_flag == 2) {
310 skip_flag = 0;
312 /* Get the command parameters */
313 buffer = strtok (NULL, "");
314 if (buffer == NULL) {
315 print_error ("Syntax error: .SH: no title");
316 return;
317 } else {
318 /* Remove quotes */
319 if (buffer[0] == '"') {
320 buffer++;
321 len = strlen (buffer);
322 if (buffer[len - 1] == '"') {
323 len--;
324 buffer[len] = 0;
327 /* Calculate heading level */
328 heading_level = 0;
329 while (buffer[heading_level] == ' ')
330 heading_level++;
331 /* Heading level must be even */
332 if (heading_level & 1)
333 print_error ("Syntax error: .SH: odd heading level");
334 if (no_split_flag) {
335 /* Don't start a new section */
336 newline ();
337 print_string (buffer);
338 newline ();
339 newline ();
340 no_split_flag = 0;
341 } else if (skip_flag) {
342 /* Skipping title and marking text for skipping */
343 skip_flag = 2;
344 } else {
345 buffer += heading_level;
346 if (!is_sh || !node) {
347 /* Start a new section, but omit empty section names */
348 if (*buffer) {
349 fprintf (f_out, "%c[%s]", CHAR_NODE_END, buffer);
350 col++;
351 newline ();
354 /* Add section to the linked list */
355 if (!cnode) {
356 cnode = &nodes;
357 } else {
358 cnode->next = malloc (sizeof (nodes));
359 cnode = cnode->next;
361 cnode->node = strdup (buffer);
362 cnode->lname = NULL;
363 cnode->next = NULL;
364 cnode->heading_level = heading_level;
366 if (is_sh) {
367 /* print_string() strtok()es buffer, so */
368 cnode->lname = strdup (buffer);
369 print_string (buffer);
370 newline ();
371 newline ();
373 } /* Start new section */
374 } /* Has parameters */
375 node = !is_sh;
378 /* Convert character from the macro name to the font marker */
379 static inline char
380 char_to_font (char c)
382 switch (c) {
383 case 'R':
384 return CHAR_FONT_NORMAL;
385 case 'B':
386 return CHAR_FONT_BOLD;
387 case 'I':
388 return CHAR_FONT_ITALIC;
389 default:
390 return 0;
395 * Handle alternate font commands (.BR, .IR, .RB, .RI, .BI, .IB)
396 * Return 0 if the command wasn't recognized, 1 otherwise
398 static int
399 handle_alt_font (char *buffer)
401 char *p;
402 char *w;
403 char font[2];
404 int in_quotes = 0;
405 int alt_state = 0;
407 if (strlen (buffer) != 3)
408 return 0;
410 if (buffer[0] != '.')
411 return 0;
413 font[0] = char_to_font (buffer[1]);
414 font[1] = char_to_font (buffer[2]);
416 /* Exclude names with unknown characters, .BB, .II and .RR */
417 if (font[0] == 0 || font[1] == 0 || font[0] == font[1])
418 return 0;
420 p = strtok (NULL, "");
421 if (p == NULL) {
422 return 1;
425 w = buffer;
426 *w++ = font[0];
428 while (*p) {
430 if (*p == '"') {
431 in_quotes = !in_quotes;
432 p++;
433 continue;
436 if (*p == ' ' && !in_quotes) {
437 p++;
438 /* Don't change font if we are at the end */
439 if (*p != 0) {
440 alt_state = !alt_state;
441 *w++ = font[alt_state];
444 /* Skip more spaces */
445 while (*p == ' ')
446 p++;
448 continue;
451 *w++ = *p++;
454 /* Turn off attributes if necessary */
455 if (font[alt_state] != CHAR_FONT_NORMAL)
456 *w++ = CHAR_FONT_NORMAL;
458 *w = 0;
459 print_string (buffer);
461 return 1;
464 /* Handle .IP and .TP commands. is_tp is 1 for .TP, 0 for .IP */
465 static void
466 handle_tp_ip (int is_tp)
468 if (col > 0)
469 newline ();
470 newline ();
471 if (is_tp) {
472 tp_flag = 1;
473 indentation = 0;
474 } else
475 indentation = 8;
478 /* Handle all the roff dot commands. See man groff_man for details */
479 static void
480 handle_command (char *buffer)
482 int len;
484 /* Get the command name */
485 strtok (buffer, " \t");
487 if (strcmp (buffer, ".SH") == 0) {
488 indentation = 0;
489 handle_node (buffer, 1);
490 } else if (strcmp (buffer, ".\\\"NODE") == 0) {
491 handle_node (buffer, 0);
492 } else if (strcmp (buffer, ".\\\"DONT_SPLIT\"") == 0) {
493 no_split_flag = 1;
494 } else if (strcmp (buffer, ".\\\"SKIP_SECTION\"") == 0) {
495 skip_flag = 1;
496 } else if (strcmp (buffer, ".\\\"LINK2\"") == 0) {
497 /* Next two input lines form a link */
498 link_flag = 2;
499 } else if ((strcmp (buffer, ".PP") == 0)
500 || (strcmp (buffer, ".P") == 0)
501 || (strcmp (buffer, ".LP") == 0)) {
502 indentation = 0;
503 /* End of paragraph */
504 if (col > 0)
505 newline ();
506 newline ();
507 } else if (strcmp (buffer, ".nf") == 0) {
508 /* Following input lines are to be handled verbatim */
509 verbatim_flag = 1;
510 if (col > 0)
511 newline ();
512 } else if (strcmp (buffer, ".I") == 0 || strcmp (buffer, ".B") == 0
513 || strcmp (buffer, ".SB") == 0) {
514 /* Bold text or italics text */
515 char *p;
516 char *w;
517 int backslash_flag = 0;
519 /* .SB [text]
520 * Causes the text on the same line or the text on the
521 * next line to appear in boldface font, one point
522 * size smaller than the default font.
525 /* FIXME: text is optional, so there is no error */
526 p = strtok (NULL, "");
527 if (p == NULL) {
528 print_error ("Syntax error: .I | .B | .SB : no text");
529 return;
532 *buffer = (buffer[1] == 'I') ? CHAR_FONT_ITALIC : CHAR_FONT_BOLD;
534 /* Attempt to handle backslash quoting */
535 for (w = &buffer[1]; *p; p++) {
536 if (*p == '\\' && !backslash_flag) {
537 backslash_flag = 1;
538 continue;
540 backslash_flag = 0;
541 *w++ = *p;
544 *w++ = CHAR_FONT_NORMAL;
545 *w = 0;
546 print_string (buffer);
547 } else if (strcmp (buffer, ".TP") == 0) {
548 handle_tp_ip (1);
549 } else if (strcmp (buffer, ".IP") == 0) {
550 handle_tp_ip (0);
551 } else if (strcmp (buffer, ".\\\"TOPICS") == 0) {
552 if (out_row > 1) {
553 print_error
554 ("Syntax error: .\\\"TOPICS must be first command");
555 return;
557 buffer = strtok (NULL, "");
558 if (buffer == NULL) {
559 print_error ("Syntax error: .\\\"TOPICS: no text");
560 return;
562 /* Remove quotes */
563 if (buffer[0] == '"') {
564 buffer++;
565 len = strlen (buffer);
566 if (buffer[len - 1] == '"') {
567 len--;
568 buffer[len] = 0;
571 topics = strdup (buffer);
572 } else if (strcmp (buffer, ".br") == 0) {
573 if (col)
574 newline ();
575 } else if (strncmp (buffer, ".\\\"", 3) == 0) {
576 /* Comment */
577 } else if (strcmp (buffer, ".TH") == 0) {
578 /* Title header */
579 } else if (strcmp (buffer, ".SM") == 0) {
580 /* Causes the text on the same line or the text on the
581 * next line to appear in a font that is one point
582 * size smaller than the default font. */
583 buffer = strtok (NULL, "");
584 if (buffer)
585 print_string (buffer);
586 } else if (handle_alt_font (buffer) == 1) {
587 return;
588 } else {
589 /* Other commands are ignored */
590 char warn_str[BUFFER_SIZE];
591 g_snprintf (warn_str, sizeof (warn_str),
592 "Warning: unsupported command %s", buffer);
593 print_error (warn_str);
594 return;
/* Links found in the input; each is checked against the node list in main() */
static struct links {
    char *linkname;             /* Section name */
    int line;                   /* Input line in ... */
    const char *filename;       /* Input file the link was read from */
    struct links *next;         /* Next link, NULL at the end of the list */
} links, *current_link;

605 static void
606 handle_link (char *buffer)
608 static char old[80];
609 int len;
610 char *amp;
611 const char *amp_arg;
613 switch (link_flag) {
614 case 1:
615 /* Old format link, not supported */
616 break;
617 case 2:
618 /* First part of new format link */
619 /* Bold text or italics text */
620 if (buffer[0] == '.' && (buffer[1] == 'I' || buffer[1] == 'B'))
621 for (buffer += 2; *buffer == ' ' || *buffer == '\t'; buffer++);
622 g_strlcpy (old, buffer, sizeof (old));
623 link_flag = 3;
624 break;
625 case 3:
626 /* Second part of new format link */
627 if (buffer[0] == '.')
628 buffer++;
629 if (buffer[0] == '\\')
630 buffer++;
631 if (buffer[0] == '"')
632 buffer++;
633 len = strlen (buffer);
634 if (len && buffer[len - 1] == '"') {
635 buffer[--len] = 0;
638 /* "Layout\&)," -- "Layout" should be highlighted, but not ")," */
639 amp = strstr (old, "\\&");
640 if (amp) {
641 *amp = 0;
642 amp += 2;
643 amp_arg = amp;
644 } else {
645 amp_arg = "";
648 printf_string ("%c%s%c%s%c%s\n", CHAR_LINK_START, old,
649 CHAR_LINK_POINTER, buffer, CHAR_LINK_END, amp_arg);
650 link_flag = 0;
651 /* Add to the linked list */
652 if (current_link) {
653 current_link->next = malloc (sizeof (links));
654 current_link = current_link->next;
655 current_link->next = NULL;
656 } else {
657 current_link = &links;
659 current_link->linkname = strdup (buffer);
660 current_link->filename = c_in;
661 current_link->line = in_row;
662 break;
667 main (int argc, char **argv)
669 int len; /* Length of input line */
670 const char *c_man; /* Manual filename */
671 const char *c_tmpl; /* Template filename */
672 FILE *f_man; /* Manual file */
673 FILE *f_tmpl; /* Template file */
674 char buffer[BUFFER_SIZE]; /* Full input line */
675 char *node = NULL;
676 char *outfile_buffer; /* Large buffer to keep the output file */
677 long cont_start; /* Start of [Contents] */
678 long file_end; /* Length of the output file */
680 /* Validity check for arguments */
681 if (argc != 4) {
682 fprintf (stderr,
683 "Usage: man2hlp file.man template_file helpfile\n");
684 return 3;
687 c_man = argv[1];
688 c_tmpl = argv[2];
689 c_out = argv[3];
691 /* First stage - process the manual, write to the output file */
692 f_man = fopen_check (c_man, "r");
693 f_out = fopen_check (c_out, "w");
694 c_in = c_man;
696 /* Repeat for each input line */
697 while (fgets (buffer, BUFFER_SIZE, f_man)) {
698 char *input_line; /* Input line without initial "\&" */
700 if (buffer[0] == '\\' && buffer[1] == '&')
701 input_line = buffer + 2;
702 else
703 input_line = buffer;
705 in_row++;
706 len = strlen (input_line);
707 /* Remove terminating newline */
708 if (input_line[len - 1] == '\n') {
709 len--;
710 input_line[len] = 0;
713 if (verbatim_flag) {
714 /* Copy the line verbatim */
715 if (strcmp (input_line, ".fi") == 0) {
716 verbatim_flag = 0;
717 } else {
718 print_string (input_line);
719 newline ();
721 } else if (link_flag) {
722 /* The line is a link */
723 handle_link (input_line);
724 } else if (buffer[0] == '.') {
725 /* The line is a roff command */
726 handle_command (input_line);
727 } else {
728 /* A normal line, just output it */
729 print_string (input_line);
731 /* .TP label processed as usual line */
732 if (tp_flag) {
733 if (tp_flag == 1) {
734 tp_flag = 2;
735 } else {
736 tp_flag = 0;
737 indentation = 8;
738 if (col >= indentation)
739 newline ();
740 else
741 while (++col < indentation)
742 fputc (' ', f_out);
747 newline ();
748 fclose_check (f_man);
749 /* First stage ends here, closing the manual */
751 /* Second stage - process the template file */
752 f_tmpl = fopen_check (c_tmpl, "r");
753 c_in = c_tmpl;
755 /* Repeat for each input line */
756 /* Read a line */
757 while (fgets (buffer, BUFFER_SIZE, f_tmpl)) {
758 if (node) {
759 if (*buffer && *buffer != '\n') {
760 cnode->lname = strdup (buffer);
761 node = strchr (cnode->lname, '\n');
762 if (node)
763 *node = 0;
765 node = NULL;
766 } else {
767 node = strchr (buffer, CHAR_NODE_END);
768 if (node && (node[1] == '[')) {
769 char *p = strchr (node, ']');
770 if (p) {
771 if (strncmp (node + 1, "[main]", 6) == 0) {
772 node = NULL;
773 } else {
774 if (!cnode) {
775 cnode = &nodes;
776 } else {
777 cnode->next = malloc (sizeof (nodes));
778 cnode = cnode->next;
780 cnode->node = strdup (node + 2);
781 cnode->node[p - node - 2] = 0;
782 cnode->lname = NULL;
783 cnode->next = NULL;
784 cnode->heading_level = 0;
786 } else
787 node = NULL;
788 } else
789 node = NULL;
791 fputs (buffer, f_out);
794 cont_start = ftell (f_out);
795 if (cont_start <= 0) {
796 perror (c_out);
797 return 1;
800 if (topics)
801 fprintf (f_out, "\004[Contents]\n%s\n\n", topics);
802 else
803 fprintf (f_out, "\004[Contents]\n");
805 for (current_link = &links; current_link && current_link->linkname;) {
806 int found = 0;
807 struct links *next = current_link->next;
809 if (strcmp (current_link->linkname, "Contents") == 0) {
810 found = 1;
811 } else {
812 for (cnode = &nodes; cnode && cnode->node; cnode = cnode->next) {
813 if (strcmp (cnode->node, current_link->linkname) == 0) {
814 found = 1;
815 break;
819 if (!found) {
820 g_snprintf (buffer, sizeof (buffer), "Stale link \"%s\"",
821 current_link->linkname);
822 c_in = current_link->filename;
823 in_row = current_link->line;
824 print_error (buffer);
826 free (current_link->linkname);
827 if (current_link != &links)
828 free (current_link);
829 current_link = next;
832 for (cnode = &nodes; cnode && cnode->node;) {
833 char *node = cnode->node;
834 struct node *next = cnode->next;
836 if (*node)
837 fprintf (f_out, " %*s\001%s\002%s\003", cnode->heading_level,
838 "", cnode->lname ? cnode->lname : node, node);
839 fprintf (f_out, "\n");
841 free (cnode->node);
842 if (cnode->lname)
843 free (cnode->lname);
844 if (cnode != &nodes)
845 free (cnode);
846 cnode = next;
849 file_end = ftell (f_out);
851 /* Sanity check */
852 if ((file_end <= 0) || (file_end - cont_start <= 0)) {
853 perror (c_out);
854 return 1;
857 fclose_check (f_out);
858 fclose_check (f_tmpl);
859 /* Second stage ends here, closing all files, note the end of output */
862 * Third stage - swap two parts of the output file.
863 * First, open the output file for reading and load it into the memory.
865 f_out = fopen_check (c_out, "r");
867 outfile_buffer = malloc (file_end);
868 if (!outfile_buffer)
869 return 1;
871 if (!persistent_fread (outfile_buffer, file_end, f_out)) {
872 perror (c_out);
873 return 1;
876 fclose_check (f_out);
877 /* Now the output file is in the memory */
879 /* Again open output file for writing */
880 f_out = fopen_check (c_out, "w");
882 /* Write part after the "Contents" node */
883 if (!persistent_fwrite
884 (outfile_buffer + cont_start, file_end - cont_start, f_out)) {
885 perror (c_out);
886 return 1;
889 /* Write part before the "Contents" node */
890 if (!persistent_fwrite (outfile_buffer, cont_start, f_out)) {
891 perror (c_out);
892 return 1;
895 free (outfile_buffer);
896 fclose_check (f_out);
897 /* Closing everything */
899 return 0;