9621 Make createtxg and guid properties public
[unleashed.git] / usr / src / cmd / man / makewhatis.c
blobc5428e46330490a40d6dcb275a6faf30a820526d
1 /*
2 * Copyright (c) 2002 John Rochester
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
31 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/param.h>
38 #include <ctype.h>
39 #include <dirent.h>
40 #include <err.h>
41 #include <signal.h>
42 #include <stddef.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
48 #include "man.h"
49 #include "stringlist.h"
/*
 * Per-page record built for every man page found in a section directory.
 */
struct page_info {
	char	*filename;	/* path of the page: "<section dir>/<entry>" */
	char	*name;		/* entry name with the suffix cut off */
	char	*suffix;	/* text following the last '.' of the entry */
	ino_t	inode;		/* inode number as reported by stat() */
};
/*
 * A growable character buffer.
 */
struct sbuf {
	char	*content;	/* first byte of the allocation */
	char	*end;		/* one past the last byte of the content */
	char	*last;		/* final byte of the allocation */
};

/* Drop the trailing `amount' characters from the sbuf */
#define	sbuf_retract(sbuf, amount)	((sbuf)->end -= (amount))
/* Number of characters currently held by the sbuf */
#define	sbuf_length(sbuf)		((sbuf)->end - (sbuf)->content)
72 typedef char *edited_copy(char *from, char *to, int length);
75 * While the whatis line is being formed, it is stored in whatis_proto.
76 * When finished, it is reformatted into whatis_final and then appended
77 * to whatis_lines.
79 static struct sbuf *whatis_proto;
80 static struct sbuf *whatis_final;
81 static stringlist *whatis_lines; /* collected output lines */
83 static char tempfile[MAXPATHLEN]; /* path of temporary file, if any */
85 #define MDOC_COMMANDS "ArDvErEvFlLiNmPa"
88 /* Free a struct page_info and its content */
89 static void
90 free_page_info(struct page_info *info)
93 free(info->filename);
94 free(info->name);
95 free(info->suffix);
96 free(info);
100 * Allocate and fill in a new struct page_info given the
101 * name of the man section directory and the dirent of the file.
102 * If the file is not a man page, return NULL.
104 static struct page_info *
105 new_page_info(char *dir, struct dirent *dirent)
107 struct page_info *info;
108 int basename_length;
109 char *suffix;
110 struct stat st;
112 if ((info = malloc(sizeof (struct page_info))) == NULL)
113 err(1, "malloc");
114 basename_length = strlen(dirent->d_name);
115 suffix = &dirent->d_name[basename_length];
116 if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1)
117 err(1, "asprintf");
118 for (;;) {
119 if (--suffix == dirent->d_name || !isalnum(*suffix)) {
120 if (*suffix == '.')
121 break;
122 free(info->filename);
123 free(info);
124 return (NULL);
127 *suffix++ = '\0';
128 info->name = strdup(dirent->d_name);
129 info->suffix = strdup(suffix);
130 if (stat(info->filename, &st) < 0) {
131 warn("%s", info->filename);
132 free_page_info(info);
133 return (NULL);
135 if (!S_ISREG(st.st_mode)) {
136 free_page_info(info);
137 return (NULL);
139 info->inode = st.st_ino;
140 return (info);
/*
 * Empty the sbuf; the allocation itself is kept.
 */
static void
sbuf_clear(struct sbuf *sbuf)
{
	/* Retracting by the full length puts end back on content */
	sbuf_retract(sbuf, sbuf_length(sbuf));
}
154 * Allocate a new sbuf.
156 static struct sbuf *
157 new_sbuf(void)
159 struct sbuf *sbuf;
161 if ((sbuf = malloc(sizeof (struct sbuf))) == NULL)
162 err(1, "malloc");
163 if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL)
164 err(1, "malloc");
165 sbuf->last = sbuf->content + LINE_ALLOC - 1;
166 sbuf_clear(sbuf);
168 return (sbuf);
172 * Ensure that there is enough room in the sbuf
173 * for nchars more characters.
175 static void
176 sbuf_need(struct sbuf *sbuf, int nchars)
178 char *new_content;
179 size_t size, cntsize;
180 size_t grow = 128;
182 while (grow < nchars) {
183 grow += 128; /* we grow in chunks of 128 bytes */
186 /* Grow if the buffer isn't big enough */
187 if (sbuf->end + nchars > sbuf->last) {
188 size = sbuf->last + 1 - sbuf->content;
189 size += grow;
190 cntsize = sbuf->end - sbuf->content;
192 if ((new_content = realloc(sbuf->content, size)) == NULL) {
193 perror("realloc");
194 if (tempfile[0] != '\0')
195 (void) unlink(tempfile);
196 exit(1);
198 sbuf->content = new_content;
199 sbuf->end = new_content + cntsize;
200 sbuf->last = new_content + size - 1;
205 * Append a string of a given length to the sbuf.
207 static void
208 sbuf_append(struct sbuf *sbuf, const char *text, int length)
210 if (length > 0) {
211 sbuf_need(sbuf, length);
212 (void) memcpy(sbuf->end, text, length);
213 sbuf->end += length;
/*
 * Add a NUL-terminated string (without its terminator) to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int	length = strlen(text);

	sbuf_append(sbuf, text, length);
}
228 * Append an edited null-terminated string to the sbuf.
230 static void
231 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
233 int length;
235 if ((length = strlen(text)) > 0) {
236 sbuf_need(sbuf, length);
237 sbuf->end = copy(text, sbuf->end, length);
242 * Strip any of a set of chars from the end of the sbuf.
244 static void
245 sbuf_strip(struct sbuf *sbuf, const char *set)
248 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
249 sbuf->end--;
253 * Return the null-terminated string built by the sbuf.
255 static char *
256 sbuf_content(struct sbuf *sbuf)
259 *sbuf->end = '\0';
260 return (sbuf->content);
264 * Return true if no man page exists in the directory with
265 * any of the names in the stringlist.
267 static int
268 no_page_exists(char *dir, stringlist *names, char *suffix)
270 char path[MAXPATHLEN];
271 char *suffixes[] = { "", ".gz", ".bz2", NULL };
272 size_t i;
273 int j;
275 for (i = 0; i < names->sl_cur; i++) {
276 for (j = 0; suffixes[j] != NULL; j++) {
277 (void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s",
278 dir, names->sl_str[i], suffix, suffixes[j]);
279 if (access(path, F_OK) == 0) {
280 return (0);
284 return (1);
287 /* ARGSUSED sig */
288 static void
289 trap_signal(int sig)
292 if (tempfile[0] != '\0')
293 (void) unlink(tempfile);
295 exit(1);
299 * Attempt to open an output file.
300 * Return NULL if unsuccessful.
302 static FILE *
303 open_output(char *name)
305 FILE *output;
307 whatis_lines = sl_init();
308 (void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name);
309 name = tempfile;
310 if ((output = fopen(name, "w")) == NULL) {
311 warn("%s", name);
312 return (NULL);
314 return (output);
/*
 * qsort() comparison callback for an array of C strings: each argument
 * points at one `char *' element, and ordering is that of strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return (strcmp(*lhs, *rhs));
}
325 * Write the unique sorted lines to the output file.
327 static void
328 finish_output(FILE *output, char *name)
330 size_t i;
331 char *prev = NULL;
333 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *),
334 linesort);
335 for (i = 0; i < whatis_lines->sl_cur; i++) {
336 char *line = whatis_lines->sl_str[i];
337 if (i > 0 && strcmp(line, prev) == 0)
338 continue;
339 prev = line;
340 (void) fputs(line, output);
341 (void) putc('\n', output);
343 (void) fclose(output);
344 sl_free(whatis_lines, 1);
345 (void) rename(tempfile, name);
346 (void) unlink(tempfile);
349 static FILE *
350 open_whatis(char *mandir)
352 char filename[MAXPATHLEN];
354 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
355 return (open_output(filename));
358 static void
359 finish_whatis(FILE *output, char *mandir)
361 char filename[MAXPATHLEN];
363 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
364 finish_output(output, filename);
/*
 * Remove trailing whitespace from a string in place, returning a pointer
 * to just beyond the new last character (that is, to the terminating NUL).
 */
static char *
trim_rhs(char *str)
{
	char	*rhs = &str[strlen(str)];

	/*
	 * Look at the character in front of rhs instead of stepping rhs
	 * first: rhs then never moves before str (which an empty string
	 * would otherwise cause), and a string consisting only of
	 * whitespace is trimmed all the way down to "".
	 */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return (rhs);
}
/*
 * Return a pointer to the next non-space character in the string, or to
 * its terminating NUL when only whitespace remains.
 */
static char *
skip_spaces(char *s)
{
	/*
	 * The cast keeps bytes above 0x7f from reaching isspace() as
	 * negative values.  NUL is not a space, so the loop ends there.
	 */
	while (isspace((unsigned char)*s))
		s++;

	return (s);
}
/*
 * Decide whether a line opens the NAME section, i.e. has one of the forms
 *	.Sh NAME
 *	.Sh "NAME"
 * and so on, where ".Sh" stands for the three-character section_start.
 * A matching prefix causes the line to be edited in place (trailing
 * whitespace and a closing quote are cut off).
 * Returns 1 for a NAME section line and 0 otherwise.
 */
static int
name_section_line(char *line, const char *section_start)
{
	char	*title;
	char	*tail;

	if (strncmp(line, section_start, 3) != 0)
		return (0);

	title = skip_spaces(line + 3);
	tail = trim_rhs(title);
	if (*title == '"') {
		/* Step inside the quotes, dropping a closing one if present */
		title++;
		tail--;
		if (*tail == '"')
			*tail = '\0';
	}

	return (strcmp(title, "NAME") == 0 ? 1 : 0);
}
/*
 * Copy characters while removing the most common nroff/troff markup:
 *	\(em, \(mi, \s[+-N], \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape the backslash is dropped and the character after
 * it is copied.
 *
 * `from' is read for exactly `fromlen' bytes and never beyond; the output
 * is never longer than the input.  Returns a pointer just past the last
 * byte written to `to'.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char	*from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}

		/*
		 * A backslash that ends the text has nothing to escape.
		 * Drop it rather than copying whatever follows the text
		 * (normally the NUL terminator) into the output.
		 */
		if (++from >= from_end)
			break;

		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			/* Unknown two-letter name: copy the '(' and go on */
			break;
		case 's':
			/* Size change: optional sign, then digits */
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end &&
			    isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from >= from_end)
				continue;
			if (*from == '(') {
				/* '(' plus a two-letter name, if all there */
				if (from_end - from >= 3)
					from += 3;
				else
					from = from_end;
			} else if (*from == '[') {
				/* Skip through the closing bracket */
				while (++from < from_end && *from != ']')
					;
				if (from < from_end)
					from++;
			} else {
				/* Single-letter name */
				from++;
			}
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		*to++ = *from++;
	}
	return (to);
}
476 * Append a string with the nroff formatting removed.
478 static void
479 add_nroff(char *text)
482 sbuf_append_edited(whatis_proto, text, de_nroff_copy);
486 * Appends "name(suffix), " to whatis_final
488 static void
489 add_whatis_name(char *name, char *suffix)
492 if (*name != '\0') {
493 sbuf_append_str(whatis_final, name);
494 sbuf_append(whatis_final, "(", 1);
495 sbuf_append_str(whatis_final, suffix);
496 sbuf_append(whatis_final, "), ", 3);
501 * Processes an old-style man(7) line. This ignores commands with only
502 * a single number argument.
504 static void
505 process_man_line(char *line)
507 char *p;
509 if (*line == '.') {
510 while (isalpha(*++line))
512 p = line = skip_spaces(line);
513 while (*p != '\0') {
514 if (!isdigit(*p))
515 break;
516 p++;
518 if (*p == '\0')
519 return;
520 } else
521 line = skip_spaces(line);
522 if (*line != '\0') {
523 add_nroff(line);
524 sbuf_append(whatis_proto, " ", 1);
529 * Processes a new-style mdoc(7) line.
531 static void
532 process_mdoc_line(char *line)
534 int xref;
535 int arg = 0;
536 char *line_end = &line[strlen(line)];
537 int orig_length = sbuf_length(whatis_proto);
538 char *next;
540 if (*line == '\0')
541 return;
542 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
543 add_nroff(skip_spaces(line));
544 sbuf_append(whatis_proto, " ", 1);
545 return;
547 xref = strncmp(line, ".Xr", 3) == 0;
548 line += 3;
549 while ((line = skip_spaces(line)) < line_end) {
550 if (*line == '"') {
551 next = ++line;
552 for (;;) {
553 next = strchr(next, '"');
554 if (next == NULL)
555 break;
556 (void) memmove(next, next + 1, strlen(next));
557 line_end--;
558 if (*next != '"')
559 break;
560 next++;
562 } else {
563 next = strpbrk(line, " \t");
565 if (next != NULL)
566 *next++ = '\0';
567 else
568 next = line_end;
569 if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
570 if (strcmp(line, "Ns") == 0) {
571 arg = 0;
572 line = next;
573 continue;
575 if (strstr(line, MDOC_COMMANDS) != NULL) {
576 line = next;
577 continue;
580 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
581 if (xref) {
582 sbuf_append(whatis_proto, "(", 1);
583 add_nroff(line);
584 sbuf_append(whatis_proto, ")", 1);
585 xref = 0;
586 } else {
587 sbuf_append(whatis_proto, " ", 1);
590 add_nroff(line);
591 arg++;
592 line = next;
594 if (sbuf_length(whatis_proto) > orig_length)
595 sbuf_append(whatis_proto, " ", 1);
599 * Collect a list of comma-separated names from the text.
601 static void
602 collect_names(stringlist *names, char *text)
604 char *arg;
606 for (;;) {
607 arg = text;
608 text = strchr(text, ',');
609 if (text != NULL)
610 *text++ = '\0';
611 (void) sl_add(names, arg);
612 if (text == NULL)
613 return;
614 if (*text == ' ')
615 text++;
619 enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
622 * Process a man page source into a single whatis line and add it
623 * to whatis_lines.
625 static void
626 process_page(struct page_info *page, char *section_dir)
628 FILE *fp;
629 stringlist *names;
630 char *descr;
631 int state = STATE_UNKNOWN;
632 size_t i;
633 char *line = NULL;
634 size_t linecap = 0;
636 sbuf_clear(whatis_proto);
637 if ((fp = fopen(page->filename, "r")) == NULL) {
638 warn("%s", page->filename);
639 return;
641 while (getline(&line, &linecap, fp) > 0) {
642 /* Skip comments */
643 if (strncmp(line, ".\\\"", 3) == 0)
644 continue;
645 switch (state) {
646 /* Haven't reached the NAME section yet */
647 case STATE_UNKNOWN:
648 if (name_section_line(line, ".SH"))
649 state = STATE_MANSTYLE;
650 else if (name_section_line(line, ".Sh"))
651 state = STATE_MDOCNAME;
652 continue;
653 /* Inside an old-style .SH NAME section */
654 case STATE_MANSTYLE:
655 if (strncmp(line, ".SH", 3) == 0 ||
656 strncmp(line, ".SS", 3) == 0)
657 break;
658 (void) trim_rhs(line);
659 if (strcmp(line, ".") == 0)
660 continue;
661 if (strncmp(line, ".IX", 3) == 0) {
662 line += 3;
663 line = skip_spaces(line);
665 process_man_line(line);
666 continue;
667 /* Inside a new-style .Sh NAME section (the .Nm part) */
668 case STATE_MDOCNAME:
669 (void) trim_rhs(line);
670 if (strncmp(line, ".Nm", 3) == 0) {
671 process_mdoc_line(line);
672 continue;
673 } else {
674 if (strcmp(line, ".") == 0)
675 continue;
676 sbuf_append(whatis_proto, "- ", 2);
677 state = STATE_MDOCDESC;
679 /* FALLTHROUGH */
680 /* Inside a new-style .Sh NAME section (after the .Nm-s) */
681 case STATE_MDOCDESC:
682 if (strncmp(line, ".Sh", 3) == 0)
683 break;
684 (void) trim_rhs(line);
685 if (strcmp(line, ".") == 0)
686 continue;
687 process_mdoc_line(line);
688 continue;
690 break;
692 (void) fclose(fp);
693 sbuf_strip(whatis_proto, " \t.-");
694 line = sbuf_content(whatis_proto);
696 * Line now contains the appropriate data, but without the
697 * proper indentation or the section appended to each name.
699 descr = strstr(line, " - ");
700 if (descr == NULL) {
701 descr = strchr(line, ' ');
702 if (descr == NULL)
703 return;
704 *descr++ = '\0';
705 } else {
706 *descr = '\0';
707 descr += 3;
709 names = sl_init();
710 collect_names(names, line);
711 sbuf_clear(whatis_final);
712 if (!sl_find(names, page->name) &&
713 no_page_exists(section_dir, names, page->suffix)) {
715 * Add the page name since that's the only
716 * thing that man(1) will find.
718 add_whatis_name(page->name, page->suffix);
720 for (i = 0; i < names->sl_cur; i++)
721 add_whatis_name(names->sl_str[i], page->suffix);
722 sl_free(names, 0);
723 /* Remove last ", " */
724 sbuf_retract(whatis_final, 2);
725 while (sbuf_length(whatis_final) < INDENT)
726 sbuf_append(whatis_final, " ", 1);
727 sbuf_append(whatis_final, " - ", 3);
728 sbuf_append_str(whatis_final, skip_spaces(descr));
729 (void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
733 * Sort pages first by inode number, then by name.
735 static int
736 pagesort(const void *a, const void *b)
738 const struct page_info *p1 = *(struct page_info * const *) a;
739 const struct page_info *p2 = *(struct page_info * const *) b;
741 if (p1->inode == p2->inode)
742 return (strcmp(p1->name, p2->name));
744 return (p1->inode - p2->inode);
748 * Process a single man section.
750 static void
751 process_section(char *section_dir)
753 struct dirent **entries;
754 int nentries;
755 struct page_info **pages;
756 int npages = 0;
757 int i;
758 ino_t prev_inode = 0;
760 /* Scan the man section directory for pages */
761 nentries = scandir(section_dir, &entries, NULL, alphasort);
763 /* Collect information about man pages */
764 pages = (struct page_info **)calloc(nentries,
765 sizeof (struct page_info *));
766 for (i = 0; i < nentries; i++) {
767 struct page_info *info = new_page_info(section_dir, entries[i]);
768 if (info != NULL)
769 pages[npages++] = info;
770 free(entries[i]);
772 free(entries);
773 qsort(pages, npages, sizeof (struct page_info *), pagesort);
775 /* Process each unique page */
776 for (i = 0; i < npages; i++) {
777 struct page_info *page = pages[i];
778 if (page->inode != prev_inode) {
779 prev_inode = page->inode;
780 process_page(page, section_dir);
782 free_page_info(page);
784 free(pages);
/*
 * Return whether the directory entry is a man page section, i.e. whether
 * its name is "man" followed by nothing but alphanumeric characters.
 * Meant to be used as the selection callback of scandir().
 */
static int
select_sections(const struct dirent *entry)
{
	const char	*p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return (0);

	/* Only look beyond the prefix once it is known to be there */
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return (0);
	}
	return (1);
}
805 * Process a single top-level man directory by finding all the
806 * sub-directories named man* and processing each one in turn.
808 void
809 mwpath(char *path)
811 FILE *fp = NULL;
812 struct dirent **entries;
813 int nsections;
814 int i;
816 (void) signal(SIGINT, trap_signal);
817 (void) signal(SIGHUP, trap_signal);
818 (void) signal(SIGQUIT, trap_signal);
819 (void) signal(SIGTERM, trap_signal);
821 whatis_proto = new_sbuf();
822 whatis_final = new_sbuf();
824 nsections = scandir(path, &entries, select_sections, alphasort);
825 if ((fp = open_whatis(path)) == NULL)
826 return;
827 for (i = 0; i < nsections; i++) {
828 char section_dir[MAXPATHLEN];
830 (void) snprintf(section_dir, MAXPATHLEN, "%s/%s",
831 path, entries[i]->d_name);
832 process_section(section_dir);
833 free(entries[i]);
835 free(entries);
836 finish_whatis(fp, path);