2 * Copyright (c) 2002 John Rochester
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
31 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
34 #include <sys/types.h>
36 #include <sys/param.h>
49 #include "stringlist.h"
/* Information collected about each man page in a section */
/*
 * An expanding string.  Three pointers into one buffer describe where
 * the buffer starts, where the content stops and where the allocation
 * stops.
 */
struct sbuf {
	char	*content;	/* the start of the buffer */
	char	*end;		/* just past the end of the content */
	char	*last;		/* the last allocated character */
};
/* Drop the trailing `count` characters from the sbuf content. */
#define	sbuf_retract(sb, count)	((sb)->end -= (count))
/* Number of characters currently held in the sbuf content. */
#define	sbuf_length(sb)		((sb)->end - (sb)->content)
/*
 * Signature of a copy routine handed to sbuf_append_edited().  It is
 * called with the source text, the current end of the sbuf content and
 * the source length; the pointer it returns becomes the new end of the
 * content.
 */
typedef char *edited_copy(char *src, char *dst, int src_len);
75 * While the whatis line is being formed, it is stored in whatis_proto.
76 * When finished, it is reformatted into whatis_final and then appended
79 static struct sbuf
*whatis_proto
;
80 static struct sbuf
*whatis_final
;
81 static stringlist
*whatis_lines
; /* collected output lines */
/*
 * Path of the temporary file, if any.  An empty string (first byte
 * '\0') means there is no temporary file to unlink.
 */
static char	tempfile[MAXPATHLEN];
/*
 * String searched with strstr() while an mdoc line is processed.
 * NOTE(review): appears to be a concatenation of two-letter mdoc macro
 * names (Ar, Dv, Er, Ev, Fl, Li, Nm, Pa) -- confirm against mdoc(7).
 */
#define	MDOC_COMMANDS	"ArDvErEvFlLiNmPa"
/* Free a struct page_info and its content */
90 free_page_info(struct page_info
*info
)
/*
 * Allocate and fill in a new struct page_info given the
 * name of the man section directory and the dirent of the file.
 * If the file is not a man page, return NULL.
 */
104 static struct page_info
*
105 new_page_info(char *dir
, struct dirent
*dirent
)
107 struct page_info
*info
;
112 if ((info
= malloc(sizeof (struct page_info
))) == NULL
)
114 basename_length
= strlen(dirent
->d_name
);
115 suffix
= &dirent
->d_name
[basename_length
];
116 if (asprintf(&info
->filename
, "%s/%s", dir
, dirent
->d_name
) == -1)
119 if (--suffix
== dirent
->d_name
|| !isalnum(*suffix
)) {
122 free(info
->filename
);
128 info
->name
= strdup(dirent
->d_name
);
129 info
->suffix
= strdup(suffix
);
130 if (stat(info
->filename
, &st
) < 0) {
131 warn("%s", info
->filename
);
132 free_page_info(info
);
135 if (!S_ISREG(st
.st_mode
)) {
136 free_page_info(info
);
139 info
->inode
= st
.st_ino
;
144 * Reset sbuf length to 0.
147 sbuf_clear(struct sbuf
*sbuf
)
150 sbuf
->end
= sbuf
->content
;
/*
 * Allocate a new sbuf.
 */
161 if ((sbuf
= malloc(sizeof (struct sbuf
))) == NULL
)
163 if ((sbuf
->content
= (char *)malloc(LINE_ALLOC
)) == NULL
)
165 sbuf
->last
= sbuf
->content
+ LINE_ALLOC
- 1;
/*
 * Ensure that there is enough room in the sbuf
 * for nchars more characters.
 */
176 sbuf_need(struct sbuf
*sbuf
, int nchars
)
179 size_t size
, cntsize
;
182 while (grow
< nchars
) {
183 grow
+= 128; /* we grow in chunks of 128 bytes */
186 /* Grow if the buffer isn't big enough */
187 if (sbuf
->end
+ nchars
> sbuf
->last
) {
188 size
= sbuf
->last
+ 1 - sbuf
->content
;
190 cntsize
= sbuf
->end
- sbuf
->content
;
192 if ((new_content
= realloc(sbuf
->content
, size
)) == NULL
) {
194 if (tempfile
[0] != '\0')
195 (void) unlink(tempfile
);
198 sbuf
->content
= new_content
;
199 sbuf
->end
= new_content
+ cntsize
;
200 sbuf
->last
= new_content
+ size
- 1;
/*
 * Append a string of a given length to the sbuf.
 */
208 sbuf_append(struct sbuf
*sbuf
, const char *text
, int length
)
211 sbuf_need(sbuf
, length
);
212 (void) memcpy(sbuf
->end
, text
, length
);
/*
 * Append the whole of a null-terminated string to the sbuf.  The length
 * is measured with strlen() and the work is delegated to sbuf_append().
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	size_t text_len = strlen(text);

	sbuf_append(sbuf, text, text_len);
}
/*
 * Append an edited null-terminated string to the sbuf.
 */
231 sbuf_append_edited(struct sbuf
*sbuf
, char *text
, edited_copy copy
)
235 if ((length
= strlen(text
)) > 0) {
236 sbuf_need(sbuf
, length
);
237 sbuf
->end
= copy(text
, sbuf
->end
, length
);
/*
 * Strip any of a set of chars from the end of the sbuf.
 */
245 sbuf_strip(struct sbuf
*sbuf
, const char *set
)
248 while (sbuf
->end
> sbuf
->content
&& strchr(set
, sbuf
->end
[-1]) != NULL
)
/*
 * Return the null-terminated string built by the sbuf.
 */
256 sbuf_content(struct sbuf
*sbuf
)
260 return (sbuf
->content
);
/*
 * Return true if no man page exists in the directory with
 * any of the names in the stringlist.
 */
268 no_page_exists(char *dir
, stringlist
*names
, char *suffix
)
270 char path
[MAXPATHLEN
];
271 char *suffixes
[] = { "", ".gz", ".bz2", NULL
};
275 for (i
= 0; i
< names
->sl_cur
; i
++) {
276 for (j
= 0; suffixes
[j
] != NULL
; j
++) {
277 (void) snprintf(path
, MAXPATHLEN
, "%s/%s.%s%s",
278 dir
, names
->sl_str
[i
], suffix
, suffixes
[j
]);
279 if (access(path
, F_OK
) == 0) {
292 if (tempfile
[0] != '\0')
293 (void) unlink(tempfile
);
/*
 * Attempt to open an output file.
 * Return NULL if unsuccessful.
 */
303 open_output(char *name
)
307 whatis_lines
= sl_init();
308 (void) snprintf(tempfile
, MAXPATHLEN
, "%s.tmp", name
);
310 if ((output
= fopen(name
, "w")) == NULL
) {
/*
 * Comparator with the qsort() callback signature for an array of C
 * strings.  Each argument points at a `char *` element, and the strings
 * they refer to are ordered with strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return (strcmp(lhs, rhs));
}
/*
 * Write the unique sorted lines to the output file.
 */
328 finish_output(FILE *output
, char *name
)
333 qsort(whatis_lines
->sl_str
, whatis_lines
->sl_cur
, sizeof (char *),
335 for (i
= 0; i
< whatis_lines
->sl_cur
; i
++) {
336 char *line
= whatis_lines
->sl_str
[i
];
337 if (i
> 0 && strcmp(line
, prev
) == 0)
340 (void) fputs(line
, output
);
341 (void) putc('\n', output
);
343 (void) fclose(output
);
344 sl_free(whatis_lines
, 1);
345 (void) rename(tempfile
, name
);
346 (void) unlink(tempfile
);
350 open_whatis(char *mandir
)
352 char filename
[MAXPATHLEN
];
354 (void) snprintf(filename
, MAXPATHLEN
, "%s/%s", mandir
, WHATIS
);
355 return (open_output(filename
));
359 finish_whatis(FILE *output
, char *mandir
)
361 char filename
[MAXPATHLEN
];
363 (void) snprintf(filename
, MAXPATHLEN
, "%s/%s", mandir
, WHATIS
);
364 finish_output(output
, filename
);
/*
 * Remove trailing spaces from a string, returning a pointer to just
 * beyond the new last character.
 */
376 rhs
= &str
[strlen(str
)];
377 while (--rhs
> str
&& isspace(*rhs
))
/*
 * Return a pointer to the next non-space character in the string.
 */
390 while (*s
!= '\0' && isspace(*s
))
397 * Return whether the line is of one of the forms:
401 * assuming that section_start is ".Sh".
404 name_section_line(char *line
, const char *section_start
)
408 if (strncmp(line
, section_start
, 3) != 0)
410 line
= skip_spaces(line
+ 3);
411 rhs
= trim_rhs(line
);
417 if (strcmp(line
, "NAME") == 0)
/*
 * Copy characters while removing the most common nroff/troff markup:
 *	\(em, \(mi, \s[+-N], \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 */
430 de_nroff_copy(char *from
, char *to
, int fromlen
)
432 char *from_end
= &from
[fromlen
];
434 while (from
< from_end
) {
439 if (strncmp(&from
[1], "em", 2) == 0 ||
440 strncmp(&from
[1], "mi", 2) == 0) {
448 while (isdigit(*from
))
453 if (*++from
== '(') {
455 } else if (*from
== '[') {
456 while (*++from
!= ']' &&
476 * Append a string with the nroff formatting removed.
479 add_nroff(char *text
)
482 sbuf_append_edited(whatis_proto
, text
, de_nroff_copy
);
/*
 * Appends "name(suffix), " to whatis_final.
 */
489 add_whatis_name(char *name
, char *suffix
)
493 sbuf_append_str(whatis_final
, name
);
494 sbuf_append(whatis_final
, "(", 1);
495 sbuf_append_str(whatis_final
, suffix
);
496 sbuf_append(whatis_final
, "), ", 3);
/*
 * Processes an old-style man(7) line. This ignores commands with only
 * a single number argument.
 */
505 process_man_line(char *line
)
510 while (isalpha(*++line
))
512 p
= line
= skip_spaces(line
);
521 line
= skip_spaces(line
);
524 sbuf_append(whatis_proto
, " ", 1);
/*
 * Processes a new-style mdoc(7) line.
 */
532 process_mdoc_line(char *line
)
536 char *line_end
= &line
[strlen(line
)];
537 int orig_length
= sbuf_length(whatis_proto
);
542 if (line
[0] != '.' || !isupper(line
[1]) || !islower(line
[2])) {
543 add_nroff(skip_spaces(line
));
544 sbuf_append(whatis_proto
, " ", 1);
547 xref
= strncmp(line
, ".Xr", 3) == 0;
549 while ((line
= skip_spaces(line
)) < line_end
) {
553 next
= strchr(next
, '"');
556 (void) memmove(next
, next
+ 1, strlen(next
));
563 next
= strpbrk(line
, " \t");
569 if (isupper(*line
) && islower(line
[1]) && line
[2] == '\0') {
570 if (strcmp(line
, "Ns") == 0) {
575 if (strstr(line
, MDOC_COMMANDS
) != NULL
) {
580 if (arg
> 0 && strchr(",.:;?!)]", *line
) == 0) {
582 sbuf_append(whatis_proto
, "(", 1);
584 sbuf_append(whatis_proto
, ")", 1);
587 sbuf_append(whatis_proto
, " ", 1);
594 if (sbuf_length(whatis_proto
) > orig_length
)
595 sbuf_append(whatis_proto
, " ", 1);
/*
 * Collect a list of comma-separated names from the text.
 */
602 collect_names(stringlist
*names
, char *text
)
608 text
= strchr(text
, ',');
611 (void) sl_add(names
, arg
);
619 enum { STATE_UNKNOWN
, STATE_MANSTYLE
, STATE_MDOCNAME
, STATE_MDOCDESC
};
622 * Process a man page source into a single whatis line and add it
626 process_page(struct page_info
*page
, char *section_dir
)
631 int state
= STATE_UNKNOWN
;
636 sbuf_clear(whatis_proto
);
637 if ((fp
= fopen(page
->filename
, "r")) == NULL
) {
638 warn("%s", page
->filename
);
641 while (getline(&line
, &linecap
, fp
) > 0) {
643 if (strncmp(line
, ".\\\"", 3) == 0)
646 /* Haven't reached the NAME section yet */
648 if (name_section_line(line
, ".SH"))
649 state
= STATE_MANSTYLE
;
650 else if (name_section_line(line
, ".Sh"))
651 state
= STATE_MDOCNAME
;
653 /* Inside an old-style .SH NAME section */
655 if (strncmp(line
, ".SH", 3) == 0 ||
656 strncmp(line
, ".SS", 3) == 0)
658 (void) trim_rhs(line
);
659 if (strcmp(line
, ".") == 0)
661 if (strncmp(line
, ".IX", 3) == 0) {
663 line
= skip_spaces(line
);
665 process_man_line(line
);
667 /* Inside a new-style .Sh NAME section (the .Nm part) */
669 (void) trim_rhs(line
);
670 if (strncmp(line
, ".Nm", 3) == 0) {
671 process_mdoc_line(line
);
674 if (strcmp(line
, ".") == 0)
676 sbuf_append(whatis_proto
, "- ", 2);
677 state
= STATE_MDOCDESC
;
680 /* Inside a new-style .Sh NAME section (after the .Nm-s) */
682 if (strncmp(line
, ".Sh", 3) == 0)
684 (void) trim_rhs(line
);
685 if (strcmp(line
, ".") == 0)
687 process_mdoc_line(line
);
693 sbuf_strip(whatis_proto
, " \t.-");
694 line
= sbuf_content(whatis_proto
);
696 * Line now contains the appropriate data, but without the
697 * proper indentation or the section appended to each name.
699 descr
= strstr(line
, " - ");
701 descr
= strchr(line
, ' ');
710 collect_names(names
, line
);
711 sbuf_clear(whatis_final
);
712 if (!sl_find(names
, page
->name
) &&
713 no_page_exists(section_dir
, names
, page
->suffix
)) {
715 * Add the page name since that's the only
716 * thing that man(1) will find.
718 add_whatis_name(page
->name
, page
->suffix
);
720 for (i
= 0; i
< names
->sl_cur
; i
++)
721 add_whatis_name(names
->sl_str
[i
], page
->suffix
);
723 /* Remove last ", " */
724 sbuf_retract(whatis_final
, 2);
725 while (sbuf_length(whatis_final
) < INDENT
)
726 sbuf_append(whatis_final
, " ", 1);
727 sbuf_append(whatis_final
, " - ", 3);
728 sbuf_append_str(whatis_final
, skip_spaces(descr
));
729 (void) sl_add(whatis_lines
, strdup(sbuf_content(whatis_final
)));
733 * Sort pages first by inode number, then by name.
736 pagesort(const void *a
, const void *b
)
738 const struct page_info
*p1
= *(struct page_info
* const *) a
;
739 const struct page_info
*p2
= *(struct page_info
* const *) b
;
741 if (p1
->inode
== p2
->inode
)
742 return (strcmp(p1
->name
, p2
->name
));
744 return (p1
->inode
- p2
->inode
);
/*
 * Process a single man section.
 */
751 process_section(char *section_dir
)
753 struct dirent
**entries
;
755 struct page_info
**pages
;
758 ino_t prev_inode
= 0;
760 /* Scan the man section directory for pages */
761 nentries
= scandir(section_dir
, &entries
, NULL
, alphasort
);
763 /* Collect information about man pages */
764 pages
= (struct page_info
**)calloc(nentries
,
765 sizeof (struct page_info
*));
766 for (i
= 0; i
< nentries
; i
++) {
767 struct page_info
*info
= new_page_info(section_dir
, entries
[i
]);
769 pages
[npages
++] = info
;
773 qsort(pages
, npages
, sizeof (struct page_info
*), pagesort
);
775 /* Process each unique page */
776 for (i
= 0; i
< npages
; i
++) {
777 struct page_info
*page
= pages
[i
];
778 if (page
->inode
!= prev_inode
) {
779 prev_inode
= page
->inode
;
780 process_page(page
, section_dir
);
782 free_page_info(page
);
/*
 * Return whether the directory entry is a man page section.
 */
791 select_sections(const struct dirent
*entry
)
793 const char *p
= &entry
->d_name
[3];
795 if (strncmp(entry
->d_name
, "man", 3) != 0)
/*
 * Process a single top-level man directory by finding all the
 * sub-directories named man* and processing each one in turn.
 */
812 struct dirent
**entries
;
816 (void) signal(SIGINT
, trap_signal
);
817 (void) signal(SIGHUP
, trap_signal
);
818 (void) signal(SIGQUIT
, trap_signal
);
819 (void) signal(SIGTERM
, trap_signal
);
821 whatis_proto
= new_sbuf();
822 whatis_final
= new_sbuf();
824 nsections
= scandir(path
, &entries
, select_sections
, alphasort
);
825 if ((fp
= open_whatis(path
)) == NULL
)
827 for (i
= 0; i
< nsections
; i
++) {
828 char section_dir
[MAXPATHLEN
];
830 (void) snprintf(section_dir
, MAXPATHLEN
, "%s/%s",
831 path
, entries
[i
]->d_name
);
832 process_section(section_dir
);
836 finish_whatis(fp
, path
);