2 * Copyright (c) 2002 John Rochester
3 * Copyright (c) 2013-2014 Franco Fichtner <franco@lastsummer.de>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer,
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * $FreeBSD: src/usr.bin/makewhatis/makewhatis.c,v 1.9 2002/09/04 23:29:04 dwmalone Exp $
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/queue.h>
44 #include <stringlist.h>
48 #define DEFAULT_MANPATH "/usr/share/man"
49 #define LINE_ALLOC 4096
51 static char blank
[] = "";
54 * Information collected about each man page alias.
57 RB_ENTRY(page_alias
) entry
;
65 * Information collected about each unique man page.
68 RB_HEAD(page_alias_tree
, page_alias
) head
;
69 RB_ENTRY(page_info
) entry
;
73 static RB_HEAD(page_info_tree
, page_info
) page_head
= RB_INITIALIZER(&page_head
);
76 * Sorts page info by inode number.
79 infosort(const struct page_info
*a
, const struct page_info
*b
)
81 return (memcmp(&a
->inode
, &b
->inode
, sizeof(a
->inode
)));
84 RB_PROTOTYPE(page_info_tree
, page_info
, entry
, infosort
);
85 RB_GENERATE(page_info_tree
, page_info
, entry
, infosort
);
88 * Sorts page alias first by suffix, then name.
91 aliassort(const struct page_alias
*a
, const struct page_alias
*b
)
93 int ret
= strcmp(a
->suffix
, b
->suffix
);
98 return (strcmp(a
->name
, b
->name
));
101 RB_PROTOTYPE(page_alias_tree
, page_alias
, entry
, aliassort
);
102 RB_GENERATE(page_alias_tree
, page_alias
, entry
, aliassort
);
105 * An entry kept for each visited directory.
110 SLIST_ENTRY(visited_dir
) next
;
114 * an expanding string
117 char * content
; /* the start of the buffer */
118 char * end
; /* just past the end of the content */
119 char * last
; /* the last allocated character */
123 * Removes the last amount characters from the sbuf.
125 #define sbuf_retract(sbuf, amount) \
126 ((sbuf)->end -= (amount))
128 * Returns the length of the sbuf content.
130 #define sbuf_length(sbuf) \
131 ((sbuf)->end - (sbuf)->content)
133 typedef char *edited_copy(char *from
, char *to
, int length
);
135 static int append
; /* -a flag: append to existing whatis */
136 static int verbose
; /* -v flag: be verbose with warnings */
137 static int indent
= 24; /* -i option: description indentation */
138 static const char *whatis_name
="whatis";/* -n option: the name */
139 static char *common_output
; /* -o option: the single output file */
140 static char *locale
; /* user's locale if -L is used */
141 static char *lang_locale
; /* short form of locale */
142 static const char *machine
;
144 static int exit_code
; /* exit code to use when finished */
145 static SLIST_HEAD(, visited_dir
) visited_dirs
=
146 SLIST_HEAD_INITIALIZER(visited_dirs
);
149 * While the whatis line is being formed, it is stored in whatis_proto.
150 * When finished, it is reformatted into whatis_final and then appended
153 static struct sbuf
*whatis_proto
;
154 static struct sbuf
*whatis_final
;
155 static StringList
*whatis_lines
; /* collected output lines */
157 static char tmp_file
[MAXPATHLEN
]; /* path of temporary file, if any */
159 /* A set of possible names for the NAME man page section */
160 static const char *name_section_titles
[] = {
161 "NAME", "Name", "NAMN", "BEZEICHNUNG", "\xcc\xbe\xbe\xce",
162 "\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5", NULL
165 /* A subset of the mdoc(7) commands to ignore */
166 static char mdoc_commands
[] = "ArDvErEvFlLiNmPa";
169 * Frees a struct page_info and its content.
172 free_page_info(struct page_info
*info
)
174 struct page_alias
*alias
;
176 while ((alias
= RB_ROOT(&info
->head
))) {
177 RB_REMOVE(page_alias_tree
, &info
->head
, alias
);
178 free(alias
->filename
);
188 * Allocates and fills in a new struct page_alias given the
189 * full file name of the man page and its dirent.
190 * If the file is not a man page, nothing is added.
193 new_page_alias(struct page_info
*info
, char *filename
, struct dirent
*dirent
)
196 struct page_alias
*alias
;
200 basename_length
= strlen(dirent
->d_name
);
201 suffix
= &dirent
->d_name
[basename_length
];
202 gzipped
= basename_length
>= 4 &&
203 strcmp(&dirent
->d_name
[basename_length
- 3], ".gz") == 0;
209 if (--suffix
== dirent
->d_name
|| !isalnum(*suffix
)) {
213 warnx("%s: invalid man page name",
221 alias
= malloc(sizeof(*alias
));
226 alias
->name
= strdup(dirent
->d_name
);
227 alias
->filename
= strdup(filename
);
228 alias
->suffix
= strdup(suffix
);
229 alias
->gzipped
= gzipped
;
231 if (!alias
->name
|| !alias
->filename
|| !alias
->suffix
) {
235 RB_INSERT(page_alias_tree
, &info
->head
, alias
);
239 * Reset an sbuf's length to 0.
242 sbuf_clear(struct sbuf
*sbuf
)
244 sbuf
->end
= sbuf
->content
;
248 * Allocate a new sbuf.
253 struct sbuf
*sbuf
= (struct sbuf
*) malloc(sizeof(struct sbuf
));
254 sbuf
->content
= malloc(LINE_ALLOC
);
255 sbuf
->last
= sbuf
->content
+ LINE_ALLOC
- 1;
261 * Ensure that there is enough room in the sbuf for nchars more characters.
264 sbuf_need(struct sbuf
*sbuf
, int nchars
)
267 size_t size
, cntsize
;
269 /* double the size of the allocation until the buffer is big enough */
270 while (sbuf
->end
+ nchars
> sbuf
->last
) {
271 size
= sbuf
->last
+ 1 - sbuf
->content
;
273 cntsize
= sbuf
->end
- sbuf
->content
;
275 new_content
= malloc(size
);
276 memcpy(new_content
, sbuf
->content
, cntsize
);
278 sbuf
->content
= new_content
;
279 sbuf
->end
= new_content
+ cntsize
;
280 sbuf
->last
= new_content
+ size
- 1;
285 * Appends a string of a given length to the sbuf.
288 sbuf_append(struct sbuf
*sbuf
, const char *text
, int length
)
291 sbuf_need(sbuf
, length
);
292 memcpy(sbuf
->end
, text
, length
);
298 * Appends a null-terminated string to the sbuf.
301 sbuf_append_str(struct sbuf
*sbuf
, char *text
)
303 sbuf_append(sbuf
, text
, strlen(text
));
307 * Appends an edited null-terminated string to the sbuf.
310 sbuf_append_edited(struct sbuf
*sbuf
, char *text
, edited_copy copy
)
312 int length
= strlen(text
);
314 sbuf_need(sbuf
, length
);
315 sbuf
->end
= copy(text
, sbuf
->end
, length
);
320 * Strips any of a set of chars from the end of the sbuf.
323 sbuf_strip(struct sbuf
*sbuf
, const char *set
)
325 while (sbuf
->end
> sbuf
->content
&& strchr(set
, sbuf
->end
[-1]) != NULL
)
330 * Returns the null-terminated string built by the sbuf.
333 sbuf_content(struct sbuf
*sbuf
)
336 return(sbuf
->content
);
340 trap_signal(int sig __unused
)
342 if (tmp_file
[0] != '\0')
348 * Attempts to open an output file. Returns NULL if unsuccessful.
351 open_output(char *name
)
355 whatis_lines
= sl_init();
357 char line
[LINE_ALLOC
];
359 output
= fopen(name
, "r");
360 if (output
== NULL
) {
365 while (fgets(line
, sizeof line
, output
) != NULL
) {
366 line
[strlen(line
) - 1] = '\0';
367 sl_add(whatis_lines
, strdup(line
));
370 if (common_output
== NULL
) {
371 snprintf(tmp_file
, sizeof tmp_file
, "%s.tmp", name
);
374 output
= fopen(name
, "w");
375 if (output
== NULL
) {
384 linesort(const void *a
, const void *b
)
386 return(strcmp((*(const char * const *)a
), (*(const char * const *)b
)));
390 * Writes the unique sorted lines to the output file.
393 finish_output(FILE *output
, char *name
)
398 qsort(whatis_lines
->sl_str
, whatis_lines
->sl_cur
, sizeof(char *),
400 for (i
= 0; i
< whatis_lines
->sl_cur
; i
++) {
401 char *line
= whatis_lines
->sl_str
[i
];
402 if (i
> 0 && strcmp(line
, prev
) == 0)
409 sl_free(whatis_lines
, 1);
410 if (common_output
== NULL
) {
411 rename(tmp_file
, name
);
417 open_whatis(char *mandir
)
419 char filename
[MAXPATHLEN
];
421 snprintf(filename
, sizeof filename
, "%s/%s", mandir
, whatis_name
);
422 return(open_output(filename
));
426 finish_whatis(FILE *output
, char *mandir
)
428 char filename
[MAXPATHLEN
];
430 snprintf(filename
, sizeof filename
, "%s/%s", mandir
, whatis_name
);
431 finish_output(output
, filename
);
435 * Tests to see if the given directory has already been visited.
438 already_visited(char *dir
)
441 struct visited_dir
*visit
;
443 if (stat(dir
, &st
) < 0) {
448 SLIST_FOREACH(visit
, &visited_dirs
, next
) {
449 if (visit
->inode
== st
.st_ino
&&
450 visit
->device
== st
.st_dev
) {
451 warnx("already visited %s", dir
);
455 visit
= (struct visited_dir
*) malloc(sizeof(struct visited_dir
));
456 visit
->device
= st
.st_dev
;
457 visit
->inode
= st
.st_ino
;
458 SLIST_INSERT_HEAD(&visited_dirs
, visit
, next
);
463 * Removes trailing spaces from a string, returning a pointer to just
464 * beyond the new last character.
469 char *rhs
= &str
[strlen(str
)];
470 while (--rhs
> str
&& isspace(*rhs
))
477 * Returns a pointer to the next non-space character in the string.
482 while (*s
!= '\0' && isspace(*s
))
488 * Returns whether the string contains only digits.
491 only_digits(char *line
)
493 if (!isdigit(*line
++))
495 while (isdigit(*line
))
497 return(*line
== '\0');
501 * Returns whether the line is of one of the forms:
505 * assuming that section_start is ".Sh".
508 name_section_line(char *line
, const char *section_start
)
513 if (strncmp(line
, section_start
, 3) != 0)
515 line
= skip_spaces(line
+ 3);
516 rhs
= trim_rhs(line
);
522 for (title
= name_section_titles
; *title
!= NULL
; title
++)
523 if (strcmp(*title
, line
) == 0)
529 * Copies characters while removing the most common nroff/troff
531 * \(em, \(mi, \s[+-N], \&
532 * \fF, \f(fo, \f[font]
533 * \*s, \*(st, \*[stringvar]
536 de_nroff_copy(char *from
, char *to
, int fromlen
)
538 char *from_end
= &from
[fromlen
];
539 while (from
< from_end
) {
544 if (strncmp(&from
[1], "em", 2) == 0 ||
545 strncmp(&from
[1], "mi", 2) == 0) {
553 while (isdigit(*from
))
560 else if (*from
== '[') {
561 while (*++from
!= ']' && from
< from_end
)
579 * Appends a string with the nroff formatting removed.
582 add_nroff(char *text
)
584 sbuf_append_edited(whatis_proto
, text
, de_nroff_copy
);
588 * Appends "name(suffix), " to whatis_final.
591 add_whatis_name(char *name
, char *suffix
)
594 sbuf_append_str(whatis_final
, name
);
595 sbuf_append(whatis_final
, "(", 1);
596 sbuf_append_str(whatis_final
, suffix
);
597 sbuf_append(whatis_final
, "), ", 3);
602 * Processes an old-style man(7) line. This ignores commands with only
603 * a single number argument.
606 process_man_line(char *line
)
609 while (isalpha(*++line
))
611 line
= skip_spaces(line
);
612 if (only_digits(line
))
615 line
= skip_spaces(line
);
618 sbuf_append(whatis_proto
, " ", 1);
628 process_mdoc_macro(char *line
)
630 static const struct mdoc_text list
[] = {
631 { ".At", "AT&T UNIX" },
632 { ".Bsx", "BSD/OS" },
634 { ".Dx", "DragonFly" },
635 { ".Fx", "FreeBSD" },
637 { ".Ox", "OpenBSD" },
642 for (i
= 0; i
< sizeof(list
) / sizeof(list
[0]); ++i
) {
643 if (!strcmp(line
, list
[i
].mdoc
)) {
644 sbuf_append(whatis_proto
, list
[i
].text
,
645 strlen(list
[i
].text
));
646 sbuf_append(whatis_proto
, " ", 1);
655 * Processes a new-style mdoc(7) line.
658 process_mdoc_line(char *line
)
662 char *line_end
= &line
[strlen(line
)];
663 int orig_length
= sbuf_length(whatis_proto
);
668 if (line
[0] != '.' || !isupper(line
[1]) || !islower(line
[2])) {
669 add_nroff(skip_spaces(line
));
670 sbuf_append(whatis_proto
, " ", 1);
673 if (process_mdoc_macro(line
)) {
676 xref
= strncmp(line
, ".Xr", 3) == 0;
678 while ((line
= skip_spaces(line
)) < line_end
) {
682 next
= strchr(next
, '"');
685 memmove(next
, next
+ 1, strlen(next
));
692 next
= strpbrk(line
, " \t");
697 if (isupper(*line
) && islower(line
[1]) && line
[2] == '\0') {
698 if (strcmp(line
, "Ns") == 0) {
703 if (strstr(mdoc_commands
, line
) != NULL
) {
708 if (arg
> 0 && strchr(",.:;?!)]", *line
) == 0) {
710 sbuf_append(whatis_proto
, "(", 1);
712 sbuf_append(whatis_proto
, ")", 1);
716 sbuf_append(whatis_proto
, " ", 1);
722 if (sbuf_length(whatis_proto
) > orig_length
)
723 sbuf_append(whatis_proto
, " ", 1);
726 enum { STATE_UNKNOWN
, STATE_MANSTYLE
, STATE_MDOCNAME
, STATE_MDOCDESC
};
729 * Processes a man page source into a single whatis line and adds it
733 process_page(struct page_info
*info
)
739 int state
= STATE_UNKNOWN
;
740 struct page_alias
*alias
;
743 * Only read the page once for each inode. It's
744 * safe to assume that page->list is set.
746 alias
= RB_MIN(page_alias_tree
, &info
->head
);
749 fprintf(stderr
, "\treading %s\n", alias
->filename
);
752 sbuf_clear(whatis_proto
);
753 if ((in
= gzopen(alias
->filename
, "r")) == NULL
) {
754 warn("%s", alias
->filename
);
758 while (gzgets(in
, buffer
, sizeof buffer
) != NULL
) {
760 if (strncmp(line
, ".\\\"", 3) == 0) /* ignore comments */
764 * haven't reached the NAME section yet.
767 if (name_section_line(line
, ".SH"))
768 state
= STATE_MANSTYLE
;
769 else if (name_section_line(line
, ".Sh"))
770 state
= STATE_MDOCNAME
;
773 * Inside an old-style .SH NAME section.
776 if (strncmp(line
, ".SH", 3) == 0)
778 if (strncmp(line
, ".SS", 3) == 0)
781 if (strcmp(line
, ".") == 0)
783 if (strncmp(line
, ".IX", 3) == 0) {
785 line
= skip_spaces(line
);
787 process_man_line(line
);
790 * Inside a new-style .Sh NAME section (the .Nm part).
794 if (strncmp(line
, ".Nm", 3) == 0) {
795 process_mdoc_line(line
);
798 if (strcmp(line
, ".") == 0)
800 sbuf_append(whatis_proto
, "- ", 2);
801 state
= STATE_MDOCDESC
;
805 * Inside a new-style .Sh NAME section (after the .Nm-s).
808 if (strncmp(line
, ".Sh", 3) == 0)
811 if (strcmp(line
, ".") == 0)
813 process_mdoc_line(line
);
819 sbuf_strip(whatis_proto
, " \t.-");
820 line
= sbuf_content(whatis_proto
);
822 * line now contains the appropriate data, but without
823 * the proper indentation or the section appended to each name.
825 descr
= strstr(line
, " - ");
827 descr
= strchr(line
, ' ');
831 "\tignoring junk description \"%s\"\n",
840 sbuf_clear(whatis_final
);
841 RB_FOREACH(alias
, page_alias_tree
, &info
->head
) {
843 * This won't append names stored in `line'.
844 * The reason for that is that we cannot be sure
845 * which section they belong to unless we have
846 * a real alias (via MLINKS) in this list.
848 add_whatis_name(alias
->name
, alias
->suffix
);
851 char *arg
, *text
= line
;
857 * See if there are names in the manual that
858 * are not in the alias list provided by the
859 * MLINKS. We may want to add those as well.
861 RB_FOREACH(alias
, page_alias_tree
, &info
->head
)
862 sl_add(names
, alias
->name
);
866 text
= strchr(text
, ',');
869 if (!sl_find(names
, arg
)) {
870 fprintf(stderr
, "\tpage alias \"%s\" "
871 "may be missing\n", arg
);
881 sbuf_retract(whatis_final
, 2); /* remove last ", " */
882 while (sbuf_length(whatis_final
) < indent
)
883 sbuf_append(whatis_final
, " ", 1);
884 sbuf_append(whatis_final
, " - ", 3);
885 sbuf_append_str(whatis_final
, skip_spaces(descr
));
886 sl_add(whatis_lines
, strdup(sbuf_content(whatis_final
)));
890 * Processes a single man section.
893 process_section(char *section_dir
)
895 struct dirent
**entries
;
896 struct page_info
*info
;
901 fprintf(stderr
, " %s\n", section_dir
);
904 * scan the man section directory for pages
906 nentries
= scandir(section_dir
, &entries
, NULL
, alphasort
);
908 warn("%s", section_dir
);
914 * collect information about man pages
916 for (i
= 0; i
< nentries
; i
++) {
917 struct page_info ref
;
921 if (asprintf(&filename
, "%s/%s", section_dir
,
922 entries
[i
]->d_name
) < 0) {
926 if (stat(filename
, &st
) < 0) {
927 warn("%s", filename
);
928 goto process_section_next
;
931 if (!S_ISREG(st
.st_mode
)) {
932 if (verbose
&& !S_ISDIR(st
.st_mode
))
933 warnx("%s: not a regular file", filename
);
934 goto process_section_next
;
937 ref
.inode
= st
.st_ino
;
939 info
= RB_FIND(page_info_tree
, &page_head
, &ref
);
941 info
= malloc(sizeof(*info
));
946 bzero(info
, sizeof(*info
));
947 info
->inode
= st
.st_ino
;
948 RB_INIT(&info
->head
);
950 RB_INSERT(page_info_tree
, &page_head
, info
);
953 new_page_alias(info
, filename
, entries
[i
]);
955 process_section_next
:
964 * Returns whether the directory entry is a man page section.
967 select_sections(const struct dirent
*entry
)
969 const char *p
= &entry
->d_name
[3];
971 if (strncmp(entry
->d_name
, "man", 3) != 0)
981 * Processes a single top-level man directory by finding all the
982 * sub-directories named man* and processing each one in turn.
985 process_mandir(char *dir_name
)
987 struct dirent
**entries
;
988 struct page_info
*info
;
994 if (already_visited(dir_name
))
997 fprintf(stderr
, "man directory %s\n", dir_name
);
998 nsections
= scandir(dir_name
, &entries
, select_sections
, alphasort
);
1000 warn("%s", dir_name
);
1004 if (common_output
== NULL
&& (fp
= open_whatis(dir_name
)) == NULL
)
1006 for (i
= 0; i
< nsections
; i
++) {
1007 char section_dir
[MAXPATHLEN
];
1008 snprintf(section_dir
, sizeof section_dir
, "%s/%s", dir_name
,
1009 entries
[i
]->d_name
);
1010 process_section(section_dir
);
1011 snprintf(section_dir
, sizeof section_dir
, "%s/%s/%s", dir_name
,
1012 entries
[i
]->d_name
, machine
);
1013 if (stat(section_dir
, &st
) == 0 && S_ISDIR(st
.st_mode
))
1014 process_section(section_dir
);
1020 * process and free all pages
1022 while ((info
= RB_ROOT(&page_head
))) {
1023 RB_REMOVE(page_info_tree
, &page_head
, info
);
1025 free_page_info(info
);
1028 if (common_output
== NULL
)
1029 finish_whatis(fp
, dir_name
);
1033 * Processes one argument, which may be a colon-separated list of
1037 process_argument(const char *arg
)
1045 err(1, "out of memory");
1046 while ((dir
= strsep(&parg
, ":")) != NULL
) {
1047 if (locale
!= NULL
) {
1048 asprintf(&mandir
, "%s/%s", dir
, locale
);
1049 process_mandir(mandir
);
1051 if (lang_locale
!= NULL
) {
1052 asprintf(&mandir
, "%s/%s", dir
, lang_locale
);
1053 process_mandir(mandir
);
1057 process_mandir(dir
);
1065 main(int argc
, char **argv
)
1070 while ((opt
= getopt(argc
, argv
, "ai:n:o:vL")) != -1) {
1076 indent
= atoi(optarg
);
1079 whatis_name
= optarg
;
1082 common_output
= optarg
;
1088 locale
= getenv("LC_ALL");
1090 locale
= getenv("LC_CTYPE");
1092 locale
= getenv("LANG");
1093 if (locale
!= NULL
) {
1094 char *sep
= strchr(locale
, '_');
1095 if (sep
!= NULL
&& isupper(sep
[1]) &&
1097 asprintf(&lang_locale
, "%.*s%s",
1098 (int)(sep
- locale
),
1104 fprintf(stderr
, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv
[0]);
1109 signal(SIGINT
, trap_signal
);
1110 signal(SIGHUP
, trap_signal
);
1111 signal(SIGQUIT
, trap_signal
);
1112 signal(SIGTERM
, trap_signal
);
1113 SLIST_INIT(&visited_dirs
);
1114 whatis_proto
= new_sbuf();
1115 whatis_final
= new_sbuf();
1117 if ((machine
= getenv("MACHINE")) == NULL
)
1120 if (common_output
!= NULL
&& (fp
= open_output(common_output
)) == NULL
)
1121 err(1, "%s", common_output
);
1122 if (optind
== argc
) {
1123 const char *manpath
= getenv("MANPATH");
1124 if (manpath
== NULL
)
1125 manpath
= DEFAULT_MANPATH
;
1126 process_argument(manpath
);
1128 while (optind
< argc
)
1129 process_argument(argv
[optind
++]);
1131 if (common_output
!= NULL
)
1132 finish_output(fp
, common_output
);