2 * Copyright (c) 2002 John Rochester
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * $FreeBSD: src/usr.bin/makewhatis/makewhatis.c,v 1.9 2002/09/04 23:29:04 dwmalone Exp $
29 * $DragonFly: src/usr.sbin/makewhatis/makewhatis.c,v 1.2 2005/01/16 04:59:53 cpressey Exp $
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/queue.h>
43 #include <stringlist.h>
47 #define DEFAULT_MANPATH "/usr/share/man"
48 #define LINE_ALLOC 4096
50 static char blank
[] = "";
53 * Information collected about each man page in a section.
64 * An entry kept for each visited directory.
69 SLIST_ENTRY(visited_dir
) next
;
76 char * content
; /* the start of the buffer */
77 char * end
; /* just past the end of the content */
78 char * last
; /* the last allocated character */
82 * Removes the last amount characters from the sbuf.
84 #define sbuf_retract(sbuf, amount) \
85 ((sbuf)->end -= (amount))
87 * Returns the length of the sbuf content.
89 #define sbuf_length(sbuf) \
90 ((sbuf)->end - (sbuf)->content)
92 typedef char *edited_copy(char *from
, char *to
, int length
);
94 static int append
; /* -a flag: append to existing whatis */
95 static int verbose
; /* -v flag: be verbose with warnings */
96 static int indent
= 24; /* -i option: description indentation */
97 static const char *whatis_name
="whatis";/* -n option: the name */
98 static char *common_output
; /* -o option: the single output file */
99 static char *locale
; /* user's locale if -L is used */
100 static char *lang_locale
; /* short form of locale */
101 static const char *machine
;
103 static int exit_code
; /* exit code to use when finished */
104 static SLIST_HEAD(, visited_dir
) visited_dirs
=
105 SLIST_HEAD_INITIALIZER(visited_dirs
);
/*
 * While the whatis line is being formed, it is stored in whatis_proto.
 * When finished, it is reformatted into whatis_final and then appended
 * to whatis_lines.
 */
112 static struct sbuf
*whatis_proto
;
113 static struct sbuf
*whatis_final
;
114 static StringList
*whatis_lines
; /* collected output lines */
116 static char tmp_file
[MAXPATHLEN
]; /* path of temporary file, if any */
118 /* A set of possible names for the NAME man page section */
119 static const char *name_section_titles
[] = {
120 "NAME", "Name", "NAMN", "BEZEICHNUNG", "\xcc\xbe\xbe\xce",
121 "\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5", NULL
124 /* A subset of the mdoc(7) commands to ignore */
125 static char mdoc_commands
[] = "ArDvErEvFlLiNmPa";
128 * Frees a struct page_info and its content.
131 free_page_info(struct page_info
*info
)
133 free(info
->filename
);
140 * Allocates and fills in a new struct page_info given the
141 * name of the man section directory and the dirent of the file.
142 * If the file is not a man page, returns NULL.
144 static struct page_info
*
145 new_page_info(char *dir
, struct dirent
*dirent
)
147 struct page_info
*info
;
152 info
= malloc(sizeof(struct page_info
));
155 basename_length
= strlen(dirent
->d_name
);
156 suffix
= &dirent
->d_name
[basename_length
];
157 asprintf(&info
->filename
, "%s/%s", dir
, dirent
->d_name
);
158 if ((info
->gzipped
= basename_length
>= 4 &&
159 strcmp(&dirent
->d_name
[basename_length
- 3], ".gz") == 0)) {
164 if (--suffix
== dirent
->d_name
|| !isalnum(*suffix
)) {
168 warnx("%s: invalid man page name",
170 free(info
->filename
);
176 info
->name
= strdup(dirent
->d_name
);
177 info
->suffix
= strdup(suffix
);
178 if (stat(info
->filename
, &st
) < 0) {
179 warn("%s", info
->filename
);
180 free_page_info(info
);
183 if (!S_ISREG(st
.st_mode
)) {
184 if (verbose
&& !S_ISDIR(st
.st_mode
))
185 warnx("%s: not a regular file", info
->filename
);
186 free_page_info(info
);
189 info
->inode
= st
.st_ino
;
194 * Reset an sbuf's length to 0.
197 sbuf_clear(struct sbuf
*sbuf
)
199 sbuf
->end
= sbuf
->content
;
203 * Allocate a new sbuf.
208 struct sbuf
*sbuf
= (struct sbuf
*) malloc(sizeof(struct sbuf
));
209 sbuf
->content
= malloc(LINE_ALLOC
);
210 sbuf
->last
= sbuf
->content
+ LINE_ALLOC
- 1;
216 * Ensure that there is enough room in the sbuf for nchars more characters.
219 sbuf_need(struct sbuf
*sbuf
, int nchars
)
222 size_t size
, cntsize
;
224 /* double the size of the allocation until the buffer is big enough */
225 while (sbuf
->end
+ nchars
> sbuf
->last
) {
226 size
= sbuf
->last
+ 1 - sbuf
->content
;
228 cntsize
= sbuf
->end
- sbuf
->content
;
230 new_content
= malloc(size
);
231 memcpy(new_content
, sbuf
->content
, cntsize
);
233 sbuf
->content
= new_content
;
234 sbuf
->end
= new_content
+ cntsize
;
235 sbuf
->last
= new_content
+ size
- 1;
240 * Appends a string of a given length to the sbuf.
243 sbuf_append(struct sbuf
*sbuf
, const char *text
, int length
)
246 sbuf_need(sbuf
, length
);
247 memcpy(sbuf
->end
, text
, length
);
/*
 * Appends a null-terminated string to the sbuf.
 *
 * The terminator itself is not copied.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	sbuf_append(sbuf, text, strlen(text));
}
262 * Appends an edited null-terminated string to the sbuf.
265 sbuf_append_edited(struct sbuf
*sbuf
, char *text
, edited_copy copy
)
267 int length
= strlen(text
);
269 sbuf_need(sbuf
, length
);
270 sbuf
->end
= copy(text
, sbuf
->end
, length
);
275 * Strips any of a set of chars from the end of the sbuf.
278 sbuf_strip(struct sbuf
*sbuf
, const char *set
)
280 while (sbuf
->end
> sbuf
->content
&& strchr(set
, sbuf
->end
[-1]) != NULL
)
285 * Returns the null-terminated string built by the sbuf.
288 sbuf_content(struct sbuf
*sbuf
)
291 return(sbuf
->content
);
295 * Returns true if no man page exists in the directory with
296 * any of the names in the StringList.
299 no_page_exists(char *dir
, StringList
*names
, char *suffix
)
301 char path
[MAXPATHLEN
];
304 for (i
= 0; i
< names
->sl_cur
; i
++) {
305 snprintf(path
, sizeof path
, "%s/%s.%s.gz", dir
, names
->sl_str
[i
], suffix
);
306 if (access(path
, F_OK
) < 0) {
307 path
[strlen(path
) - 3] = '\0';
308 if (access(path
, F_OK
) < 0)
317 trap_signal(int sig __unused
)
319 if (tmp_file
[0] != '\0')
325 * Attempts to open an output file. Returns NULL if unsuccessful.
328 open_output(char *name
)
332 whatis_lines
= sl_init();
334 char line
[LINE_ALLOC
];
336 output
= fopen(name
, "r");
337 if (output
== NULL
) {
342 while (fgets(line
, sizeof line
, output
) != NULL
) {
343 line
[strlen(line
) - 1] = '\0';
344 sl_add(whatis_lines
, strdup(line
));
347 if (common_output
== NULL
) {
348 snprintf(tmp_file
, sizeof tmp_file
, "%s.tmp", name
);
351 output
= fopen(name
, "w");
352 if (output
== NULL
) {
/*
 * qsort(3) comparison function for an array of strings.
 *
 * a and b each point at a "char *" element; the strings they refer to
 * are ordered with strcmp(3).
 */
static int
linesort(const void *a, const void *b)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return(strcmp(lhs, rhs));
}
367 * Writes the unique sorted lines to the output file.
370 finish_output(FILE *output
, char *name
)
375 qsort(whatis_lines
->sl_str
, whatis_lines
->sl_cur
, sizeof(char *),
377 for (i
= 0; i
< whatis_lines
->sl_cur
; i
++) {
378 char *line
= whatis_lines
->sl_str
[i
];
379 if (i
> 0 && strcmp(line
, prev
) == 0)
386 sl_free(whatis_lines
, 1);
387 if (common_output
== NULL
) {
388 rename(tmp_file
, name
);
394 open_whatis(char *mandir
)
396 char filename
[MAXPATHLEN
];
398 snprintf(filename
, sizeof filename
, "%s/%s", mandir
, whatis_name
);
399 return(open_output(filename
));
403 finish_whatis(FILE *output
, char *mandir
)
405 char filename
[MAXPATHLEN
];
407 snprintf(filename
, sizeof filename
, "%s/%s", mandir
, whatis_name
);
408 finish_output(output
, filename
);
412 * Tests to see if the given directory has already been visited.
415 already_visited(char *dir
)
418 struct visited_dir
*visit
;
420 if (stat(dir
, &st
) < 0) {
425 SLIST_FOREACH(visit
, &visited_dirs
, next
) {
426 if (visit
->inode
== st
.st_ino
&&
427 visit
->device
== st
.st_dev
) {
428 warnx("already visited %s", dir
);
432 visit
= (struct visited_dir
*) malloc(sizeof(struct visited_dir
));
433 visit
->device
= st
.st_dev
;
434 visit
->inode
= st
.st_ino
;
435 SLIST_INSERT_HEAD(&visited_dirs
, visit
, next
);
/*
 * Removes trailing spaces from a string, returning a pointer to just
 * beyond the new last character.
 *
 * The first character of the string is never examined, so a string made
 * up entirely of whitespace is cut down to its first character rather
 * than to the empty string.
 */
static char *
trim_rhs(char *str)
{
	size_t len = strlen(str);

	/* index-based so no pointer before the start of str is ever formed */
	while (len > 1 && isspace((unsigned char)str[len - 1]))
		len--;
	str[len] = '\0';
	return(&str[len]);
}
/*
 * Returns a pointer to the next non-space character in the string.
 *
 * If only whitespace remains, the result points at the terminating NUL.
 */
static char *
skip_spaces(char *s)
{
	/* cast: ctype functions are undefined for negative char values */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;
	return(s);
}
/*
 * Returns whether the string contains only digits.
 *
 * At least one digit is required: the empty string yields 0.
 */
static int
only_digits(char *line)
{
	/* cast: ctype functions are undefined for negative char values */
	if (!isdigit((unsigned char)*line++))
		return(0);
	while (isdigit((unsigned char)*line))
		line++;
	return(*line == '\0');
}
478 * Returns whether the line is of one of the forms:
482 * assuming that section_start is ".Sh".
485 name_section_line(char *line
, const char *section_start
)
490 if (strncmp(line
, section_start
, 3) != 0)
492 line
= skip_spaces(line
+ 3);
493 rhs
= trim_rhs(line
);
499 for (title
= name_section_titles
; *title
!= NULL
; title
++)
500 if (strcmp(*title
, line
) == 0)
506 * Copies characters while removing the most common nroff/troff
508 * \(em, \(mi, \s[+-N], \&
509 * \fF, \f(fo, \f[font]
510 * \*s, \*(st, \*[stringvar]
513 de_nroff_copy(char *from
, char *to
, int fromlen
)
515 char *from_end
= &from
[fromlen
];
516 while (from
< from_end
) {
521 if (strncmp(&from
[1], "em", 2) == 0 ||
522 strncmp(&from
[1], "mi", 2) == 0) {
530 while (isdigit(*from
))
537 else if (*from
== '[') {
538 while (*++from
!= ']' && from
< from_end
)
556 * Appends a string with the nroff formatting removed.
559 add_nroff(char *text
)
561 sbuf_append_edited(whatis_proto
, text
, de_nroff_copy
);
565 * Appends "name(suffix), " to whatis_final.
568 add_whatis_name(char *name
, char *suffix
)
571 sbuf_append_str(whatis_final
, name
);
572 sbuf_append(whatis_final
, "(", 1);
573 sbuf_append_str(whatis_final
, suffix
);
574 sbuf_append(whatis_final
, "), ", 3);
579 * Processes an old-style man(7) line. This ignores commands with only
580 * a single number argument.
583 process_man_line(char *line
)
586 while (isalpha(*++line
))
588 line
= skip_spaces(line
);
589 if (only_digits(line
))
592 line
= skip_spaces(line
);
595 sbuf_append(whatis_proto
, " ", 1);
600 * Processes a new-style mdoc(7) line.
603 process_mdoc_line(char *line
)
607 char *line_end
= &line
[strlen(line
)];
608 int orig_length
= sbuf_length(whatis_proto
);
613 if (line
[0] != '.' || !isupper(line
[1]) || !islower(line
[2])) {
614 add_nroff(skip_spaces(line
));
615 sbuf_append(whatis_proto
, " ", 1);
618 xref
= strncmp(line
, ".Xr", 3) == 0;
620 while ((line
= skip_spaces(line
)) < line_end
) {
624 next
= strchr(next
, '"');
627 memmove(next
, next
+ 1, strlen(next
));
634 next
= strpbrk(line
, " \t");
639 if (isupper(*line
) && islower(line
[1]) && line
[2] == '\0') {
640 if (strcmp(line
, "Ns") == 0) {
645 if (strstr(mdoc_commands
, line
) != NULL
) {
650 if (arg
> 0 && strchr(",.:;?!)]", *line
) == 0) {
652 sbuf_append(whatis_proto
, "(", 1);
654 sbuf_append(whatis_proto
, ")", 1);
658 sbuf_append(whatis_proto
, " ", 1);
664 if (sbuf_length(whatis_proto
) > orig_length
)
665 sbuf_append(whatis_proto
, " ", 1);
669 * Collects a list of comma-separated names from the text.
672 collect_names(StringList
*names
, char *text
)
678 text
= strchr(text
, ',');
689 enum { STATE_UNKNOWN
, STATE_MANSTYLE
, STATE_MDOCNAME
, STATE_MDOCDESC
};
692 * Processes a man page source into a single whatis line and adds it
696 process_page(struct page_info
*page
, char *section_dir
)
703 int state
= STATE_UNKNOWN
;
706 sbuf_clear(whatis_proto
);
707 if ((in
= gzopen(page
->filename
, "r")) == NULL
) {
708 warn("%s", page
->filename
);
712 while (gzgets(in
, buffer
, sizeof buffer
) != NULL
) {
714 if (strncmp(line
, ".\\\"", 3) == 0) /* ignore comments */
718 * haven't reached the NAME section yet.
721 if (name_section_line(line
, ".SH"))
722 state
= STATE_MANSTYLE
;
723 else if (name_section_line(line
, ".Sh"))
724 state
= STATE_MDOCNAME
;
727 * Inside an old-style .SH NAME section.
730 if (strncmp(line
, ".SH", 3) == 0)
733 if (strcmp(line
, ".") == 0)
735 if (strncmp(line
, ".IX", 3) == 0) {
737 line
= skip_spaces(line
);
739 process_man_line(line
);
742 * Inside a new-style .Sh NAME section (the .Nm part).
746 if (strncmp(line
, ".Nm", 3) == 0) {
747 process_mdoc_line(line
);
750 if (strcmp(line
, ".") == 0)
752 sbuf_append(whatis_proto
, "- ", 2);
753 state
= STATE_MDOCDESC
;
757 * Inside a new-style .Sh NAME section (after the .Nm-s).
760 if (strncmp(line
, ".Sh", 3) == 0)
763 if (strcmp(line
, ".") == 0)
765 process_mdoc_line(line
);
771 sbuf_strip(whatis_proto
, " \t.-");
772 line
= sbuf_content(whatis_proto
);
774 * line now contains the appropriate data, but without
775 * the proper indentation or the section appended to each name.
777 descr
= strstr(line
, " - ");
779 descr
= strchr(line
, ' ');
783 "\tignoring junk description \"%s\"\n",
793 collect_names(names
, line
);
794 sbuf_clear(whatis_final
);
795 if (!sl_find(names
, page
->name
) &&
796 no_page_exists(section_dir
, names
, page
->suffix
)) {
798 * Add the page name since that's the only thing that
801 add_whatis_name(page
->name
, page
->suffix
);
803 for (i
= 0; i
< names
->sl_cur
; i
++)
804 add_whatis_name(names
->sl_str
[i
], page
->suffix
);
806 sbuf_retract(whatis_final
, 2); /* remove last ", " */
807 while (sbuf_length(whatis_final
) < indent
)
808 sbuf_append(whatis_final
, " ", 1);
809 sbuf_append(whatis_final
, " - ", 3);
810 sbuf_append_str(whatis_final
, skip_spaces(descr
));
811 sl_add(whatis_lines
, strdup(sbuf_content(whatis_final
)));
815 * Sorts pages first by inode number, then by name.
818 pagesort(const void *a
, const void *b
)
820 const struct page_info
*p1
= *(const struct page_info
* const *)a
;
821 const struct page_info
*p2
= *(const struct page_info
* const *)b
;
822 if (p1
->inode
== p2
->inode
)
823 return(strcmp(p1
->name
, p2
->name
));
824 return(p1
->inode
- p2
->inode
);
828 * Processes a single man section.
831 process_section(char *section_dir
)
833 struct dirent
**entries
;
835 struct page_info
**pages
;
838 ino_t prev_inode
= 0;
841 fprintf(stderr
, " %s\n", section_dir
);
844 * scan the man section directory for pages
846 nentries
= scandir(section_dir
, &entries
, NULL
, alphasort
);
848 warn("%s", section_dir
);
853 * collect information about man pages
855 pages
= calloc(nentries
, sizeof(struct page_info
*));
856 for (i
= 0; i
< nentries
; i
++) {
857 struct page_info
*info
= new_page_info(section_dir
, entries
[i
]);
859 pages
[npages
++] = info
;
863 qsort(pages
, npages
, sizeof(struct page_info
*), pagesort
);
865 * process each unique page
867 for (i
= 0; i
< npages
; i
++) {
868 struct page_info
*page
= pages
[i
];
869 if (page
->inode
!= prev_inode
) {
870 prev_inode
= page
->inode
;
872 fprintf(stderr
, "\treading %s\n",
874 process_page(page
, section_dir
);
876 fprintf(stderr
, "\tskipping %s, duplicate\n",
878 free_page_info(page
);
884 * Returns whether the directory entry is a man page section.
887 select_sections(struct dirent
*entry
)
889 char *p
= &entry
->d_name
[3];
891 if (strncmp(entry
->d_name
, "man", 3) != 0)
901 * Processes a single top-level man directory by finding all the
902 * sub-directories named man* and processing each one in turn.
905 process_mandir(char *dir_name
)
907 struct dirent
**entries
;
913 if (already_visited(dir_name
))
916 fprintf(stderr
, "man directory %s\n", dir_name
);
917 nsections
= scandir(dir_name
, &entries
, select_sections
, alphasort
);
919 warn("%s", dir_name
);
923 if (common_output
== NULL
&& (fp
= open_whatis(dir_name
)) == NULL
)
925 for (i
= 0; i
< nsections
; i
++) {
926 char section_dir
[MAXPATHLEN
];
927 snprintf(section_dir
, sizeof section_dir
, "%s/%s", dir_name
,
929 process_section(section_dir
);
930 snprintf(section_dir
, sizeof section_dir
, "%s/%s/%s", dir_name
,
931 entries
[i
]->d_name
, machine
);
932 if (stat(section_dir
, &st
) == 0 && S_ISDIR(st
.st_mode
))
933 process_section(section_dir
);
937 if (common_output
== NULL
)
938 finish_whatis(fp
, dir_name
);
942 * Processes one argument, which may be a colon-separated list of
946 process_argument(const char *arg
)
954 err(1, "out of memory");
955 while ((dir
= strsep(&parg
, ":")) != NULL
) {
956 if (locale
!= NULL
) {
957 asprintf(&mandir
, "%s/%s", dir
, locale
);
958 process_mandir(mandir
);
960 if (lang_locale
!= NULL
) {
961 asprintf(&mandir
, "%s/%s", dir
, lang_locale
);
962 process_mandir(mandir
);
974 main(int argc
, char **argv
)
979 while ((opt
= getopt(argc
, argv
, "ai:n:o:vL")) != -1) {
985 indent
= atoi(optarg
);
988 whatis_name
= optarg
;
991 common_output
= optarg
;
997 locale
= getenv("LC_ALL");
999 locale
= getenv("LC_CTYPE");
1001 locale
= getenv("LANG");
1002 if (locale
!= NULL
) {
1003 char *sep
= strchr(locale
, '_');
1004 if (sep
!= NULL
&& isupper(sep
[1]) &&
1006 asprintf(&lang_locale
, "%.*s%s", sep
- locale
, locale
, &sep
[3]);
1011 fprintf(stderr
, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv
[0]);
1016 signal(SIGINT
, trap_signal
);
1017 signal(SIGHUP
, trap_signal
);
1018 signal(SIGQUIT
, trap_signal
);
1019 signal(SIGTERM
, trap_signal
);
1020 SLIST_INIT(&visited_dirs
);
1021 whatis_proto
= new_sbuf();
1022 whatis_final
= new_sbuf();
1024 if ((machine
= getenv("MACHINE")) == NULL
)
1027 if (common_output
!= NULL
&& (fp
= open_output(common_output
)) == NULL
)
1028 err(1, "%s", common_output
);
1029 if (optind
== argc
) {
1030 const char *manpath
= getenv("MANPATH");
1031 if (manpath
== NULL
)
1032 manpath
= DEFAULT_MANPATH
;
1033 process_argument(manpath
);
1035 while (optind
< argc
)
1036 process_argument(argv
[optind
++]);
1038 if (common_output
!= NULL
)
1039 finish_output(fp
, common_output
);