2 * Copyright (c) 2002 John Rochester
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * $FreeBSD: src/usr.bin/catman/catman.c,v 1.9 2003/06/10 02:18:00 ache Exp $
31 #include <sys/types.h>
33 #include <sys/param.h>
47 #define DEFAULT_MANPATH "/usr/share/man"
49 #define TOP_LEVEL_DIR 0 /* signifies a top-level man directory */
50 #define MAN_SECTION_DIR 1 /* signifies a man section directory */
51 #define UNKNOWN 2 /* signifies an unclassifiable directory */
53 #define TEST_EXISTS 0x01
55 #define TEST_FILE 0x04
56 #define TEST_READABLE 0x08
57 #define TEST_WRITABLE 0x10
58 #define TEST_EXECUTABLE 0x20
60 static int verbose
; /* -v flag: be verbose with warnings */
61 static int pretend
; /* -n, -p flags: print out what would be done
62 instead of actually doing it */
63 static int force
; /* -f flag: force overwriting all cat pages */
64 static int rm_junk
; /* -r flag: remove garbage pages */
65 static char *locale
; /* user's locale if -L is used */
66 static char *lang_locale
; /* short form of locale */
67 static const char *machine
;
68 static int exit_code
; /* exit code to use when finished */
71 * -T argument for nroff
73 static const char *nroff_device
= "ascii";
76 * Mapping from locale to nroff device
78 static const char *locale_device
[] = {
80 "ISO8859-1", "latin1",
81 "ISO8859-15", "latin1",
85 #define BZ2_CMD "bzip2"
86 #define BZ2_EXT ".bz2"
87 #define BZ2CAT_CMD "bz"
91 enum Ziptype
{NONE
, BZIP
, GZIP
};
94 static gid_t gids
[NGROUPS_MAX
];
96 static int starting_dir
;
97 static char tmp_file
[MAXPATHLEN
];
101 * A hashtable is an array of chains composed of this entry structure.
107 struct hash_entry
*next
;
110 #define HASHTABLE_ALLOC 16384 /* allocation for hashtable (power of 2) */
111 #define HASH_MASK (HASHTABLE_ALLOC - 1)
113 static struct hash_entry
*visited
[HASHTABLE_ALLOC
];
114 static struct hash_entry
*links
[HASHTABLE_ALLOC
];
117 * Inserts a string into a hashtable keyed by inode & device number.
120 insert_hashtable(struct hash_entry
**table
, ino_t inode_number
,
121 dev_t device_number
, const char *data
)
123 struct hash_entry
*new_entry
;
124 struct hash_entry
**chain
;
126 new_entry
= malloc(sizeof(struct hash_entry
));
127 if (new_entry
== NULL
)
128 err(1, "can't insert into hashtable");
129 chain
= &table
[inode_number
& HASH_MASK
];
130 new_entry
->inode_number
= inode_number
;
131 new_entry
->device_number
= device_number
;
132 new_entry
->data
= data
;
133 new_entry
->next
= *chain
;
138 * Finds a string in a hashtable keyed by inode & device number.
141 find_hashtable(struct hash_entry
**table
, ino_t inode_number
,
144 struct hash_entry
*chain
;
146 chain
= table
[inode_number
& HASH_MASK
];
147 while (chain
!= NULL
) {
148 if (chain
->inode_number
== inode_number
&&
149 chain
->device_number
== device_number
)
157 trap_signal(int sig __unused
)
159 if (tmp_file
[0] != '\0')
165 * Deals with junk files in the man or cat section directories.
168 junk(const char *mandir
, const char *name
, const char *reason
)
171 fprintf(stderr
, "%s/%s: %s\n", mandir
, name
, reason
);
173 fprintf(stderr
, "rm %s/%s\n", mandir
, name
);
174 if (!pretend
&& unlink(name
) < 0)
175 warn("%s/%s", mandir
, name
);
180 * Returns TOP_LEVEL_DIR for .../man, MAN_SECTION_DIR for .../manXXX,
181 * and UNKNOWN for everything else.
184 directory_type(char *dir
)
189 p
= strrchr(dir
, '/');
190 if (p
== NULL
|| p
[1] != '\0')
198 if (strncmp(p
, "man", 3) == 0) {
201 return TOP_LEVEL_DIR
;
202 while (isalnum((unsigned char)*p
) || *p
== '_') {
204 return(MAN_SECTION_DIR
);
211 * Tests whether the given file name (without a preceding path)
212 * is a proper man page name (like "dntpd.8.gz").
213 * Only alphanumerics and '_' are allowed after the last '.' and
214 * the last '.' can't be the first or last characters.
217 is_manpage_name(char *name
)
219 char *lastdot
= NULL
;
222 for (n
= name
; *n
!= '\0'; n
++) {
241 return(lastdot
> name
&& lastdot
+ 1 < n
);
245 is_bzipped(char *name
)
247 int len
= strlen(name
);
248 return(len
>= 5 && strcmp(&name
[len
- 4], BZ2_EXT
) == 0);
252 is_gzipped(char *name
)
254 int len
= strlen(name
);
255 return(len
>= 4 && strcmp(&name
[len
- 3], GZ_EXT
) == 0);
259 * Converts manXXX to catXXX.
262 get_cat_section(char *section
)
266 cat_section
= strdup(section
);
267 cat_section
[0] = 'c';
268 cat_section
[1] = 'a';
269 cat_section
[2] = 't';
274 * Tests to see if the given directory has already been visited.
277 already_visited(char *mandir
, char *dir
, int count_visit
)
281 if (stat(dir
, &st
) < 0) {
283 warn("%s/%s", mandir
, dir
);
289 if (find_hashtable(visited
, st
.st_ino
, st
.st_dev
) != NULL
) {
291 warnx("already visited %s/%s", mandir
, dir
);
293 warnx("already visited %s", dir
);
297 insert_hashtable(visited
, st
.st_ino
, st
.st_dev
, "");
302 * Returns a set of TEST_* bits describing a file's type and permissions.
303 * If mod_time isn't NULL, it will contain the file's modification time.
306 test_path(char *name
, time_t *mod_time
)
310 if (stat(name
, &test_st
) < 0)
312 result
= TEST_EXISTS
;
313 if (mod_time
!= NULL
)
314 *mod_time
= test_st
.st_mtime
;
315 if (S_ISDIR(test_st
.st_mode
))
317 else if (S_ISREG(test_st
.st_mode
))
319 if (test_st
.st_uid
== uid
) {
320 test_st
.st_mode
>>= 6;
323 for (i
= 0; i
< ngids
; i
++) {
324 if (test_st
.st_gid
== gids
[i
]) {
325 test_st
.st_mode
>>= 3;
330 if (test_st
.st_mode
& S_IROTH
)
331 result
|= TEST_READABLE
;
332 if (test_st
.st_mode
& S_IWOTH
)
333 result
|= TEST_WRITABLE
;
334 if (test_st
.st_mode
& S_IXOTH
)
335 result
|= TEST_EXECUTABLE
;
340 * Checks whether a file is a symbolic link.
343 is_symlink(char *path
)
347 return(lstat(path
, &st
) >= 0 && S_ISLNK(st
.st_mode
));
351 * Tests to see if the given directory can be written to.
354 check_writable(char *mandir
)
356 if (verbose
&& !(test_path(mandir
, NULL
) & TEST_WRITABLE
))
357 fprintf(stderr
, "%s: not writable - will only be able to write "
358 "to existing cat directories\n", mandir
);
362 * If the directory exists, attempt to make it writable, otherwise
363 * attempt to create it.
366 make_writable_dir(char *mandir
, char *dir
)
370 if ((test
= test_path(dir
, NULL
)) != 0) {
371 if (!(test
& TEST_WRITABLE
) && chmod(dir
, 0755) < 0) {
372 warn("%s/%s: chmod", mandir
, dir
);
377 if (verbose
|| pretend
)
378 fprintf(stderr
, "mkdir %s\n", dir
);
381 if (mkdir(dir
, 0755) < 0) {
382 warn("%s/%s: mkdir", mandir
, dir
);
392 * Processes a single man page source by using nroff to create
393 * the preformatted cat page.
396 process_page(char *mandir
, char *src
, char *cat
, enum Ziptype zipped
)
398 int src_test
, cat_test
;
399 time_t src_mtime
, cat_mtime
;
400 char cmd
[MAXPATHLEN
];
403 const char *link_name
;
405 src_test
= test_path(src
, &src_mtime
);
406 if (!(src_test
& (TEST_FILE
|TEST_READABLE
))) {
407 if (!(src_test
& TEST_DIR
)) {
408 warnx("%s/%s: unreadable", mandir
, src
);
410 if (rm_junk
&& is_symlink(src
))
411 junk(mandir
, src
, "bogus symlink");
415 src_dev
= test_st
.st_dev
;
416 src_ino
= test_st
.st_ino
;
417 cat_test
= test_path(cat
, &cat_mtime
);
418 if (cat_test
& (TEST_FILE
|TEST_READABLE
)) {
419 if (!force
&& cat_mtime
>= src_mtime
) {
421 fprintf(stderr
, "\t%s/%s: up to date\n",
427 * Is the man page a link to one we've already processed?
429 if ((link_name
= find_hashtable(links
, src_ino
, src_dev
)) != NULL
) {
430 if (verbose
|| pretend
)
431 fprintf(stderr
, "%slink %s -> %s\n",
432 verbose
? "\t" : "", cat
, link_name
);
434 link(link_name
, cat
);
437 insert_hashtable(links
, src_ino
, src_dev
, strdup(cat
));
438 if (verbose
|| pretend
) {
439 fprintf(stderr
, "%sformat %s -> %s\n",
440 verbose
? "\t" : "", src
, cat
);
444 snprintf(tmp_file
, sizeof tmp_file
, "%s.tmp", cat
);
445 snprintf(cmd
, sizeof cmd
,
446 "%scat %s | tbl | nroff -T%s -man | col | %s > %s.tmp",
447 zipped
== BZIP
? BZ2CAT_CMD
: zipped
== GZIP
? GZCAT_CMD
: "",
449 zipped
== BZIP
? BZ2_CMD
: zipped
== GZIP
? GZ_CMD
: "cat",
451 if (system(cmd
) != 0)
452 err(1, "formatting pipeline");
453 if (rename(tmp_file
, cat
) < 0)
459 * Scan the man section directory for pages and process each one,
460 * then check for junk in the corresponding cat section.
463 scan_section(char *mandir
, char *section
, char *cat_section
)
465 struct dirent
**entries
;
466 char **expected
= NULL
;
472 char page_path
[MAXPATHLEN
- 3]; /* allow for '.gz' addition */
473 char cat_path
[MAXPATHLEN
];
474 char zip_path
[MAXPATHLEN
];
477 * scan the man section directory for pages
479 npages
= scandir(section
, &entries
, NULL
, alphasort
);
481 warn("%s/%s", mandir
, section
);
485 if (verbose
|| rm_junk
) {
487 * Maintain a list of all cat pages that should exist,
488 * corresponding to existing man pages.
490 expected
= (char **) calloc(npages
, sizeof(char *));
492 for (i
= 0; i
< npages
; free(entries
[i
++])) {
493 page_name
= entries
[i
]->d_name
;
494 snprintf(page_path
, sizeof page_path
, "%s/%s", section
,
496 if (!is_manpage_name(page_name
)) {
497 if (!(test_path(page_path
, NULL
) & TEST_DIR
)) {
498 junk(mandir
, page_path
,
499 "invalid man page name");
503 zipped
= is_bzipped(page_name
) ? BZIP
:
504 is_gzipped(page_name
) ? GZIP
: NONE
;
505 if (zipped
!= NONE
) {
506 snprintf(cat_path
, sizeof cat_path
, "%s/%s",
507 cat_section
, page_name
);
508 if (expected
!= NULL
)
509 expected
[nexpected
++] = strdup(page_name
);
510 process_page(mandir
, page_path
, cat_path
, zipped
);
513 * We've got an uncompressed man page,
514 * check to see if there's a (preferred)
517 snprintf(zip_path
, sizeof zip_path
, "%s%s",
519 if (test_path(zip_path
, NULL
) != 0) {
520 junk(mandir
, page_path
,
521 "man page unused due to existing " GZ_EXT
);
525 "warning, %s is uncompressed\n",
528 snprintf(cat_path
, sizeof cat_path
, "%s/%s",
529 cat_section
, page_name
);
530 if (expected
!= NULL
) {
531 asprintf(&expected
[nexpected
++],
534 process_page(mandir
, page_path
, cat_path
, NONE
);
539 if (expected
== NULL
)
542 * scan cat sections for junk
544 npages
= scandir(cat_section
, &entries
, NULL
, alphasort
);
546 for (i
= 0; i
< npages
; free(entries
[i
++])) {
547 const char *junk_reason
;
550 page_name
= entries
[i
]->d_name
;
551 if (strcmp(page_name
, ".") == 0 || strcmp(page_name
, "..") == 0)
554 * Keep the index into the expected cat page list
555 * ahead of the name we've found.
557 while (e
< nexpected
&&
558 (cmp
= strcmp(page_name
, expected
[e
])) > 0)
562 /* we have an unexpected page */
563 snprintf(cat_path
, sizeof cat_path
, "%s/%s", cat_section
,
565 if (!is_manpage_name(page_name
)) {
566 if (test_path(cat_path
, NULL
) & TEST_DIR
)
568 junk_reason
= "invalid cat page name";
569 } else if (!is_gzipped(page_name
) && e
+ 1 < nexpected
&&
570 strncmp(page_name
, expected
[e
+ 1], strlen(page_name
)) == 0 &&
571 strlen(expected
[e
+ 1]) == strlen(page_name
) + 3) {
572 junk_reason
= "cat page unused due to existing " GZ_EXT
;
574 junk_reason
= "cat page without man page";
575 junk(mandir
, cat_path
, junk_reason
);
578 while (e
< nexpected
)
585 * Processes a single man section.
588 process_section(char *mandir
, char *section
)
592 if (already_visited(mandir
, section
, 1))
595 fprintf(stderr
, " section %s\n", section
);
596 cat_section
= get_cat_section(section
);
597 if (make_writable_dir(mandir
, cat_section
))
598 scan_section(mandir
, section
, cat_section
);
603 select_sections(const struct dirent
*entry
)
608 name
= strdup(entry
->d_name
);
609 ret
= directory_type(name
) == MAN_SECTION_DIR
;
615 * Processes a single top-level man directory. If section isn't NULL,
616 * it will only process that section sub-directory, otherwise it will
617 * process all of them.
620 process_mandir(char *dir_name
, char *section
)
622 fchdir(starting_dir
);
623 if (already_visited(NULL
, dir_name
, section
== NULL
))
625 check_writable(dir_name
);
627 fprintf(stderr
, "man directory %s\n", dir_name
);
629 fprintf(stderr
, "cd %s\n", dir_name
);
630 if (chdir(dir_name
) < 0) {
631 warn("%s: chdir", dir_name
);
635 if (section
!= NULL
) {
636 process_section(dir_name
, section
);
638 struct dirent
**entries
;
643 nsections
= scandir(".", &entries
, select_sections
, alphasort
);
645 warn("%s", dir_name
);
649 for (i
= 0; i
< nsections
; i
++) {
650 process_section(dir_name
, entries
[i
]->d_name
);
651 asprintf(&machine_dir
, "%s/%s", entries
[i
]->d_name
,
653 if (test_path(machine_dir
, NULL
) & TEST_DIR
)
654 process_section(dir_name
, machine_dir
);
663 * Processes one argument, which may be a colon-separated list of
667 process_argument(const char *arg
)
676 err(1, "out of memory");
677 while ((dir
= strsep(&parg
, ":")) != NULL
) {
678 switch (directory_type(dir
)) {
680 if (locale
!= NULL
) {
681 asprintf(&mandir
, "%s/%s", dir
, locale
);
682 process_mandir(mandir
, NULL
);
684 if (lang_locale
!= NULL
) {
685 asprintf(&mandir
, "%s/%s", dir
,
687 process_mandir(mandir
, NULL
);
691 process_mandir(dir
, NULL
);
694 case MAN_SECTION_DIR
: {
695 mandir
= strdup(dirname(dir
));
696 section
= strdup(basename(dir
));
697 process_mandir(mandir
, section
);
703 warnx("%s: directory name not in proper man form", dir
);
711 determine_locale(void)
715 if ((locale
= setlocale(LC_CTYPE
, "")) == NULL
) {
716 warnx("-L option used, but no locale found\n");
719 sep
= strchr(locale
, '_');
720 if (sep
!= NULL
&& isupper(sep
[1]) && isupper(sep
[2]))
721 asprintf(&lang_locale
, "%.*s%s", (int)(sep
- locale
), locale
,
723 sep
= nl_langinfo(CODESET
);
724 if (sep
!= NULL
&& *sep
!= '\0' && strcmp(sep
, "US-ASCII") != 0) {
727 for (i
= 0; locale_device
[i
] != NULL
; i
+= 2) {
728 if (strcmp(sep
, locale_device
[i
]) == 0) {
729 nroff_device
= locale_device
[i
+ 1];
735 if (lang_locale
!= NULL
)
736 fprintf(stderr
, "short locale is %s\n", lang_locale
);
737 fprintf(stderr
, "nroff device is %s\n", nroff_device
);
744 fprintf(stderr
, "usage: %s [-fLnrv] [directories ...]\n",
750 main(int argc
, char **argv
)
754 if ((uid
= getuid()) == 0) {
755 fprintf(stderr
, "don't run %s as root, use:\n echo", argv
[0]);
756 for (optind
= 0; optind
< argc
; optind
++)
757 fprintf(stderr
, " %s", argv
[optind
]);
758 fprintf(stderr
, " | nice -5 su -m man\n");
761 while ((opt
= getopt(argc
, argv
, "vnfLrh")) != -1) {
783 ngids
= getgroups(NGROUPS_MAX
, gids
);
784 if ((starting_dir
= open(".", 0)) < 0)
787 signal(SIGINT
, trap_signal
);
788 signal(SIGHUP
, trap_signal
);
789 signal(SIGQUIT
, trap_signal
);
790 signal(SIGTERM
, trap_signal
);
792 if ((machine
= getenv("MACHINE")) == NULL
)
795 if (optind
== argc
) {
796 const char *manpath
= getenv("MANPATH");
798 manpath
= DEFAULT_MANPATH
;
799 process_argument(manpath
);
801 while (optind
< argc
)
802 process_argument(argv
[optind
++]);