mail(1): Invert calloc(3) argument order.
[freebsd-src.git] / usr.bin / catman / catman.c
blob47906f5c35f3161e994e27a99856c8e39ef245d4
/*-
 * Copyright (c) 2002 John Rochester
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/utsname.h>

#include <assert.h>
#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <locale.h>
#include <langinfo.h>
#include <libgen.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define DEFAULT_MANPATH		"/usr/share/man"

/* Classifications returned by directory_type(). */
#define TOP_LEVEL_DIR	0 /* signifies a top-level man directory */
#define MAN_SECTION_DIR	1 /* signifies a man section directory */
#define UNKNOWN		2 /* signifies an unclassifiable directory */

/* Bits returned by test_path(). */
#define TEST_EXISTS	0x01
#define TEST_DIR	0x02
#define TEST_FILE	0x04
#define TEST_READABLE	0x08
#define TEST_WRITABLE	0x10

static int verbose;		/* -v flag: be verbose with warnings */
static int pretend;		/* -n flag: print out what would be done
				   instead of actually doing it */
static int force;		/* -f flag: force overwriting all cat pages */
static int rm_junk;		/* -r flag: remove garbage pages */
static char *locale;		/* user's locale if -L is used */
static char *lang_locale;	/* short form of locale */
static const char *machine, *machine_arch;
static int exit_code;		/* exit code to use when finished */

/*
 * -T argument for nroff
 */
static const char *nroff_device = "ascii";

/*
 * Mapping from locale to nroff device, stored as consecutive
 * (codeset, device) pairs and terminated by a single NULL.
 */
static const char *locale_device[] = {
	"KOI8-R",	"koi8-r",
	"ISO8859-1",	"latin1",
	"ISO8859-15",	"latin1",
	NULL
};

/*
 * Compression helpers.  The *CAT_CMD values are prefixes that are glued
 * in front of "cat" when the formatting pipeline is built.
 */
#define BZ2_CMD		"bzip2"
#define BZ2_EXT		".bz2"
#define BZ2CAT_CMD	"bz"
#define GZ_CMD		"gzip"
#define GZ_EXT		".gz"
#define GZCAT_CMD	"z"
enum Ziptype {NONE, BZIP, GZIP};

static uid_t uid;
static int starting_dir;		/* descriptor for the initial cwd */
static char tmp_file[MAXPATHLEN];	/* temporary cat page being written */
static struct stat test_st;		/* result of test_path()'s last stat */

/*
 * A hashtable is an array of chains composed of this entry structure.
 * Entries are keyed by (inode, device) and carry an opaque string.
 */
struct hash_entry {
	ino_t		inode_number;
	dev_t		device_number;
	const char	*data;
	struct hash_entry *next;
};

#define HASHTABLE_ALLOC	16384	/* allocation for hashtable (power of 2) */
#define HASH_MASK	(HASHTABLE_ALLOC - 1)

static struct hash_entry *visited[HASHTABLE_ALLOC];
static struct hash_entry *links[HASHTABLE_ALLOC];

118 * Inserts a string into a hashtable keyed by inode & device number.
120 static void
121 insert_hashtable(struct hash_entry **table,
122 ino_t inode_number,
123 dev_t device_number,
124 const char *data)
126 struct hash_entry *new_entry;
127 struct hash_entry **chain;
129 new_entry = (struct hash_entry *) malloc(sizeof(struct hash_entry));
130 if (new_entry == NULL)
131 err(1, "can't insert into hashtable");
132 chain = &table[inode_number & HASH_MASK];
133 new_entry->inode_number = inode_number;
134 new_entry->device_number = device_number;
135 new_entry->data = data;
136 new_entry->next = *chain;
137 *chain = new_entry;
141 * Finds a string in a hashtable keyed by inode & device number.
143 static const char *
144 find_hashtable(struct hash_entry **table,
145 ino_t inode_number,
146 dev_t device_number)
148 struct hash_entry *chain;
150 chain = table[inode_number & HASH_MASK];
151 while (chain != NULL) {
152 if (chain->inode_number == inode_number &&
153 chain->device_number == device_number)
154 return chain->data;
155 chain = chain->next;
157 return NULL;
160 static void
161 trap_signal(int sig __unused)
163 if (tmp_file[0] != '\0')
164 unlink(tmp_file);
165 exit(1);
169 * Deals with junk files in the man or cat section directories.
171 static void
172 junk(const char *mandir, const char *name, const char *reason)
174 if (verbose)
175 fprintf(stderr, "%s/%s: %s\n", mandir, name, reason);
176 if (rm_junk) {
177 fprintf(stderr, "rm %s/%s\n", mandir, name);
178 if (!pretend && unlink(name) < 0)
179 warn("%s/%s", mandir, name);
184 * Returns TOP_LEVEL_DIR for .../man, MAN_SECTION_DIR for .../manXXX,
185 * and UNKNOWN for everything else.
187 static int
188 directory_type(char *dir)
190 char *p;
192 for (;;) {
193 p = strrchr(dir, '/');
194 if (p == NULL || p[1] != '\0')
195 break;
196 *p = '\0';
198 if (p == NULL)
199 p = dir;
200 else
201 p++;
202 if (strncmp(p, "man", 3) == 0) {
203 p += 3;
204 if (*p == '\0')
205 return TOP_LEVEL_DIR;
206 while (isalnum((unsigned char)*p) || *p == '_') {
207 if (*++p == '\0')
208 return MAN_SECTION_DIR;
211 return UNKNOWN;
/*
 * Tests whether the given file name (without a preceding path)
 * is a proper man page name (like "mk-amd-map.8.gz").
 * Only alphanumerics and '_' are allowed after the last '.' and
 * the last '.' can't be the first or last characters.
 *
 * The characters '-', '+', '[' and ':' are accepted in the name but
 * cancel any '.' seen before them; any other non-alphanumeric
 * character makes the name invalid.  Returns 1 for a valid name and
 * 0 otherwise.
 */
static int
is_manpage_name(char *name)
{
	char *lastdot = NULL;
	char *n;

	for (n = name; *n != '\0'; n++) {
		if (isalnum((unsigned char)*n) || *n == '_')
			continue;
		switch (*n) {
		case '-':
		case '+':
		case '[':
		case ':':
			lastdot = NULL;
			break;
		case '.':
			lastdot = n;
			break;
		default:
			return 0;
		}
	}
	/*
	 * Test for "no dot" explicitly instead of ordering a null
	 * pointer against name, which is undefined behavior.
	 */
	return lastdot != NULL && lastdot != name && lastdot[1] != '\0';
}

249 static int
250 is_bzipped(char *name)
252 int len = strlen(name);
253 return len >= 5 && strcmp(&name[len - 4], BZ2_EXT) == 0;
256 static int
257 is_gzipped(char *name)
259 int len = strlen(name);
260 return len >= 4 && strcmp(&name[len - 3], GZ_EXT) == 0;
/*
 * Converts manXXX to catXXX.
 *
 * Returns a newly allocated copy of "section" whose leading "man" is
 * replaced by "cat"; the caller owns the result and must free(3) it.
 * "section" itself is not modified.  Exits through err(3) if memory
 * cannot be allocated.
 */
static char *
get_cat_section(char *section)
{
	size_t size;
	char *cat_section;

	/* Check the precondition before touching any memory. */
	assert(strlen(section) > 3 && strncmp(section, "man", 3) == 0);
	size = strlen(section) + 1;
	cat_section = malloc(size);
	if (cat_section == NULL)
		err(1, "out of memory");
	memcpy(cat_section, section, size);
	memcpy(cat_section, "cat", 3);
	return cat_section;
}

278 * Tests to see if the given directory has already been visited.
280 static int
281 already_visited(char *mandir, char *dir, int count_visit)
283 struct stat st;
285 if (stat(dir, &st) < 0) {
286 if (mandir != NULL)
287 warn("%s/%s", mandir, dir);
288 else
289 warn("%s", dir);
290 exit_code = 1;
291 return 1;
293 if (find_hashtable(visited, st.st_ino, st.st_dev) != NULL) {
294 if (mandir != NULL)
295 warnx("already visited %s/%s", mandir, dir);
296 else
297 warnx("already visited %s", dir);
298 return 1;
300 if (count_visit)
301 insert_hashtable(visited, st.st_ino, st.st_dev, "");
302 return 0;
306 * Returns a set of TEST_* bits describing a file's type and permissions.
307 * If mod_time isn't NULL, it will contain the file's modification time.
309 static int
310 test_path(char *name, time_t *mod_time)
312 int result;
314 if (stat(name, &test_st) < 0)
315 return 0;
316 result = TEST_EXISTS;
317 if (mod_time != NULL)
318 *mod_time = test_st.st_mtime;
319 if (S_ISDIR(test_st.st_mode))
320 result |= TEST_DIR;
321 else if (S_ISREG(test_st.st_mode))
322 result |= TEST_FILE;
323 if (access(name, R_OK))
324 result |= TEST_READABLE;
325 if (access(name, W_OK))
326 result |= TEST_WRITABLE;
327 return result;
/*
 * Checks whether a file is a symbolic link.
 * Returns 1 if lstat(2) succeeds and reports a symbolic link, else 0.
 */
static int
is_symlink(char *path)
{
	struct stat sb;

	if (lstat(path, &sb) < 0)
		return 0;
	return S_ISLNK(sb.st_mode) ? 1 : 0;
}

342 * Tests to see if the given directory can be written to.
344 static void
345 check_writable(char *mandir)
347 if (verbose && !(test_path(mandir, NULL) & TEST_WRITABLE))
348 fprintf(stderr, "%s: not writable - will only be able to write to existing cat directories\n", mandir);
352 * If the directory exists, attempt to make it writable, otherwise
353 * attempt to create it.
355 static int
356 make_writable_dir(char *mandir, char *dir)
358 int test;
360 if ((test = test_path(dir, NULL)) != 0) {
361 if (!(test & TEST_WRITABLE) && chmod(dir, 0755) < 0) {
362 warn("%s/%s: chmod", mandir, dir);
363 exit_code = 1;
364 return 0;
366 } else {
367 if (verbose || pretend)
368 fprintf(stderr, "mkdir %s\n", dir);
369 if (!pretend) {
370 unlink(dir);
371 if (mkdir(dir, 0755) < 0) {
372 warn("%s/%s: mkdir", mandir, dir);
373 exit_code = 1;
374 return 0;
378 return 1;
382 * Processes a single man page source by using nroff to create
383 * the preformatted cat page.
385 static void
386 process_page(char *mandir, char *src, char *cat, enum Ziptype zipped)
388 int src_test, cat_test;
389 time_t src_mtime, cat_mtime;
390 char cmd[MAXPATHLEN];
391 dev_t src_dev;
392 ino_t src_ino;
393 const char *link_name;
395 src_test = test_path(src, &src_mtime);
396 if (!(src_test & (TEST_FILE|TEST_READABLE))) {
397 if (!(src_test & TEST_DIR)) {
398 warnx("%s/%s: unreadable", mandir, src);
399 exit_code = 1;
400 if (rm_junk && is_symlink(src))
401 junk(mandir, src, "bogus symlink");
403 return;
405 src_dev = test_st.st_dev;
406 src_ino = test_st.st_ino;
407 cat_test = test_path(cat, &cat_mtime);
408 if (cat_test & (TEST_FILE|TEST_READABLE)) {
409 if (!force && cat_mtime >= src_mtime) {
410 if (verbose) {
411 fprintf(stderr, "\t%s/%s: up to date\n",
412 mandir, src);
414 return;
418 * Is the man page a link to one we've already processed?
420 if ((link_name = find_hashtable(links, src_ino, src_dev)) != NULL) {
421 if (verbose || pretend) {
422 fprintf(stderr, "%slink %s -> %s\n",
423 verbose ? "\t" : "", cat, link_name);
425 if (!pretend) {
426 (void) unlink(cat);
427 if (link(link_name, cat) < 0)
428 warn("%s %s: link", link_name, cat);
430 return;
432 insert_hashtable(links, src_ino, src_dev, strdup(cat));
433 if (verbose || pretend) {
434 fprintf(stderr, "%sformat %s -> %s\n",
435 verbose ? "\t" : "", src, cat);
436 if (pretend)
437 return;
439 snprintf(tmp_file, sizeof tmp_file, "%s.tmp", cat);
440 snprintf(cmd, sizeof cmd,
441 "%scat %s | tbl | nroff -c -T%s -man | %s > %s.tmp",
442 zipped == BZIP ? BZ2CAT_CMD : zipped == GZIP ? GZCAT_CMD : "",
443 src, nroff_device,
444 zipped == BZIP ? BZ2_CMD : zipped == GZIP ? GZ_CMD : "cat",
445 cat);
446 if (system(cmd) != 0)
447 err(1, "formatting pipeline");
448 if (rename(tmp_file, cat) < 0)
449 warn("%s", cat);
450 tmp_file[0] = '\0';
454 * Scan the man section directory for pages and process each one,
455 * then check for junk in the corresponding cat section.
457 static void
458 scan_section(char *mandir, char *section, char *cat_section)
460 struct dirent **entries;
461 char **expected = NULL;
462 int npages;
463 int nexpected = 0;
464 int i, e;
465 enum Ziptype zipped;
466 char *page_name;
467 char page_path[MAXPATHLEN];
468 char cat_path[MAXPATHLEN];
469 char zip_path[MAXPATHLEN];
472 * scan the man section directory for pages
474 npages = scandir(section, &entries, NULL, alphasort);
475 if (npages < 0) {
476 warn("%s/%s", mandir, section);
477 exit_code = 1;
478 return;
480 if (verbose || rm_junk) {
482 * Maintain a list of all cat pages that should exist,
483 * corresponding to existing man pages.
485 expected = (char **) calloc(npages, sizeof(char *));
487 for (i = 0; i < npages; free(entries[i++])) {
488 page_name = entries[i]->d_name;
489 snprintf(page_path, sizeof page_path, "%s/%s", section,
490 page_name);
491 if (!is_manpage_name(page_name)) {
492 if (!(test_path(page_path, NULL) & TEST_DIR)) {
493 junk(mandir, page_path,
494 "invalid man page name");
496 continue;
498 zipped = is_bzipped(page_name) ? BZIP :
499 is_gzipped(page_name) ? GZIP : NONE;
500 if (zipped != NONE) {
501 snprintf(cat_path, sizeof cat_path, "%s/%s",
502 cat_section, page_name);
503 if (expected != NULL)
504 expected[nexpected++] = strdup(page_name);
505 process_page(mandir, page_path, cat_path, zipped);
506 } else {
508 * We've got an uncompressed man page,
509 * check to see if there's a (preferred)
510 * compressed one.
512 snprintf(zip_path, sizeof zip_path, "%s%s",
513 page_path, GZ_EXT);
514 if (test_path(zip_path, NULL) != 0) {
515 junk(mandir, page_path,
516 "man page unused due to existing " GZ_EXT);
517 } else {
518 if (verbose) {
519 fprintf(stderr,
520 "warning, %s is uncompressed\n",
521 page_path);
523 snprintf(cat_path, sizeof cat_path, "%s/%s",
524 cat_section, page_name);
525 if (expected != NULL) {
526 asprintf(&expected[nexpected++],
527 "%s", page_name);
529 process_page(mandir, page_path, cat_path, NONE);
533 free(entries);
534 if (expected == NULL)
535 return;
537 * scan cat sections for junk
539 npages = scandir(cat_section, &entries, NULL, alphasort);
540 e = 0;
541 for (i = 0; i < npages; free(entries[i++])) {
542 const char *junk_reason;
543 int cmp = 1;
545 page_name = entries[i]->d_name;
546 if (strcmp(page_name, ".") == 0 || strcmp(page_name, "..") == 0)
547 continue;
549 * Keep the index into the expected cat page list
550 * ahead of the name we've found.
552 while (e < nexpected &&
553 (cmp = strcmp(page_name, expected[e])) > 0)
554 free(expected[e++]);
555 if (cmp == 0)
556 continue;
557 /* we have an unexpected page */
558 snprintf(cat_path, sizeof cat_path, "%s/%s", cat_section,
559 page_name);
560 if (!is_manpage_name(page_name)) {
561 if (test_path(cat_path, NULL) & TEST_DIR)
562 continue;
563 junk_reason = "invalid cat page name";
564 } else if (!is_gzipped(page_name) && e + 1 < nexpected &&
565 strncmp(page_name, expected[e + 1], strlen(page_name)) == 0 &&
566 strlen(expected[e + 1]) == strlen(page_name) + 3) {
567 junk_reason = "cat page unused due to existing " GZ_EXT;
568 } else
569 junk_reason = "cat page without man page";
570 junk(mandir, cat_path, junk_reason);
572 free(entries);
573 while (e < nexpected)
574 free(expected[e++]);
575 free(expected);
580 * Processes a single man section.
582 static void
583 process_section(char *mandir, char *section)
585 char *cat_section;
587 if (already_visited(mandir, section, 1))
588 return;
589 if (verbose)
590 fprintf(stderr, " section %s\n", section);
591 cat_section = get_cat_section(section);
592 if (make_writable_dir(mandir, cat_section))
593 scan_section(mandir, section, cat_section);
594 free(cat_section);
597 static int
598 select_sections(const struct dirent *entry)
600 char *name;
601 int ret;
603 name = strdup(entry->d_name);
604 ret = directory_type(name) == MAN_SECTION_DIR;
605 free(name);
606 return (ret);
610 * Processes a single top-level man directory. If section isn't NULL,
611 * it will only process that section sub-directory, otherwise it will
612 * process all of them.
614 static void
615 process_mandir(char *dir_name, char *section)
617 if (fchdir(starting_dir) < 0)
618 err(1, "fchdir");
619 if (already_visited(NULL, dir_name, section == NULL))
620 return;
621 check_writable(dir_name);
622 if (verbose)
623 fprintf(stderr, "man directory %s\n", dir_name);
624 if (pretend)
625 fprintf(stderr, "cd %s\n", dir_name);
626 if (chdir(dir_name) < 0) {
627 warn("%s: chdir", dir_name);
628 exit_code = 1;
629 return;
631 if (section != NULL) {
632 process_section(dir_name, section);
633 } else {
634 struct dirent **entries;
635 char *machine_dir, *arch_dir;
636 int nsections;
637 int i;
639 nsections = scandir(".", &entries, select_sections, alphasort);
640 if (nsections < 0) {
641 warn("%s", dir_name);
642 exit_code = 1;
643 return;
645 for (i = 0; i < nsections; i++) {
646 process_section(dir_name, entries[i]->d_name);
647 asprintf(&machine_dir, "%s/%s", entries[i]->d_name,
648 machine);
649 if (test_path(machine_dir, NULL) & TEST_DIR)
650 process_section(dir_name, machine_dir);
651 free(machine_dir);
652 if (strcmp(machine_arch, machine) != 0) {
653 asprintf(&arch_dir, "%s/%s", entries[i]->d_name,
654 machine_arch);
655 if (test_path(arch_dir, NULL) & TEST_DIR)
656 process_section(dir_name, arch_dir);
657 free(arch_dir);
659 free(entries[i]);
661 free(entries);
666 * Processes one argument, which may be a colon-separated list of
667 * directories.
669 static void
670 process_argument(const char *arg)
672 char *dir;
673 char *mandir;
674 char *section;
675 char *parg;
677 parg = strdup(arg);
678 if (parg == NULL)
679 err(1, "out of memory");
680 while ((dir = strsep(&parg, ":")) != NULL) {
681 switch (directory_type(dir)) {
682 case TOP_LEVEL_DIR:
683 if (locale != NULL) {
684 asprintf(&mandir, "%s/%s", dir, locale);
685 process_mandir(mandir, NULL);
686 free(mandir);
687 if (lang_locale != NULL) {
688 asprintf(&mandir, "%s/%s", dir,
689 lang_locale);
690 process_mandir(mandir, NULL);
691 free(mandir);
693 } else {
694 process_mandir(dir, NULL);
696 break;
697 case MAN_SECTION_DIR: {
698 mandir = strdup(dirname(dir));
699 section = strdup(basename(dir));
700 process_mandir(mandir, section);
701 free(mandir);
702 free(section);
703 break;
705 default:
706 warnx("%s: directory name not in proper man form", dir);
707 exit_code = 1;
710 free(parg);
713 static void
714 determine_locale(void)
716 char *sep;
718 if ((locale = setlocale(LC_CTYPE, "")) == NULL) {
719 warnx("-L option used, but no locale found\n");
720 return;
722 sep = strchr(locale, '_');
723 if (sep != NULL && isupper((unsigned char)sep[1])
724 && isupper((unsigned char)sep[2])) {
725 asprintf(&lang_locale, "%.*s%s", (int)(sep - locale),
726 locale, &sep[3]);
728 sep = nl_langinfo(CODESET);
729 if (sep != NULL && *sep != '\0' && strcmp(sep, "US-ASCII") != 0) {
730 int i;
732 for (i = 0; locale_device[i] != NULL; i += 2) {
733 if (strcmp(sep, locale_device[i]) == 0) {
734 nroff_device = locale_device[i + 1];
735 break;
739 if (verbose) {
740 if (lang_locale != NULL)
741 fprintf(stderr, "short locale is %s\n", lang_locale);
742 fprintf(stderr, "nroff device is %s\n", nroff_device);
/*
 * Prints the command synopsis on stderr and exits with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	fprintf(stderr, "usage: %s [-fLnrv] [directories ...]\n", prog);
	exit(1);
}

755 main(int argc, char **argv)
757 int opt;
759 if ((uid = getuid()) == 0) {
760 fprintf(stderr, "don't run %s as root, use:\n echo", argv[0]);
761 for (optind = 0; optind < argc; optind++) {
762 fprintf(stderr, " %s", argv[optind]);
764 fprintf(stderr, " | nice -5 su -m man\n");
765 exit(1);
767 while ((opt = getopt(argc, argv, "vnfLrh")) != -1) {
768 switch (opt) {
769 case 'f':
770 force++;
771 break;
772 case 'L':
773 determine_locale();
774 break;
775 case 'n':
776 pretend++;
777 break;
778 case 'r':
779 rm_junk++;
780 break;
781 case 'v':
782 verbose++;
783 break;
784 default:
785 usage();
786 /* NOTREACHED */
789 if ((starting_dir = open(".", 0)) < 0) {
790 err(1, ".");
792 umask(022);
793 signal(SIGINT, trap_signal);
794 signal(SIGHUP, trap_signal);
795 signal(SIGQUIT, trap_signal);
796 signal(SIGTERM, trap_signal);
798 if ((machine = getenv("MACHINE")) == NULL) {
799 static struct utsname utsname;
801 if (uname(&utsname) == -1)
802 err(1, "uname");
803 machine = utsname.machine;
806 if ((machine_arch = getenv("MACHINE_ARCH")) == NULL)
807 machine_arch = MACHINE_ARCH;
809 if (optind == argc) {
810 const char *manpath = getenv("MANPATH");
811 if (manpath == NULL)
812 manpath = DEFAULT_MANPATH;
813 process_argument(manpath);
814 } else {
815 while (optind < argc)
816 process_argument(argv[optind++]);
818 exit(exit_code);