5292 findunref is both slow and broken
[illumos-gate.git] / usr / src / tools / findunref / findunref.c
blob649527200491549433a495f3977bd9cc61ca0984
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Finds all unreferenced files in a source tree that do not match a list of
 * permitted pathnames.
 */
#include <ctype.h>
#include <errno.h>
#include <fnmatch.h>
#include <ftw.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>
/*
 * Pathname set: a growable array of pathname glob patterns, together with
 * the bookkeeping needed to append to it and to test whether a pathname is
 * matched by any pattern it holds.
 */
typedef struct {
	char		**paths;	/* array of pattern strings */
	unsigned int	npath;		/* number of entries in use */
	unsigned int	maxpaths;	/* number of entries allocated */
} pnset_t;
56 * Data associated with the current SCM manifest.
58 typedef struct scmdata {
59 pnset_t *manifest;
60 char metapath[MAXPATHLEN];
61 char root[MAXPATHLEN];
62 unsigned int rootlen;
63 boolean_t rootwarn;
64 } scmdata_t;
/*
 * Per-SCM hooks (currently Git, Mercurial and TeamWare).  `checkfunc' is
 * asked whether a given unreferenced file is known to the SCM; `chdirfunc',
 * when non-NULL, is advised of each directory the tree walk visits.
 */
typedef int checkscm_func_t(const char *, const struct FTW *);
typedef void chdirscm_func_t(const char *);

typedef struct {
	const char	*name;		/* name accepted by -S */
	checkscm_func_t	*checkfunc;
	chdirscm_func_t	*chdirfunc;	/* may be NULL */
} scm_t;
79 static checkscm_func_t check_tw, check_scmdata;
80 static chdirscm_func_t chdir_hg, chdir_git;
81 static int pnset_add(pnset_t *, const char *);
82 static int pnset_check(const pnset_t *, const char *);
83 static void pnset_empty(pnset_t *);
84 static void pnset_free(pnset_t *);
85 static int checkpath(const char *, const struct stat *, int, struct FTW *);
86 static pnset_t *make_exset(const char *);
87 static void warn(const char *, ...);
88 static void die(const char *, ...);
90 static const scm_t scms[] = {
91 { "tw", check_tw, NULL },
92 { "teamware", check_tw, NULL },
93 { "hg", check_scmdata, chdir_hg },
94 { "mercurial", check_scmdata, chdir_hg },
95 { "git", check_scmdata, chdir_git },
96 { NULL, NULL, NULL }
99 static const scm_t *scm;
100 static scmdata_t scmdata;
101 static time_t tstamp; /* timestamp to compare files to */
102 static pnset_t *exsetp; /* pathname globs to ignore */
103 static const char *progname;
106 main(int argc, char *argv[])
108 int c;
109 char path[MAXPATHLEN];
110 char subtree[MAXPATHLEN] = "./";
111 char *tstampfile = ".build.tstamp";
112 struct stat tsstat;
114 progname = strrchr(argv[0], '/');
115 if (progname == NULL)
116 progname = argv[0];
117 else
118 progname++;
120 while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
121 switch (c) {
122 case 'a':
123 /* for compatibility; now the default */
124 break;
126 case 's':
127 (void) strlcat(subtree, optarg, MAXPATHLEN);
128 break;
130 case 't':
131 tstampfile = optarg;
132 break;
134 case 'S':
135 for (scm = scms; scm->name != NULL; scm++) {
136 if (strcmp(scm->name, optarg) == 0)
137 break;
139 if (scm->name == NULL)
140 die("unsupported SCM `%s'\n", optarg);
141 break;
143 default:
144 case '?':
145 goto usage;
149 argc -= optind;
150 argv += optind;
152 if (argc != 2) {
153 usage: (void) fprintf(stderr, "usage: %s [-s <subtree>] "
154 "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
155 progname);
156 return (EXIT_FAILURE);
160 * Interpret a relative timestamp path as relative to srcroot.
162 if (tstampfile[0] == '/')
163 (void) strlcpy(path, tstampfile, MAXPATHLEN);
164 else
165 (void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
167 if (stat(path, &tsstat) == -1)
168 die("cannot stat timestamp file \"%s\"", path);
169 tstamp = tsstat.st_mtime;
172 * Create the exception pathname set.
174 exsetp = make_exset(argv[1]);
175 if (exsetp == NULL)
176 die("cannot make exception pathname set\n");
179 * Walk the specified subtree of the tree rooted at argv[0].
181 if (chdir(argv[0]) == -1)
182 die("cannot change directory to \"%s\"", argv[0]);
184 if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
185 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
187 pnset_empty(exsetp);
188 return (EXIT_SUCCESS);
192 * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
194 static pnset_t *
195 hg_manifest(const char *hgroot)
197 FILE *fp = NULL;
198 char *hgcmd = NULL;
199 char *newline;
200 pnset_t *pnsetp;
201 char path[MAXPATHLEN];
203 pnsetp = calloc(sizeof (pnset_t), 1);
204 if (pnsetp == NULL ||
205 asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
206 goto fail;
208 fp = popen(hgcmd, "r");
209 if (fp == NULL)
210 goto fail;
212 while (fgets(path, sizeof (path), fp) != NULL) {
213 newline = strrchr(path, '\n');
214 if (newline != NULL)
215 *newline = '\0';
217 if (pnset_add(pnsetp, path) == 0)
218 goto fail;
221 (void) pclose(fp);
222 free(hgcmd);
223 return (pnsetp);
224 fail:
225 warn("cannot load hg manifest at %s", hgroot);
226 if (fp != NULL)
227 (void) pclose(fp);
228 free(hgcmd);
229 pnset_free(pnsetp);
230 return (NULL);
234 * Load and return a pnset for the manifest for the Git repo at `gitroot'.
236 static pnset_t *
237 git_manifest(const char *gitroot)
239 FILE *fp = NULL;
240 char *gitcmd = NULL;
241 char *newline;
242 pnset_t *pnsetp;
243 char path[MAXPATHLEN];
245 pnsetp = calloc(sizeof (pnset_t), 1);
246 if (pnsetp == NULL ||
247 asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
248 goto fail;
250 fp = popen(gitcmd, "r");
251 if (fp == NULL)
252 goto fail;
254 while (fgets(path, sizeof (path), fp) != NULL) {
255 newline = strrchr(path, '\n');
256 if (newline != NULL)
257 *newline = '\0';
259 if (pnset_add(pnsetp, path) == 0)
260 goto fail;
263 (void) pclose(fp);
264 free(gitcmd);
265 return (pnsetp);
266 fail:
267 warn("cannot load git manifest at %s", gitroot);
268 if (fp != NULL)
269 (void) pclose(fp);
270 free(gitcmd);
271 pnset_free(pnsetp);
272 return (NULL);
276 * If necessary, change our active manifest to be appropriate for `path'.
278 static void
279 chdir_scmdata(const char *path, const char *meta,
280 pnset_t *(*manifest_func)(const char *path))
282 char scmpath[MAXPATHLEN];
283 char basepath[MAXPATHLEN];
284 char *slash;
286 (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
289 * Change our active manifest if any one of the following is true:
291 * 1. No manifest is loaded. Find the nearest SCM root to load from.
293 * 2. A manifest is loaded, but we've moved into a directory with
294 * its own metadata directory (e.g., usr/closed). Load from its
295 * root.
297 * 3. A manifest is loaded, but no longer applies (e.g., the manifest
298 * under usr/closed is loaded, but we've moved to usr/src).
300 if (scmdata.manifest == NULL ||
301 (strcmp(scmpath, scmdata.metapath) != 0 &&
302 access(scmpath, X_OK) == 0) ||
303 strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
304 pnset_free(scmdata.manifest);
305 scmdata.manifest = NULL;
307 (void) strlcpy(basepath, path, MAXPATHLEN);
310 * Walk up the directory tree looking for metadata
311 * subdirectories.
313 while (access(scmpath, X_OK) == -1) {
314 slash = strrchr(basepath, '/');
315 if (slash == NULL) {
316 if (!scmdata.rootwarn) {
317 warn("no metadata directory "
318 "for \"%s\"\n", path);
319 scmdata.rootwarn = B_TRUE;
321 return;
323 *slash = '\0';
324 (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
325 meta);
329 * We found a directory with an SCM metadata directory; record
330 * it and load its manifest.
332 (void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
333 (void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
334 scmdata.manifest = manifest_func(scmdata.root);
337 * The logic in check_scmdata() depends on scmdata.root having
338 * a single trailing slash, so only add it if it's missing.
340 if (scmdata.root[strlen(scmdata.root) - 1] != '/')
341 (void) strlcat(scmdata.root, "/", MAXPATHLEN);
342 scmdata.rootlen = strlen(scmdata.root);
347 * If necessary, change our active manifest to be appropriate for `path'.
349 static void
350 chdir_git(const char *path)
352 chdir_scmdata(path, ".git", git_manifest);
355 static void
356 chdir_hg(const char *path)
358 chdir_scmdata(path, ".hg", hg_manifest);
361 /* ARGSUSED */
362 static int
363 check_scmdata(const char *path, const struct FTW *ftwp)
366 * The manifest paths are relative to the manifest root; skip past it.
368 path += scmdata.rootlen;
370 return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
371 path));
375 * Check if a file is under TeamWare control by checking for its corresponding
376 * SCCS "s-dot" file.
378 static int
379 check_tw(const char *path, const struct FTW *ftwp)
381 char sccspath[MAXPATHLEN];
383 (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
384 path, path + ftwp->base);
386 return (access(sccspath, F_OK) == 0);
390 * Using `exceptfile' and a built-in list of exceptions, build and return a
391 * pnset_t consisting of all of the pathnames globs which are allowed to be
392 * unreferenced in the source tree.
394 static pnset_t *
395 make_exset(const char *exceptfile)
397 FILE *fp;
398 char line[MAXPATHLEN];
399 char *newline;
400 pnset_t *pnsetp;
401 unsigned int i;
403 pnsetp = calloc(sizeof (pnset_t), 1);
404 if (pnsetp == NULL)
405 return (NULL);
408 * Add any exceptions from the file.
410 fp = fopen(exceptfile, "r");
411 if (fp == NULL) {
412 warn("cannot open exception file \"%s\"", exceptfile);
413 goto fail;
416 while (fgets(line, sizeof (line), fp) != NULL) {
417 newline = strrchr(line, '\n');
418 if (newline != NULL)
419 *newline = '\0';
421 for (i = 0; isspace(line[i]); i++)
424 if (line[i] == '#' || line[i] == '\0')
425 continue;
427 if (pnset_add(pnsetp, line) == 0) {
428 (void) fclose(fp);
429 goto fail;
433 (void) fclose(fp);
434 return (pnsetp);
435 fail:
436 pnset_free(pnsetp);
437 return (NULL);
441 * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
443 static int
444 checkpath(const char *path, const struct stat *statp, int type,
445 struct FTW *ftwp)
447 switch (type) {
448 case FTW_F:
450 * Skip if the file is referenced or in the exception list.
452 if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
453 return (0);
456 * If requested, restrict ourselves to unreferenced files
457 * under SCM control.
459 if (scm == NULL || scm->checkfunc(path, ftwp))
460 (void) puts(path);
461 return (0);
463 case FTW_D:
465 * Prune any directories in the exception list.
467 if (pnset_check(exsetp, path)) {
468 ftwp->quit = FTW_PRUNE;
469 return (0);
473 * If necessary, advise the SCM logic of our new directory.
475 if (scm != NULL && scm->chdirfunc != NULL)
476 scm->chdirfunc(path);
478 return (0);
480 case FTW_DNR:
481 warn("cannot read \"%s\"", path);
482 return (0);
484 case FTW_NS:
485 warn("cannot stat \"%s\"", path);
486 return (0);
488 default:
489 break;
492 return (0);
496 * Add `path' to the pnset_t pointed to by `pnsetp'.
498 static int
499 pnset_add(pnset_t *pnsetp, const char *path)
501 char **newpaths;
502 unsigned int maxpaths;
504 if (pnsetp->npath == pnsetp->maxpaths) {
505 maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
506 newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
507 if (newpaths == NULL)
508 return (0);
509 pnsetp->paths = newpaths;
510 pnsetp->maxpaths = maxpaths;
513 pnsetp->paths[pnsetp->npath] = strdup(path);
514 if (pnsetp->paths[pnsetp->npath] == NULL)
515 return (0);
517 pnsetp->npath++;
518 return (1);
522 * Check `path' against the pnset_t pointed to by `pnsetp'.
524 static int
525 pnset_check(const pnset_t *pnsetp, const char *path)
527 unsigned int i;
529 for (i = 0; i < pnsetp->npath; i++) {
530 if (fnmatch(pnsetp->paths[i], path, 0) == 0)
531 return (1);
533 return (0);
537 * Empty the pnset_t pointed to by `pnsetp'.
539 static void
540 pnset_empty(pnset_t *pnsetp)
542 while (pnsetp->npath-- != 0)
543 free(pnsetp->paths[pnsetp->npath]);
545 free(pnsetp->paths);
546 pnsetp->maxpaths = 0;
550 * Free the pnset_t pointed to by `pnsetp'.
552 static void
553 pnset_free(pnset_t *pnsetp)
555 if (pnsetp != NULL) {
556 pnset_empty(pnsetp);
557 free(pnsetp);
561 /* PRINTFLIKE1 */
562 static void
563 warn(const char *format, ...)
565 va_list alist;
566 char *errstr = strerror(errno);
568 if (errstr == NULL)
569 errstr = "<unknown error>";
571 (void) fprintf(stderr, "%s: ", progname);
573 va_start(alist, format);
574 (void) vfprintf(stderr, format, alist);
575 va_end(alist);
577 if (strrchr(format, '\n') == NULL)
578 (void) fprintf(stderr, ": %s\n", errstr);
581 /* PRINTFLIKE1 */
582 static void
583 die(const char *format, ...)
585 va_list alist;
586 char *errstr = strerror(errno);
588 if (errstr == NULL)
589 errstr = "<unknown error>";
591 (void) fprintf(stderr, "%s: fatal: ", progname);
593 va_start(alist, format);
594 (void) vfprintf(stderr, format, alist);
595 va_end(alist);
597 if (strrchr(format, '\n') == NULL)
598 (void) fprintf(stderr, ": %s\n", errstr);
600 exit(EXIT_FAILURE);