HAMMER Util - Add new features, fix history retention bug in prune
[dragonfly.git] / contrib / libarchive / tar / tree.c
blobbebfb7977f97bbf08aa286cdeee42b8f260059da
1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 /*-
27 * This is a new directory-walking system that addresses a number
28 * of problems I've had with fts(3). In particular, it has no
29 * pathname-length limits (other than the size of 'int'), handles
30 * deep logical traversals, uses considerably less memory, and has
31 * an opaque interface (easier to modify in the future).
33 * Internally, it keeps a single list of "tree_entry" items that
34 * represent filesystem objects that require further attention.
35 * Non-directories are not kept in memory: they are pulled from
36 * readdir(), returned to the client, then freed as soon as possible.
37 * Any directory entry to be traversed gets pushed onto the stack.
39 * There is surprisingly little information that needs to be kept for
40 * each item on the stack. Just the name, depth (represented here as the
41 * string length of the parent directory's pathname), and some markers
42 * indicating how to get back to the parent (via chdir("..") for a
43 * regular dir or via fchdir(2) for a symlink).
45 #include "bsdtar_platform.h"
46 __FBSDID("$FreeBSD: src/usr.bin/tar/tree.c,v 1.9 2008/11/27 05:49:52 kientzle Exp $");
48 #ifdef HAVE_SYS_STAT_H
49 #include <sys/stat.h>
50 #endif
51 #ifdef HAVE_DIRENT_H
52 #include <dirent.h>
53 #endif
54 #ifdef HAVE_ERRNO_H
55 #include <errno.h>
56 #endif
57 #ifdef HAVE_FCNTL_H
58 #include <fcntl.h>
59 #endif
60 #ifdef HAVE_STDLIB_H
61 #include <stdlib.h>
62 #endif
63 #ifdef HAVE_STRING_H
64 #include <string.h>
65 #endif
66 #ifdef HAVE_UNISTD_H
67 #include <unistd.h>
68 #endif
70 #include "tree.h"
73 * TODO:
74 * 1) Loop checking.
75 * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
78 struct tree_entry {
79 struct tree_entry *next;
80 struct tree_entry *parent;
81 char *name;
82 size_t dirname_length;
83 dev_t dev;
84 ino_t ino;
85 #ifdef HAVE_FCHDIR
86 int fd;
87 #elif defined(_WIN32) && !defined(__CYGWIN__)
88 char *fullpath;
89 #else
90 #error fchdir function required.
91 #endif
92 int flags;
95 /* Definitions for tree_entry.flags bitmap. */
96 #define isDir 1 /* This entry is a regular directory. */
97 #define isDirLink 2 /* This entry is a symbolic link to a directory. */
98 #define needsPreVisit 4 /* This entry needs to be previsited. */
99 #define needsPostVisit 8 /* This entry needs to be postvisited. */
102 * Local data for this package.
104 struct tree {
105 struct tree_entry *stack;
106 struct tree_entry *current;
107 DIR *d;
108 #ifdef HAVE_FCHDIR
109 int initialDirFd;
110 #elif defined(_WIN32) && !defined(__CYGWIN__)
111 char *initialDir;
112 #endif
113 int flags;
114 int visit_type;
115 int tree_errno; /* Error code from last failed operation. */
117 char *buff;
118 const char *basename;
119 size_t buff_length;
120 size_t path_length;
121 size_t dirname_length;
123 int depth;
124 int openCount;
125 int maxOpenCount;
127 struct stat lst;
128 struct stat st;
131 /* Definitions for tree.flags bitmap. */
132 #define needsReturn 8 /* Marks first entry as not having been returned yet. */
133 #define hasStat 16 /* The st entry is set. */
134 #define hasLstat 32 /* The lst entry is set. */
137 #ifdef HAVE_DIRENT_D_NAMLEN
138 /* BSD extension; avoids need for a strlen() call. */
139 #define D_NAMELEN(dp) (dp)->d_namlen
140 #else
141 #define D_NAMELEN(dp) (strlen((dp)->d_name))
142 #endif
144 #if 0
145 #include <stdio.h>
146 void
147 tree_dump(struct tree *t, FILE *out)
149 struct tree_entry *te;
151 fprintf(out, "\tdepth: %d\n", t->depth);
152 fprintf(out, "\tbuff: %s\n", t->buff);
153 fprintf(out, "\tpwd: "); fflush(stdout); system("pwd");
154 fprintf(out, "\taccess: %s\n", t->basename);
155 fprintf(out, "\tstack:\n");
156 for (te = t->stack; te != NULL; te = te->next) {
157 fprintf(out, "\t\tte->name: %s%s%s\n", te->name,
158 te->flags & needsPreVisit ? "" : " *",
159 t->current == te ? " (current)" : "");
162 #endif
165 * Add a directory path to the current stack.
167 static void
168 tree_push(struct tree *t, const char *path)
170 struct tree_entry *te;
172 te = malloc(sizeof(*te));
173 memset(te, 0, sizeof(*te));
174 te->next = t->stack;
175 t->stack = te;
176 #ifdef HAVE_FCHDIR
177 te->fd = -1;
178 #elif defined(_WIN32) && !defined(__CYGWIN__)
179 te->fullpath = NULL;
180 #endif
181 te->name = strdup(path);
182 te->flags = needsPreVisit | needsPostVisit;
183 te->dirname_length = t->dirname_length;
187 * Append a name to the current path.
189 static void
190 tree_append(struct tree *t, const char *name, size_t name_length)
192 char *p;
194 if (t->buff != NULL)
195 t->buff[t->dirname_length] = '\0';
196 /* Strip trailing '/' from name, unless entire name is "/". */
197 while (name_length > 1 && name[name_length - 1] == '/')
198 name_length--;
200 /* Resize pathname buffer as needed. */
201 while (name_length + 1 + t->dirname_length >= t->buff_length) {
202 t->buff_length *= 2;
203 if (t->buff_length < 1024)
204 t->buff_length = 1024;
205 t->buff = realloc(t->buff, t->buff_length);
207 p = t->buff + t->dirname_length;
208 t->path_length = t->dirname_length + name_length;
209 /* Add a separating '/' if it's needed. */
210 if (t->dirname_length > 0 && p[-1] != '/') {
211 *p++ = '/';
212 t->path_length ++;
214 strncpy(p, name, name_length);
215 p[name_length] = '\0';
216 t->basename = p;
220 * Open a directory tree for traversal.
222 struct tree *
223 tree_open(const char *path)
225 struct tree *t;
227 t = malloc(sizeof(*t));
228 memset(t, 0, sizeof(*t));
229 tree_append(t, path, strlen(path));
230 #ifdef HAVE_FCHDIR
231 t->initialDirFd = open(".", O_RDONLY);
232 #elif defined(_WIN32) && !defined(__CYGWIN__)
233 t->initialDir = getcwd(NULL, 0);
234 #endif
236 * During most of the traversal, items are set up and then
237 * returned immediately from tree_next(). That doesn't work
238 * for the very first entry, so we set a flag for this special
239 * case.
241 t->flags = needsReturn;
242 return (t);
246 * We've finished a directory; ascend back to the parent.
248 static int
249 tree_ascend(struct tree *t)
251 struct tree_entry *te;
252 int r = 0;
254 te = t->stack;
255 t->depth--;
256 if (te->flags & isDirLink) {
257 #ifdef HAVE_FCHDIR
258 if (fchdir(te->fd) != 0) {
259 t->tree_errno = errno;
260 r = TREE_ERROR_FATAL;
262 close(te->fd);
263 #elif defined(_WIN32) && !defined(__CYGWIN__)
264 if (chdir(te->fullpath) != 0) {
265 t->tree_errno = errno;
266 r = TREE_ERROR_FATAL;
268 free(te->fullpath);
269 te->fullpath = NULL;
270 #endif
271 t->openCount--;
272 } else {
273 if (chdir("..") != 0) {
274 t->tree_errno = errno;
275 r = TREE_ERROR_FATAL;
278 return (r);
282 * Pop the working stack.
284 static void
285 tree_pop(struct tree *t)
287 struct tree_entry *te;
289 t->buff[t->dirname_length] = '\0';
290 if (t->stack == t->current && t->current != NULL)
291 t->current = t->current->parent;
292 te = t->stack;
293 t->stack = te->next;
294 t->dirname_length = te->dirname_length;
295 t->basename = t->buff + t->dirname_length;
296 /* Special case: starting dir doesn't skip leading '/'. */
297 if (t->dirname_length > 0)
298 t->basename++;
299 free(te->name);
300 free(te);
304 * Get the next item in the tree traversal.
307 tree_next(struct tree *t)
309 struct dirent *de = NULL;
310 int r;
312 /* If we're called again after a fatal error, that's an API
313 * violation. Just crash now. */
314 if (t->visit_type == TREE_ERROR_FATAL) {
315 const char *msg = "Unable to continue traversing"
316 " directory heirarchy after a fatal error.";
317 write(2, msg, strlen(msg));
318 *(int *)0 = 1; /* Deliberate SEGV; NULL pointer dereference. */
319 exit(1); /* In case the SEGV didn't work. */
322 /* Handle the startup case by returning the initial entry. */
323 if (t->flags & needsReturn) {
324 t->flags &= ~needsReturn;
325 return (t->visit_type = TREE_REGULAR);
328 while (t->stack != NULL) {
329 /* If there's an open dir, get the next entry from there. */
330 while (t->d != NULL) {
331 de = readdir(t->d);
332 if (de == NULL) {
333 closedir(t->d);
334 t->d = NULL;
335 } else if (de->d_name[0] == '.'
336 && de->d_name[1] == '\0') {
337 /* Skip '.' */
338 } else if (de->d_name[0] == '.'
339 && de->d_name[1] == '.'
340 && de->d_name[2] == '\0') {
341 /* Skip '..' */
342 } else {
344 * Append the path to the current path
345 * and return it.
347 tree_append(t, de->d_name, D_NAMELEN(de));
348 t->flags &= ~hasLstat;
349 t->flags &= ~hasStat;
350 return (t->visit_type = TREE_REGULAR);
354 /* If the current dir needs to be visited, set it up. */
355 if (t->stack->flags & needsPreVisit) {
356 t->current = t->stack;
357 tree_append(t, t->stack->name, strlen(t->stack->name));
358 t->stack->flags &= ~needsPreVisit;
359 /* If it is a link, set up fd for the ascent. */
360 if (t->stack->flags & isDirLink) {
361 #ifdef HAVE_FCHDIR
362 t->stack->fd = open(".", O_RDONLY);
363 #elif defined(_WIN32) && !defined(__CYGWIN__)
364 t->stack->fullpath = getcwd(NULL, 0);
365 #endif
366 t->openCount++;
367 if (t->openCount > t->maxOpenCount)
368 t->maxOpenCount = t->openCount;
370 t->dirname_length = t->path_length;
371 if (chdir(t->stack->name) != 0) {
372 /* chdir() failed; return error */
373 tree_pop(t);
374 t->tree_errno = errno;
375 return (t->visit_type = TREE_ERROR_DIR);
377 t->depth++;
378 t->d = opendir(".");
379 if (t->d == NULL) {
380 r = tree_ascend(t); /* Undo "chdir" */
381 tree_pop(t);
382 t->tree_errno = errno;
383 t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
384 return (t->visit_type);
386 t->flags &= ~hasLstat;
387 t->flags &= ~hasStat;
388 t->basename = ".";
389 return (t->visit_type = TREE_POSTDESCENT);
392 /* We've done everything necessary for the top stack entry. */
393 if (t->stack->flags & needsPostVisit) {
394 r = tree_ascend(t);
395 tree_pop(t);
396 t->flags &= ~hasLstat;
397 t->flags &= ~hasStat;
398 t->visit_type = r != 0 ? r : TREE_POSTASCENT;
399 return (t->visit_type);
402 return (t->visit_type = 0);
406 * Return error code.
409 tree_errno(struct tree *t)
411 return (t->tree_errno);
415 * Called by the client to mark the directory just returned from
416 * tree_next() as needing to be visited.
418 void
419 tree_descend(struct tree *t)
421 if (t->visit_type != TREE_REGULAR)
422 return;
424 if (tree_current_is_physical_dir(t)) {
425 tree_push(t, t->basename);
426 t->stack->flags |= isDir;
427 } else if (tree_current_is_dir(t)) {
428 tree_push(t, t->basename);
429 t->stack->flags |= isDirLink;
434 * Get the stat() data for the entry just returned from tree_next().
436 const struct stat *
437 tree_current_stat(struct tree *t)
439 if (!(t->flags & hasStat)) {
440 if (stat(t->basename, &t->st) != 0)
441 return NULL;
442 t->flags |= hasStat;
444 return (&t->st);
448 * Get the lstat() data for the entry just returned from tree_next().
450 const struct stat *
451 tree_current_lstat(struct tree *t)
453 if (!(t->flags & hasLstat)) {
454 if (lstat(t->basename, &t->lst) != 0)
455 return NULL;
456 t->flags |= hasLstat;
458 return (&t->lst);
462 * Test whether current entry is a dir or link to a dir.
465 tree_current_is_dir(struct tree *t)
467 const struct stat *st;
470 * If we already have lstat() info, then try some
471 * cheap tests to determine if this is a dir.
473 if (t->flags & hasLstat) {
474 /* If lstat() says it's a dir, it must be a dir. */
475 if (S_ISDIR(tree_current_lstat(t)->st_mode))
476 return 1;
477 /* Not a dir; might be a link to a dir. */
478 /* If it's not a link, then it's not a link to a dir. */
479 if (!S_ISLNK(tree_current_lstat(t)->st_mode))
480 return 0;
482 * It's a link, but we don't know what it's a link to,
483 * so we'll have to use stat().
487 st = tree_current_stat(t);
488 /* If we can't stat it, it's not a dir. */
489 if (st == NULL)
490 return 0;
491 /* Use the definitive test. Hopefully this is cached. */
492 return (S_ISDIR(st->st_mode));
496 * Test whether current entry is a physical directory. Usually, we
497 * already have at least one of stat() or lstat() in memory, so we
498 * use tricks to try to avoid an extra trip to the disk.
501 tree_current_is_physical_dir(struct tree *t)
503 const struct stat *st;
506 * If stat() says it isn't a dir, then it's not a dir.
507 * If stat() data is cached, this check is free, so do it first.
509 if ((t->flags & hasStat)
510 && (!S_ISDIR(tree_current_stat(t)->st_mode)))
511 return 0;
514 * Either stat() said it was a dir (in which case, we have
515 * to determine whether it's really a link to a dir) or
516 * stat() info wasn't available. So we use lstat(), which
517 * hopefully is already cached.
520 st = tree_current_lstat(t);
521 /* If we can't stat it, it's not a dir. */
522 if (st == NULL)
523 return 0;
524 /* Use the definitive test. Hopefully this is cached. */
525 return (S_ISDIR(st->st_mode));
529 * Test whether current entry is a symbolic link.
532 tree_current_is_physical_link(struct tree *t)
534 const struct stat *st = tree_current_lstat(t);
535 if (st == NULL)
536 return 0;
537 return (S_ISLNK(st->st_mode));
541 * Return the access path for the entry just returned from tree_next().
543 const char *
544 tree_current_access_path(struct tree *t)
546 return (t->basename);
550 * Return the full path for the entry just returned from tree_next().
552 const char *
553 tree_current_path(struct tree *t)
555 return (t->buff);
559 * Return the length of the path for the entry just returned from tree_next().
561 size_t
562 tree_current_pathlen(struct tree *t)
564 return (t->path_length);
568 * Return the nesting depth of the entry just returned from tree_next().
571 tree_current_depth(struct tree *t)
573 return (t->depth);
577 * Terminate the traversal and release any resources.
579 void
580 tree_close(struct tree *t)
582 /* Release anything remaining in the stack. */
583 while (t->stack != NULL)
584 tree_pop(t);
585 if (t->buff)
586 free(t->buff);
587 /* chdir() back to where we started. */
588 #ifdef HAVE_FCHDIR
589 if (t->initialDirFd >= 0) {
590 fchdir(t->initialDirFd);
591 close(t->initialDirFd);
592 t->initialDirFd = -1;
594 #elif defined(_WIN32) && !defined(__CYGWIN__)
595 if (t->initialDir != NULL) {
596 chdir(t->initialDir);
597 free(t->initialDir);
598 t->initialDir = NULL;
600 #endif
601 free(t);