2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * This is a new directory-walking system that addresses a number
28 * of problems I've had with fts(3). In particular, it has no
29 * pathname-length limits (other than the size of 'int'), handles
30 * deep logical traversals, uses considerably less memory, and has
31 * an opaque interface (easier to modify in the future).
33 * Internally, it keeps a single list of "tree_entry" items that
34 * represent filesystem objects that require further attention.
35 * Non-directories are not kept in memory: they are pulled from
36 * readdir(), returned to the client, then freed as soon as possible.
37 * Any directory entry to be traversed gets pushed onto the stack.
39 * There is surprisingly little information that needs to be kept for
40 * each item on the stack. Just the name, depth (represented here as the
41 * string length of the parent directory's pathname), and some markers
42 * indicating how to get back to the parent (via chdir("..") for a
43 * regular dir or via fchdir(2) for a symlink).
45 #include "bsdtar_platform.h"
46 __FBSDID("$FreeBSD: src/usr.bin/tar/tree.c,v 1.9 2008/11/27 05:49:52 kientzle Exp $");
48 #ifdef HAVE_SYS_STAT_H
75 * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
/*
 * Members of struct tree_entry that are visible in this listing (the
 * opening line of the struct is not part of it):
 *   next           - link to the following entry; tree_dump() walks the
 *                    stack through it.
 *   parent         - entry that tree_pop() installs as t->current when
 *                    the popped entry was the current one.
 *   dirname_length - copy of t->dirname_length taken by tree_push() and
 *                    written back to t->dirname_length by tree_pop().
 * The conditional section has a branch for native Windows (not Cygwin)
 * and otherwise stops the build with "fchdir function required."
 * NOTE(review): other members used elsewhere in this file (name, flags,
 * fd, fullpath) and the opening #if are not visible here -- confirm
 * against the complete source.
 */
79 struct tree_entry
*next
;
80 struct tree_entry
*parent
;
82 size_t dirname_length
;
87 #elif defined(_WIN32) && !defined(__CYGWIN__)
90 #error fchdir function required.
/*
 * Bit values kept in tree_entry.flags.  tree_push() sets
 * needsPreVisit | needsPostVisit on every new entry, tree_descend() adds
 * isDir or isDirLink, and tree_next() clears needsPreVisit once it has
 * set the entry up for descent.  needsPostVisit has the same numeric
 * value (8) as needsReturn, but that macro belongs to the separate
 * tree.flags bitmap.
 */
95 /* Definitions for tree_entry.flags bitmap. */
96 #define isDir 1 /* This entry is a regular directory. */
97 #define isDirLink 2 /* This entry is a symbolic link to a directory. */
98 #define needsPreVisit 4 /* This entry needs to be previsited. */
99 #define needsPostVisit 8 /* This entry needs to be postvisited. */
/*
 * Members of struct tree that are visible in this listing:
 *   stack          - first tree_entry of the pending-directory list;
 *                    tree_next() keeps working while it is non-NULL.
 *   current        - entry that tree_next() most recently set up for
 *                    descent (assigned from t->stack there).
 *   tree_errno     - errno value saved after a failed chdir()/fchdir();
 *                    returned by tree_errno().
 *   basename       - pointer into t->buff; it is the path handed to
 *                    stat()/lstat() and returned by
 *                    tree_current_access_path().
 *   dirname_length - offset in t->buff at which tree_append() places the
 *                    next name.
 * NOTE(review): the remaining members used in this file (d, buff,
 * buff_length, path_length, flags, visit_type, depth, openCount,
 * maxOpenCount, st, lst, initialDirFd/initialDir) are not part of this
 * listing.
 */
102 * Local data for this package.
105 struct tree_entry
*stack
;
106 struct tree_entry
*current
;
110 #elif defined(_WIN32) && !defined(__CYGWIN__)
115 int tree_errno
; /* Error code from last failed operation. */
118 const char *basename
;
121 size_t dirname_length
;
/*
 * Bit values kept in tree.flags.  tree_open() starts with needsReturn
 * set and tree_next() clears it when it reports the first entry.
 * hasStat and hasLstat are cleared by tree_next() each time it moves to
 * another entry; tree_current_stat() and tree_current_lstat() test them
 * before calling stat()/lstat().
 */
131 /* Definitions for tree.flags bitmap. */
132 #define needsReturn 8 /* Marks first entry as not having been returned yet. */
133 #define hasStat 16 /* The st entry is set. */
134 #define hasLstat 32 /* The lst entry is set. */
/*
 * D_NAMELEN(dp): length of the name stored in directory entry dp.
 * With HAVE_DIRENT_D_NAMLEN defined the d_namlen member is read;
 * the second definition measures d_name with strlen() instead.
 * NOTE(review): the first expansion has no outer parentheses; the one
 * use in this file, D_NAMELEN(de) passed as a function argument, is not
 * affected.  The directives that separate and close the two definitions
 * are not part of this listing.
 */
137 #ifdef HAVE_DIRENT_D_NAMLEN
138 /* BSD extension; avoids need for a strlen() call. */
139 #define D_NAMELEN(dp) (dp)->d_namlen
141 #define D_NAMELEN(dp) (strlen((dp)->d_name))
/*
 * tree_dump: debugging aid that writes the traversal state held in `t`
 * to the stream `out`.
 *
 * Prints t->depth, t->buff, the working directory (by running "pwd"
 * through system()), t->basename (labelled "access"), and then one line
 * for each entry reachable from t->stack through ->next.  An entry whose
 * needsPreVisit bit is clear gets the suffix " *"; the entry equal to
 * t->current gets the suffix " (current)".
 *
 * NOTE(review): before system("pwd") the code flushes stdout although
 * the "pwd: " label was written to `out`; when `out` is some other
 * buffered stream the label and the pwd output may not appear together.
 */
147 tree_dump(struct tree
*t
, FILE *out
)
149 struct tree_entry
*te
;
151 fprintf(out
, "\tdepth: %d\n", t
->depth
);
152 fprintf(out
, "\tbuff: %s\n", t
->buff
);
153 fprintf(out
, "\tpwd: "); fflush(stdout
); system("pwd");
154 fprintf(out
, "\taccess: %s\n", t
->basename
);
155 fprintf(out
, "\tstack:\n");
156 for (te
= t
->stack
; te
!= NULL
; te
= te
->next
) {
157 fprintf(out
, "\t\tte->name: %s%s%s\n", te
->name
,
158 te
->flags
& needsPreVisit
? "" : " *",
159 t
->current
== te
? " (current)" : "");
/*
 * tree_push: create a new tree_entry for the directory `path`.
 *
 * The entry is allocated and zero-filled, receives a strdup() copy of
 * `path` in te->name, is flagged needsPreVisit | needsPostVisit, and
 * records the current t->dirname_length (tree_pop() later writes that
 * value back to t->dirname_length).
 *
 * NOTE(review): the malloc() result goes to memset() on the very next
 * source line without a NULL check, and no check of the strdup() result
 * is visible.  The statements that link the entry into t->stack and the
 * platform-specific initialisation around the #elif are not part of
 * this listing.
 */
165 * Add a directory path to the current stack.
168 tree_push(struct tree
*t
, const char *path
)
170 struct tree_entry
*te
;
172 te
= malloc(sizeof(*te
));
173 memset(te
, 0, sizeof(*te
));
178 #elif defined(_WIN32) && !defined(__CYGWIN__)
181 te
->name
= strdup(path
);
182 te
->flags
= needsPreVisit
| needsPostVisit
;
183 te
->dirname_length
= t
->dirname_length
;
/*
 * tree_append: rebuild the path in t->buff so that it ends with `name`.
 *
 *   t           - traversal state; t->buff, t->buff_length and
 *                 t->path_length are updated.
 *   name        - component to place after the directory part.
 *   name_length - number of bytes of `name` to use; they are copied with
 *                 strncpy() and the result is terminated explicitly.
 *
 * Visible steps: cut t->buff back to t->dirname_length; loop over
 * trailing '/' characters of `name` while more than one byte remains;
 * enlarge t->buff (to at least 1024 bytes) until
 * name_length + 1 + dirname_length is below t->buff_length; set
 * t->path_length to dirname_length + name_length; test whether a '/'
 * separator is needed (non-empty directory part not already ending in
 * '/'); copy the name and terminate the string.
 *
 * NOTE(review): realloc() is assigned straight back to t->buff and no
 * failure check is visible, so a failed allocation would lose the old
 * buffer.  The bodies of the trailing-slash loop and of the separator
 * branch, and the step that enlarges buff_length, are not part of this
 * listing.
 */
187 * Append a name to the current path.
190 tree_append(struct tree
*t
, const char *name
, size_t name_length
)
195 t
->buff
[t
->dirname_length
] = '\0';
196 /* Strip trailing '/' from name, unless entire name is "/". */
197 while (name_length
> 1 && name
[name_length
- 1] == '/')
200 /* Resize pathname buffer as needed. */
201 while (name_length
+ 1 + t
->dirname_length
>= t
->buff_length
) {
203 if (t
->buff_length
< 1024)
204 t
->buff_length
= 1024;
205 t
->buff
= realloc(t
->buff
, t
->buff_length
);
207 p
= t
->buff
+ t
->dirname_length
;
208 t
->path_length
= t
->dirname_length
+ name_length
;
209 /* Add a separating '/' if it's needed. */
210 if (t
->dirname_length
> 0 && p
[-1] != '/') {
214 strncpy(p
, name
, name_length
);
215 p
[name_length
] = '\0';
/*
 * tree_open: allocate a traversal handle for the tree rooted at `path`.
 *
 * The handle is zero-filled, `path` becomes the current path through
 * tree_append(), and the starting directory is remembered: as a
 * descriptor from open(".", O_RDONLY), or in the native-Windows branch
 * as the string returned by getcwd(NULL, 0).  tree_close() uses that
 * value to change back.  t->flags is set to needsReturn so that the
 * first tree_next() call reports the initial entry itself.
 *
 * NOTE(review): the malloc() result goes to memset() on the very next
 * source line without a NULL check.  The return statement is not part
 * of this listing.
 */
220 * Open a directory tree for traversal.
223 tree_open(const char *path
)
227 t
= malloc(sizeof(*t
));
228 memset(t
, 0, sizeof(*t
));
229 tree_append(t
, path
, strlen(path
));
231 t
->initialDirFd
= open(".", O_RDONLY
);
232 #elif defined(_WIN32) && !defined(__CYGWIN__)
233 t
->initialDir
= getcwd(NULL
, 0);
236 * During most of the traversal, items are set up and then
237 * returned immediately from tree_next(). That doesn't work
238 * for the very first entry, so we set a flag for this special
241 t
->flags
= needsReturn
;
/*
 * tree_ascend: change the working directory back out of the directory
 * that has just been finished.
 *
 * For an entry flagged isDirLink the saved location is used: fchdir()
 * to te->fd, or chdir() to te->fullpath in the native-Windows branch.
 * A chdir("..") call follows; presumably it is the branch for entries
 * that are not links (the else is not part of this listing -- confirm).
 * When one of these calls fails, errno is stored in t->tree_errno and
 * the result `r` becomes TREE_ERROR_FATAL.  tree_next() treats a
 * non-zero result as the visit type to report.
 *
 * NOTE(review): how `te` is obtained, the initial value of `r`, and the
 * release of te->fd / te->fullpath are not part of this listing.
 */
246 * We've finished a directory; ascend back to the parent.
249 tree_ascend(struct tree
*t
)
251 struct tree_entry
*te
;
256 if (te
->flags
& isDirLink
) {
258 if (fchdir(te
->fd
) != 0) {
259 t
->tree_errno
= errno
;
260 r
= TREE_ERROR_FATAL
;
263 #elif defined(_WIN32) && !defined(__CYGWIN__)
264 if (chdir(te
->fullpath
) != 0) {
265 t
->tree_errno
= errno
;
266 r
= TREE_ERROR_FATAL
;
273 if (chdir("..") != 0) {
274 t
->tree_errno
= errno
;
275 r
= TREE_ERROR_FATAL
;
/*
 * tree_pop: drop the finished entry and restore the path bookkeeping.
 *
 * Visible steps: truncate t->buff at t->dirname_length; when the top of
 * the stack is also t->current (and not NULL), move t->current to its
 * parent; copy te->dirname_length back into t->dirname_length and point
 * t->basename at that offset inside t->buff.  The last test applies an
 * adjustment only when dirname_length is greater than zero; per its
 * comment the starting directory does not skip a leading '/'.
 *
 * NOTE(review): the statements that take `te` off t->stack, release it,
 * and form the body of the final `if` are not part of this listing.
 */
282 * Pop the working stack.
285 tree_pop(struct tree
*t
)
287 struct tree_entry
*te
;
289 t
->buff
[t
->dirname_length
] = '\0';
290 if (t
->stack
== t
->current
&& t
->current
!= NULL
)
291 t
->current
= t
->current
->parent
;
294 t
->dirname_length
= te
->dirname_length
;
295 t
->basename
= t
->buff
+ t
->dirname_length
;
296 /* Special case: starting dir doesn't skip leading '/'. */
297 if (t
->dirname_length
> 0)
/*
 * tree_next: advance the traversal and report what kind of visit the
 * caller is now looking at.  The value is also stored in t->visit_type.
 *
 * Behaviour visible in this listing, in order:
 *   - If the previous call reported TREE_ERROR_FATAL, a message is
 *     written to descriptor 2 and the process is crashed on purpose
 *     (NULL write, then exit(1)).
 *   - If needsReturn is still set (first call after tree_open()), the
 *     bit is cleared and TREE_REGULAR is returned for the initial entry.
 *   - While the stack is non-empty:
 *       * With a directory open (t->d), names are examined; "." and
 *         ".." are matched by dedicated branches, any other name is
 *         appended with tree_append(), the cached stat/lstat bits are
 *         cleared and TREE_REGULAR is returned.
 *       * If the top entry still needs its pre-visit, it becomes
 *         t->current, its name is appended, needsPreVisit is cleared
 *         and, for an isDirLink entry, the current directory is saved
 *         (open(".") or getcwd()) and maxOpenCount is raised to
 *         openCount when exceeded.  t->dirname_length takes the value
 *         of t->path_length and chdir() enters the directory.  A
 *         failing chdir() yields TREE_ERROR_DIR with errno saved.  A
 *         later failure path undoes the chdir through tree_ascend() and
 *         returns its result, or TREE_ERROR_DIR when that is 0.
 *         Otherwise the stat bits are cleared and TREE_POSTDESCENT is
 *         returned.
 *       * If the top entry needs its post-visit, the stat bits are
 *         cleared and the result is `r` when non-zero, else
 *         TREE_POSTASCENT.
 *   - With an empty stack the function returns 0.
 *
 * NOTE(review): the message text spells "heirarchy" (hierarchy).
 * NOTE(review): in the undo path errno is read after tree_ascend() has
 * run, so it may no longer be the errno of the original failure --
 * confirm against the complete source.
 * NOTE(review): the readdir()/opendir() calls, the handling of the "."
 * and ".." branches, and the statements that set `r` before the
 * post-visit return are not part of this listing.
 */
304 * Get the next item in the tree traversal.
307 tree_next(struct tree
*t
)
309 struct dirent
*de
= NULL
;
312 /* If we're called again after a fatal error, that's an API
313 * violation. Just crash now. */
314 if (t
->visit_type
== TREE_ERROR_FATAL
) {
315 const char *msg
= "Unable to continue traversing"
316 " directory heirarchy after a fatal error.";
317 write(2, msg
, strlen(msg
));
318 *(int *)0 = 1; /* Deliberate SEGV; NULL pointer dereference. */
319 exit(1); /* In case the SEGV didn't work. */
322 /* Handle the startup case by returning the initial entry. */
323 if (t
->flags
& needsReturn
) {
324 t
->flags
&= ~needsReturn
;
325 return (t
->visit_type
= TREE_REGULAR
);
328 while (t
->stack
!= NULL
) {
329 /* If there's an open dir, get the next entry from there. */
330 while (t
->d
!= NULL
) {
335 } else if (de
->d_name
[0] == '.'
336 && de
->d_name
[1] == '\0') {
338 } else if (de
->d_name
[0] == '.'
339 && de
->d_name
[1] == '.'
340 && de
->d_name
[2] == '\0') {
344 * Append the path to the current path
347 tree_append(t
, de
->d_name
, D_NAMELEN(de
));
348 t
->flags
&= ~hasLstat
;
349 t
->flags
&= ~hasStat
;
350 return (t
->visit_type
= TREE_REGULAR
);
354 /* If the current dir needs to be visited, set it up. */
355 if (t
->stack
->flags
& needsPreVisit
) {
356 t
->current
= t
->stack
;
357 tree_append(t
, t
->stack
->name
, strlen(t
->stack
->name
));
358 t
->stack
->flags
&= ~needsPreVisit
;
359 /* If it is a link, set up fd for the ascent. */
360 if (t
->stack
->flags
& isDirLink
) {
362 t
->stack
->fd
= open(".", O_RDONLY
);
363 #elif defined(_WIN32) && !defined(__CYGWIN__)
364 t
->stack
->fullpath
= getcwd(NULL
, 0);
367 if (t
->openCount
> t
->maxOpenCount
)
368 t
->maxOpenCount
= t
->openCount
;
370 t
->dirname_length
= t
->path_length
;
371 if (chdir(t
->stack
->name
) != 0) {
372 /* chdir() failed; return error */
374 t
->tree_errno
= errno
;
375 return (t
->visit_type
= TREE_ERROR_DIR
);
380 r
= tree_ascend(t
); /* Undo "chdir" */
382 t
->tree_errno
= errno
;
383 t
->visit_type
= r
!= 0 ? r
: TREE_ERROR_DIR
;
384 return (t
->visit_type
);
386 t
->flags
&= ~hasLstat
;
387 t
->flags
&= ~hasStat
;
389 return (t
->visit_type
= TREE_POSTDESCENT
);
392 /* We've done everything necessary for the top stack entry. */
393 if (t
->stack
->flags
& needsPostVisit
) {
396 t
->flags
&= ~hasLstat
;
397 t
->flags
&= ~hasStat
;
398 t
->visit_type
= r
!= 0 ? r
: TREE_POSTASCENT
;
399 return (t
->visit_type
);
402 return (t
->visit_type
= 0);
/*
 * tree_errno: return the value stored in t->tree_errno, which this file
 * sets from errno after a failed chdir()/fchdir().
 */
409 tree_errno(struct tree
*t
)
411 return (t
->tree_errno
);
/*
 * tree_descend: request that the entry most recently returned by
 * tree_next() be traversed as a directory.
 *
 * The request is examined only in relation to TREE_REGULAR visits (see
 * the first test).  A physical directory is pushed with the isDir flag;
 * anything else that tree_current_is_dir() accepts -- i.e. a link that
 * resolves to a directory -- is pushed with isDirLink.  In both cases
 * the pushed name is t->basename.
 *
 * NOTE(review): the statement executed when visit_type is not
 * TREE_REGULAR is not part of this listing; presumably an early return.
 */
415 * Called by the client to mark the directory just returned from
416 * tree_next() as needing to be visited.
419 tree_descend(struct tree
*t
)
421 if (t
->visit_type
!= TREE_REGULAR
)
424 if (tree_current_is_physical_dir(t
)) {
425 tree_push(t
, t
->basename
);
426 t
->stack
->flags
|= isDir
;
427 } else if (tree_current_is_dir(t
)) {
428 tree_push(t
, t
->basename
);
429 t
->stack
->flags
|= isDirLink
;
/*
 * tree_current_stat: stat() information for the current entry, fetched
 * lazily.  stat() is called on t->basename and fills t->st only when
 * the hasStat bit is clear.
 *
 * NOTE(review): the failure branch, the setting of hasStat and the
 * return statement are not part of this listing.  Callers in this file
 * use the result as a `const struct stat *` and their comments allow
 * for it being unavailable -- confirm the NULL-on-failure contract.
 */
434 * Get the stat() data for the entry just returned from tree_next().
437 tree_current_stat(struct tree
*t
)
439 if (!(t
->flags
& hasStat
)) {
440 if (stat(t
->basename
, &t
->st
) != 0)
/*
 * tree_current_lstat: lstat() information for the current entry, fetched
 * lazily.  lstat() is called on t->basename and fills t->lst only when
 * the hasLstat bit is clear; afterwards hasLstat is set so that later
 * calls reuse the stored data.
 *
 * NOTE(review): the failure branch and the return statement are not part
 * of this listing.  Callers in this file use the result as a
 * `const struct stat *` -- confirm the NULL-on-failure contract.
 */
448 * Get the lstat() data for the entry just returned from tree_next().
451 tree_current_lstat(struct tree
*t
)
453 if (!(t
->flags
& hasLstat
)) {
454 if (lstat(t
->basename
, &t
->lst
) != 0)
456 t
->flags
|= hasLstat
;
/*
 * tree_current_is_dir: non-zero when the current entry is a directory,
 * either directly or through a symbolic link.
 *
 * When lstat() data is already cached (hasLstat), two cheap tests run
 * first: S_ISDIR on the lstat mode, then S_ISLNK on the lstat mode.
 * The final answer comes from S_ISDIR applied to the stat() data
 * obtained through tree_current_stat().
 *
 * NOTE(review): the statements executed by the two cheap tests and the
 * check for a failed tree_current_stat() are not part of this listing;
 * the surviving comments indicate "is a dir" / "is not a dir" results.
 */
462 * Test whether current entry is a dir or link to a dir.
465 tree_current_is_dir(struct tree
*t
)
467 const struct stat
*st
;
470 * If we already have lstat() info, then try some
471 * cheap tests to determine if this is a dir.
473 if (t
->flags
& hasLstat
) {
474 /* If lstat() says it's a dir, it must be a dir. */
475 if (S_ISDIR(tree_current_lstat(t
)->st_mode
))
477 /* Not a dir; might be a link to a dir. */
478 /* If it's not a link, then it's not a link to a dir. */
479 if (!S_ISLNK(tree_current_lstat(t
)->st_mode
))
482 * It's a link, but we don't know what it's a link to,
483 * so we'll have to use stat().
487 st
= tree_current_stat(t
);
488 /* If we can't stat it, it's not a dir. */
491 /* Use the definitive test. Hopefully this is cached. */
492 return (S_ISDIR(st
->st_mode
));
/*
 * tree_current_is_physical_dir: non-zero when the current entry is
 * itself a directory (not a link to one).
 *
 * When stat() data is already cached (hasStat) and it does not describe
 * a directory, the first test settles the question without touching
 * lstat().  Otherwise the answer is S_ISDIR applied to the lstat() data
 * obtained through tree_current_lstat().
 *
 * NOTE(review): the statement executed by the first test and the check
 * for a failed tree_current_lstat() are not part of this listing.
 */
496 * Test whether current entry is a physical directory. Usually, we
497 * already have at least one of stat() or lstat() in memory, so we
498 * use tricks to try to avoid an extra trip to the disk.
501 tree_current_is_physical_dir(struct tree
*t
)
503 const struct stat
*st
;
506 * If stat() says it isn't a dir, then it's not a dir.
507 * If stat() data is cached, this check is free, so do it first.
509 if ((t
->flags
& hasStat
)
510 && (!S_ISDIR(tree_current_stat(t
)->st_mode
)))
514 * Either stat() said it was a dir (in which case, we have
515 * to determine whether it's really a link to a dir) or
516 * stat() info wasn't available. So we use lstat(), which
517 * hopefully is already cached.
520 st
= tree_current_lstat(t
);
521 /* If we can't stat it, it's not a dir. */
524 /* Use the definitive test. Hopefully this is cached. */
525 return (S_ISDIR(st
->st_mode
));
/*
 * tree_current_is_physical_link: result of S_ISLNK applied to the
 * lstat() data of the current entry, obtained through
 * tree_current_lstat().
 *
 * NOTE(review): two source lines between the lookup and the return are
 * not part of this listing; presumably they handle a failed lstat().
 */
529 * Test whether current entry is a symbolic link.
532 tree_current_is_physical_link(struct tree
*t
)
534 const struct stat
*st
= tree_current_lstat(t
);
537 return (S_ISLNK(st
->st_mode
));
/*
 * tree_current_access_path: return t->basename, the same string this
 * file passes to stat() and lstat() for the current entry.  The pointer
 * refers into t->buff (see tree_pop()), so it is owned by the handle.
 */
541 * Return the access path for the entry just returned from tree_next().
544 tree_current_access_path(struct tree
*t
)
546 return (t
->basename
);
550 * Return the full path for the entry just returned from tree_next().
553 tree_current_path(struct tree
*t
)
/*
 * tree_current_pathlen: return t->path_length, which tree_append() sets
 * to dirname_length + name_length when it builds the current path.
 */
559 * Return the length of the path for the entry just returned from tree_next().
562 tree_current_pathlen(struct tree
*t
)
564 return (t
->path_length
);
568 * Return the nesting depth of the entry just returned from tree_next().
571 tree_current_depth(struct tree
*t
)
/*
 * tree_close: end the traversal for handle `t`.
 *
 * Loops while entries remain on t->stack, then changes back to the
 * directory recorded by tree_open(): fchdir() to t->initialDirFd
 * followed by close() and resetting the field to -1, or in the
 * native-Windows branch chdir() to t->initialDir followed by resetting
 * the field to NULL.
 *
 * NOTE(review): the results of fchdir() and chdir() are ignored, so a
 * failure to return to the starting directory goes unreported.  The
 * body of the stack loop and the release of t->initialDir, t->buff and
 * `t` itself are not part of this listing.
 */
577 * Terminate the traversal and release any resources.
580 tree_close(struct tree
*t
)
582 /* Release anything remaining in the stack. */
583 while (t
->stack
!= NULL
)
587 /* chdir() back to where we started. */
589 if (t
->initialDirFd
>= 0) {
590 fchdir(t
->initialDirFd
);
591 close(t
->initialDirFd
);
592 t
->initialDirFd
= -1;
594 #elif defined(_WIN32) && !defined(__CYGWIN__)
595 if (t
->initialDir
!= NULL
) {
596 chdir(t
->initialDir
);
598 t
->initialDir
= NULL
;