1 /* Copyright (c) 2003, Roger Dingledine
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2021, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
9 * \brief Manipulate strings that contain filesystem paths.
12 #include "lib/fs/path.h"
13 #include "lib/malloc/malloc.h"
14 #include "lib/log/log.h"
15 #include "lib/log/util_bug.h"
16 #include "lib/container/smartlist.h"
17 #include "lib/sandbox/sandbox.h"
18 #include "lib/string/printf.h"
19 #include "lib/string/util_string.h"
20 #include "lib/string/compat_ctype.h"
21 #include "lib/string/compat_string.h"
22 #include "lib/fs/files.h"
23 #include "lib/fs/dir.h"
24 #include "lib/fs/userdb.h"
26 #ifdef HAVE_SYS_TYPES_H
27 #include <sys/types.h>
29 #ifdef HAVE_SYS_STAT_H
39 #else /* !(defined(_WIN32)) */
42 #endif /* defined(_WIN32) */
47 /** Removes enclosing quotes from <b>path</b> and unescapes quotes between the
48 * enclosing quotes. Backslashes are not unescaped. Return the unquoted
49 * <b>path</b> on success or NULL if <b>path</b> is not quoted correctly. */
51 get_unquoted_path(const char *path
)
53 size_t len
= strlen(path
);
56 return tor_strdup("");
59 int has_start_quote
= (path
[0] == '\"');
60 int has_end_quote
= (len
> 0 && path
[len
-1] == '\"');
61 if (has_start_quote
!= has_end_quote
|| (len
== 1 && has_start_quote
)) {
65 char *unquoted_path
= tor_malloc(len
- has_start_quote
- has_end_quote
+ 1);
66 char *s
= unquoted_path
;
68 for (i
= has_start_quote
; i
< len
- has_end_quote
; i
++) {
69 if (path
[i
] == '\"' && (i
> 0 && path
[i
-1] == '\\')) {
71 } else if (path
[i
] != '\"') {
73 } else { /* unescaped quote */
74 tor_free(unquoted_path
);
82 /** Expand any homedir prefix on <b>filename</b>; return a newly allocated
85 expand_filename(const char *filename
)
89 /* Might consider using GetFullPathName() as described here:
90 * http://etutorials.org/Programming/secure+programming/
91 * Chapter+3.+Input+Validation/3.7+Validating+Filenames+and+Paths/
93 return tor_strdup(filename
);
94 #else /* !defined(_WIN32) */
95 if (*filename
== '~') {
96 char *home
, *result
=NULL
;
99 if (filename
[1] == '/' || filename
[1] == '\0') {
100 home
= getenv("HOME");
102 log_warn(LD_CONFIG
, "Couldn't find $HOME environment variable while "
103 "expanding \"%s\"; defaulting to \"\".", filename
);
104 home
= tor_strdup("");
106 home
= tor_strdup(home
);
108 rest
= strlen(filename
)>=2?(filename
+2):"";
111 char *username
, *slash
;
112 slash
= strchr(filename
, '/');
114 username
= tor_strndup(filename
+1,slash
-filename
-1);
116 username
= tor_strdup(filename
+1);
117 if (!(home
= get_user_homedir(username
))) {
118 log_warn(LD_CONFIG
,"Couldn't get homedir for \"%s\"",username
);
123 rest
= slash
? (slash
+1) : "";
124 #else /* !defined(HAVE_PWD_H) */
125 log_warn(LD_CONFIG
, "Couldn't expand homedir on system without pwd.h");
126 return tor_strdup(filename
);
127 #endif /* defined(HAVE_PWD_H) */
130 /* Remove trailing slash. */
131 if (strlen(home
)>1 && !strcmpend(home
,PATH_SEPARATOR
)) {
132 home
[strlen(home
)-1] = '\0';
134 tor_asprintf(&result
,"%s"PATH_SEPARATOR
"%s",home
,rest
);
138 return tor_strdup(filename
);
140 #endif /* defined(_WIN32) */
143 /** Return true iff <b>filename</b> is a relative path. */
145 path_is_relative(const char *filename
)
147 if (filename
&& filename
[0] == '/')
150 else if (filename
&& filename
[0] == '\\')
152 else if (filename
&& strlen(filename
)>3 && TOR_ISALPHA(filename
[0]) &&
153 filename
[1] == ':' && filename
[2] == '\\')
155 #endif /* defined(_WIN32) */
160 /** Clean up <b>name</b> so that we can use it in a call to "stat". On Unix,
161 * we do nothing. On Windows, we remove a trailing slash, unless the path is
162 * the root of a disk. */
164 clean_fname_for_stat(char *name
)
167 size_t len
= strlen(name
);
170 if (name
[len
-1]=='\\' || name
[len
-1]=='/') {
171 if (len
== 1 || (len
==3 && name
[1]==':'))
175 #else /* !defined(_WIN32) */
177 #endif /* defined(_WIN32) */
180 /** Modify <b>fname</b> to contain the name of its parent directory. Doesn't
181 * actually examine the filesystem; does a purely syntactic modification.
183 * The parent of the root directory is considered to be itself.
185 * Path separators are the forward slash (/) everywhere and additionally
186 * the backslash (\) on Win32.
188 * Cuts off any number of trailing path separators but otherwise ignores
189 * them for purposes of finding the parent directory.
191 * Returns 0 if a parent directory was successfully found, -1 otherwise (fname
192 * did not have any path separators or only had them at the end).
195 get_parent_directory(char *fname
)
201 /* If we start with, say, c:, then don't consider that the start of the path
203 if (fname
[0] && fname
[1] == ':') {
206 #endif /* defined(_WIN32) */
207 /* Now we want to remove all path-separators at the end of the string,
208 * and to remove the end of the string starting with the path separator
209 * before the last non-path-separator. In perl, this would be
210 * s#[/]*$##; s#/[^/]*$##;
211 * on a unixy platform.
213 cp
= fname
+ strlen(fname
);
215 while (--cp
>= fname
) {
216 int is_sep
= (*cp
== '/'
223 /* This is the first separator in the file name; don't remove it! */
238 /** Return a newly allocated string containing the output of getcwd(). Return
239 * NULL on failure. (We can't just use getcwd() into a PATH_MAX buffer, since
240 * Hurd hasn't got a PATH_MAX.)
245 #ifdef HAVE_GET_CURRENT_DIR_NAME
246 /* Glibc makes this nice and simple for us. */
247 char *cwd
= get_current_dir_name();
250 /* We make a copy here, in case tor_malloc() is not malloc(). */
251 result
= tor_strdup(cwd
);
252 raw_free(cwd
); // alias for free to avoid tripping check-spaces.
255 #else /* !defined(HAVE_GET_CURRENT_DIR_NAME) */
260 while (ptr
== NULL
) {
261 buf
= tor_realloc(buf
, size
);
262 ptr
= getcwd(buf
, size
);
264 if (ptr
== NULL
&& errno
!= ERANGE
) {
272 #endif /* defined(HAVE_GET_CURRENT_DIR_NAME) */
274 #endif /* !defined(_WIN32) */
276 /** Expand possibly relative path <b>fname</b> to an absolute path.
277 * Return a newly allocated string, which may be a duplicate of <b>fname</b>.
280 make_path_absolute(const char *fname
)
283 char *absfname_malloced
= _fullpath(NULL
, fname
, 1);
285 /* We don't want to assume that tor_free can free a string allocated with
286 * malloc. On failure, return a copy of fname (it's better than nothing). */
287 char *absfname
= tor_strdup(absfname_malloced
? absfname_malloced
: fname
);
288 if (absfname_malloced
) raw_free(absfname_malloced
);
291 #else /* !defined(_WIN32) */
292 char *absfname
= NULL
, *path
= NULL
;
296 if (fname
[0] == '/') {
297 absfname
= tor_strdup(fname
);
299 path
= alloc_getcwd();
301 tor_asprintf(&absfname
, "%s/%s", path
, fname
);
304 /* LCOV_EXCL_START Can't make getcwd fail. */
305 /* If getcwd failed, the best we can do here is keep using the
306 * relative path. (Perhaps / isn't readable by this UID/GID.) */
307 log_warn(LD_GENERAL
, "Unable to find current working directory: %s",
309 absfname
= tor_strdup(fname
);
314 #endif /* defined(_WIN32) */
317 /* The code below implements tor_glob and get_glob_opened_files. Because it is
318 * not easy to understand it by looking at individual functions, the big
319 * picture explanation here should be read first.
321 * Purpose of the functions:
322 * - tor_glob - receives a pattern and returns all the paths that result from
323 * its glob expansion; globs can be present in any path component.
324 * - get_glob_opened_files - receives a pattern and returns all the paths that
325 * are opened during its expansion (the paths before any path fragment that
326 * contains a glob as they have to be opened to check for glob matches). This
327 * is used to get the paths that have to be added to the seccomp sandbox
330 * Due to OS API differences explained below, the implementation of tor_glob is
331 * completely different for Windows and POSIX systems, so we ended up with
332 * three different implementations:
333 * - tor_glob for POSIX - as POSIX glob does everything we need, we simply call
334 * it and process the results. This is completely implemented in tor_glob.
335 * - tor_glob for WIN32 - because the WIN32 API only supports expanding globs
336 * in the last path fragment, we need to expand the globs in each path
337 * fragment manually and call recursively to get the same behaviour as POSIX
338 * glob. When there are no globs in pattern, we know we are on the last path
339 * fragment and collect the full path.
340 * - get_glob_opened_files - because the paths before any path fragment with a
341 * glob will be opened to check for matches, we need to collect them and we
342 * need to expand the globs in each path fragment and call recursively until
343 * we find no more globs.
345 * As seen from the description above, both tor_glob for WIN32 and
346 * get_glob_opened_files receive a pattern and return a list of paths and have
347 * to expand all path fragments that contain globs and call themselves
348 * recursively. The differences are:
349 * - get_glob_opened_files collects paths before path fragments with globs
350 * while tor_glob for WIN32 collects full paths resulting from the expansion
352 * - get_glob_opened_files can call tor_glob to expand path fragments with
353 * globs while tor_glob for WIN32 cannot because it IS tor_glob. For tor_glob
354 * for WIN32, an auxiliary function has to be used for this purpose.
356 * To avoid code duplication, the logic of tor_glob for WIN32 and
357 * get_glob_opened_files is implemented in get_glob_paths. The differences are
358 * configured by the extra function parameters:
359 * - final - if true, returns a list of paths obtained from expanding pattern
360 * (implements tor_glob). Otherwise, returns the paths before path fragments
361 * with globs (implements get_glob_opened_files).
362 * - unglob - function used to expand a path fragment. The function signature
363 * is defined by the unglob_fn typedef. Two implementations are available:
364 * - unglob_win32 - uses tor_listdir and PathMatchSpec (for tor_glob WIN32)
365 * - unglob_opened_files - uses tor_glob (for get_glob_opened_files)
368 /** Returns true if the character at position <b>pos</b> in <b>pattern</b> is
369 * considered a glob. Returns false otherwise. Takes escaping into account on
370 * systems where escaping globs is supported. */
372 is_glob_char(const char *pattern
, int pos
)
374 bool is_glob
= pattern
[pos
] == '*' || pattern
[pos
] == '?';
377 #else /* !defined(_WIN32) */
378 bool is_escaped
= pos
> 0 && pattern
[pos
-1] == '\\';
379 return is_glob
&& !is_escaped
;
380 #endif /* defined(_WIN32) */
383 /** Expands the first path fragment of <b>pattern</b> that contains globs. The
384 * path fragment is between <b>prev_sep</b> and <b>next_sep</b>. If the path
385 * fragment is the last fragment of <b>pattern</b>, <b>next_sep</b> will be the
386 * index of the last char. Returns a list of paths resulting from the glob
387 * expansion of the path fragment. Anything after <b>next_sep</b> is not
388 * included in the returned list. Returns NULL on failure. */
389 typedef struct smartlist_t
* unglob_fn(const char *pattern
, int prev_sep
,
392 /** Adds <b>path</b> to <b>result</b> if it exists and is a file type we can
393 * handle. Returns false if <b>path</b> is a file type we cannot handle,
394 * returns true otherwise. Used by tor_glob for WIN32. */
396 add_non_glob_path(const char *path
, struct smartlist_t
*result
)
398 file_status_t file_type
= file_status(path
);
399 if (file_type
== FN_ERROR
) {
401 } else if (file_type
!= FN_NOENT
) {
402 char *to_add
= tor_strdup(path
);
403 clean_fname_for_stat(to_add
);
404 smartlist_add(result
, to_add
);
406 /* If WIN32 tor_glob is called with a non-existing path, we want it to
407 * return an empty list instead of error to match the regular version */
411 /** Auxiliary function used by get_glob_opened_files and WIN32 tor_glob.
412 * Returns a list of paths obtained from <b>pattern</b> using <b>unglob</b> to
413 * expand each path fragment. If <b>final</b> is true, the paths are the result
414 * of the glob expansion of <b>pattern</b> (implements tor_glob). Otherwise,
415 * the paths are the paths opened by glob while expanding <b>pattern</b>
416 * (implements get_glob_opened_files). Returns NULL on failure. */
417 static struct smartlist_t
*
418 get_glob_paths(const char *pattern
, unglob_fn unglob
, bool final
)
420 smartlist_t
*result
= smartlist_new();
421 int i
, prev_sep
= -1, next_sep
= -1;
422 bool is_glob
= false, error_found
= false, is_sep
= false, is_last
= false;
424 // find first path fragment with globs
425 for (i
= 0; pattern
[i
]; i
++) {
426 is_glob
= is_glob
|| is_glob_char(pattern
, i
);
427 is_last
= !pattern
[i
+1];
428 is_sep
= pattern
[i
] == *PATH_SEPARATOR
|| pattern
[i
] == '/';
429 if (is_sep
|| is_last
) {
431 next_sep
= i
; // next_sep+1 is start of next fragment or end of string
438 if (!is_glob
) { // pattern fully expanded or no glob in pattern
439 if (final
&& !add_non_glob_path(pattern
, result
)) {
447 // add path before the glob to result
448 int len
= prev_sep
< 1 ? prev_sep
+ 1 : prev_sep
; // handle /*
449 char *path_until_glob
= tor_strndup(pattern
, len
);
450 smartlist_add(result
, path_until_glob
);
453 smartlist_t
*unglobbed_paths
= unglob(pattern
, prev_sep
, next_sep
);
454 if (!unglobbed_paths
) {
457 // for each path for current fragment, add the rest of the pattern
458 // and call recursively to get all expanded paths
459 SMARTLIST_FOREACH_BEGIN(unglobbed_paths
, char *, current_path
) {
461 tor_asprintf(&next_path
, "%s"PATH_SEPARATOR
"%s", current_path
,
462 &pattern
[next_sep
+1]);
463 smartlist_t
*opened_next
= get_glob_paths(next_path
, unglob
, final
);
469 smartlist_add_all(result
, opened_next
);
470 smartlist_free(opened_next
);
471 } SMARTLIST_FOREACH_END(current_path
);
472 SMARTLIST_FOREACH(unglobbed_paths
, char *, p
, tor_free(p
));
473 smartlist_free(unglobbed_paths
);
478 SMARTLIST_FOREACH(result
, char *, p
, tor_free(p
));
479 smartlist_free(result
);
486 /** Expands globs in <b>pattern</b> for the path fragment between
487 * <b>prev_sep</b> and <b>next_sep</b> using the WIN32 API. Returns NULL on
488 * failure. Used by the WIN32 implementation of tor_glob. Implements unglob_fn,
489 * see its description for more details. */
490 static struct smartlist_t
*
491 unglob_win32(const char *pattern
, int prev_sep
, int next_sep
)
493 smartlist_t
*result
= smartlist_new();
494 int len
= prev_sep
< 1 ? prev_sep
+ 1 : prev_sep
; // handle /*
495 char *path_until_glob
= tor_strndup(pattern
, len
);
497 if (!is_file(file_status(path_until_glob
))) {
498 smartlist_t
*filenames
= tor_listdir(path_until_glob
);
500 smartlist_free(result
);
503 SMARTLIST_FOREACH_BEGIN(filenames
, char *, filename
) {
504 TCHAR tpattern
[MAX_PATH
] = {0};
505 TCHAR tfile
[MAX_PATH
] = {0};
507 tor_asprintf(&full_path
, "%s"PATH_SEPARATOR
"%s",
508 path_until_glob
, filename
);
509 char *path_curr_glob
= tor_strndup(pattern
, next_sep
+ 1);
510 // *\ must return only dirs, remove \ from the pattern so it matches
511 if (is_dir(file_status(full_path
))) {
512 clean_fname_for_stat(path_curr_glob
);
515 mbstowcs(tpattern
, path_curr_glob
, MAX_PATH
);
516 mbstowcs(tfile
, full_path
, MAX_PATH
);
517 #else /* !defined(UNICODE) */
518 strlcpy(tpattern
, path_curr_glob
, MAX_PATH
);
519 strlcpy(tfile
, full_path
, MAX_PATH
);
520 #endif /* defined(UNICODE) */
521 if (PathMatchSpec(tfile
, tpattern
)) {
522 smartlist_add(result
, full_path
);
526 tor_free(path_curr_glob
);
527 } SMARTLIST_FOREACH_END(filename
);
528 SMARTLIST_FOREACH(filenames
, char *, p
, tor_free(p
));
529 smartlist_free(filenames
);
532 tor_free(path_until_glob
);
536 /** Same as opendir but calls sandbox_intern_string before */
538 prot_opendir(const char *name
)
540 if (sandbox_interned_string_is_missing(name
)) {
544 return opendir(sandbox_intern_string(name
));
547 /** Same as stat but calls sandbox_intern_string before */
549 prot_stat(const char *pathname
, struct stat
*buf
)
551 if (sandbox_interned_string_is_missing(pathname
)) {
555 return stat(sandbox_intern_string(pathname
), buf
);
558 /** Same as lstat but calls sandbox_intern_string before */
560 prot_lstat(const char *pathname
, struct stat
*buf
)
562 if (sandbox_interned_string_is_missing(pathname
)) {
566 return lstat(sandbox_intern_string(pathname
), buf
);
568 /** As closedir, but has the right type for gl_closedir */
570 wrap_closedir(void *arg
)
574 #endif /* defined(HAVE_GLOB) */
576 /** Return a new list containing the paths that match the pattern
577 * <b>pattern</b>. Return NULL on error. On POSIX systems, errno is set by the
578 * glob function or is set to EPERM if glob tried to access a file not allowed
579 * by the seccomp sandbox.
582 tor_glob(const char *pattern
)
584 smartlist_t
*result
= NULL
;
587 // PathMatchSpec does not support forward slashes, change them to backslashes
588 char *pattern_normalized
= tor_strdup(pattern
);
589 tor_strreplacechar(pattern_normalized
, '/', *PATH_SEPARATOR
);
590 result
= get_glob_paths(pattern_normalized
, unglob_win32
, true);
591 tor_free(pattern_normalized
);
592 #elif HAVE_GLOB /* !(defined(_WIN32)) */
594 int flags
= GLOB_ERR
| GLOB_NOSORT
;
595 #ifdef GLOB_ALTDIRFUNC
596 /* use functions that call sandbox_intern_string */
597 flags
|= GLOB_ALTDIRFUNC
;
598 typedef void *(*gl_opendir
)(const char * name
);
599 typedef struct dirent
*(*gl_readdir
)(void *);
600 typedef void (*gl_closedir
)(void *);
601 matches
.gl_opendir
= (gl_opendir
) &prot_opendir
;
602 matches
.gl_readdir
= (gl_readdir
) &readdir
;
603 matches
.gl_closedir
= (gl_closedir
) &wrap_closedir
;
604 matches
.gl_stat
= &prot_stat
;
605 matches
.gl_lstat
= &prot_lstat
;
606 #endif /* defined(GLOB_ALTDIRFUNC) */
607 int ret
= glob(pattern
, flags
, NULL
, &matches
);
608 if (ret
== GLOB_NOMATCH
) {
609 return smartlist_new();
610 } else if (ret
!= 0) {
614 // #40141: workaround for bug in glibc < 2.19 where patterns ending in path
615 // separator match files and folders instead of folders only
616 size_t pattern_len
= strlen(pattern
);
617 bool dir_only
= has_glob(pattern
) &&
618 pattern_len
> 0 && pattern
[pattern_len
-1] == *PATH_SEPARATOR
;
620 result
= smartlist_new();
622 for (i
= 0; i
< matches
.gl_pathc
; i
++) {
623 char *match
= tor_strdup(matches
.gl_pathv
[i
]);
624 size_t len
= strlen(match
);
625 if (len
> 0 && match
[len
-1] == *PATH_SEPARATOR
) {
629 if (!dir_only
|| (dir_only
&& is_dir(file_status(match
)))) {
630 smartlist_add(result
, match
);
639 #endif /* !defined(HAVE_GLOB) */
644 /** Returns true if <b>s</b> contains characters that can be globbed.
645 * Returns false otherwise. */
647 has_glob(const char *s
)
650 for (i
= 0; s
[i
]; i
++) {
651 if (is_glob_char(s
, i
)) {
658 /** Expands globs in <b>pattern</b> for the path fragment between
659 * <b>prev_sep</b> and <b>next_sep</b> using tor_glob. Returns NULL on
660 * failure. Used by get_glob_opened_files. Implements unglob_fn, see its
661 * description for more details. */
662 static struct smartlist_t
*
663 unglob_opened_files(const char *pattern
, int prev_sep
, int next_sep
)
666 smartlist_t
*result
= smartlist_new();
667 // if the following fragments have no globs, we're done
668 if (has_glob(&pattern
[next_sep
+1])) {
669 // if there is a glob after next_sep, we know next_sep is a separator and
670 // not the last char, so glob_path will hold the path without the separator
671 char *glob_path
= tor_strndup(pattern
, next_sep
);
672 smartlist_t
*child_paths
= tor_glob(glob_path
);
675 smartlist_free(result
);
678 smartlist_add_all(result
, child_paths
);
679 smartlist_free(child_paths
);
685 /** Returns a list of files that are opened by the tor_glob function when
686 * called with <b>pattern</b>. Returns NULL on error. The purpose of this
687 * function is to create a list of files to be added to the sandbox white list
688 * before the sandbox is enabled. */
690 get_glob_opened_files(const char *pattern
)
692 return get_glob_paths(pattern
, unglob_opened_files
, false);