/* Repository-browser header captured together with this copy of the file:
     exp2l: Work around a NetBSD 10.0/i386 bug.
     [gnulib.git] / lib / canonicalize.c
     blob 52e9b3b371cc637c365bac1ac9f15a4dd1ce3054  */
/* Return the canonical absolute name of a given file.
   Copyright (C) 1996-2024 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 #include <config.h>
19 #include "canonicalize.h"
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <string.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
27 #include <filename.h>
28 #include <idx.h>
29 #include <intprops.h>
30 #include <scratch_buffer.h>
32 #include "attribute.h"
33 #include "file-set.h"
34 #include "hash-triple.h"
35 #include "xalloc.h"
37 #ifndef DOUBLE_SLASH_IS_DISTINCT_ROOT
38 # define DOUBLE_SLASH_IS_DISTINCT_ROOT false
39 #endif
41 #if ISSLASH ('\\')
42 # define SLASHES "/\\"
43 #else
44 # define SLASHES "/"
45 #endif
47 /* Avoid false GCC warning "'end_idx' may be used uninitialized". */
48 #if __GNUC__ + (__GNUC_MINOR__ >= 7) > 4
49 # pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
50 #endif
/* Return true if FILE's existence can be shown, false (setting errno)
   otherwise.  Follow symbolic links.

   When faccessat is available it is asked for F_OK with AT_EACCESS.
   Otherwise stat is used, and an EOVERFLOW failure still counts as
   "exists": the file is there even though its metadata does not fit
   in struct stat.  */
static bool
file_accessible (char const *file)
{
# if HAVE_FACCESSAT
  return faccessat (AT_FDCWD, file, F_OK, AT_EACCESS) == 0;
# else
  struct stat st;
  return stat (file, &st) == 0 || errno == EOVERFLOW;
# endif
}
65 /* True if concatenating END as a suffix to a file name means that the
66 code needs to check that the file name is that of a searchable
67 directory, since the canonicalize_filename_mode_stk code won't
68 check this later anyway when it checks an ordinary file name
69 component within END. END must either be empty, or start with a
70 slash. */
72 static bool _GL_ATTRIBUTE_PURE
73 suffix_requires_dir_check (char const *end)
75 /* If END does not start with a slash, the suffix is OK. */
76 while (ISSLASH (*end))
78 /* Two or more slashes act like a single slash. */
80 end++;
81 while (ISSLASH (*end));
83 switch (*end++)
85 default: return false; /* An ordinary file name component is OK. */
86 case '\0': return true; /* Trailing "/" is trouble. */
87 case '.': break; /* Possibly "." or "..". */
89 /* Trailing "/.", or "/.." even if not trailing, is trouble. */
90 if (!*end || (*end == '.' && (!end[1] || ISSLASH (end[1]))))
91 return true;
94 return false;
/* Append this to a file name to test whether it is a searchable directory.
   On POSIX platforms "/" suffices, but "/./" is sometimes needed on
   macOS 10.13 <https://bugs.gnu.org/30350>, and should also work on
   platforms like AIX 7.2 that need at least "/.".

   sizeof dir_suffix (which counts the terminating NUL) is the number
   of bytes that must be writable after the end of the name.  */

#ifdef LSTAT_FOLLOWS_SLASHED_SYMLINK
static char const dir_suffix[] = "/";
#else
static char const dir_suffix[] = "/./";
#endif
108 /* Return true if DIR is a searchable dir, false (setting errno) otherwise.
109 DIREND points to the NUL byte at the end of the DIR string.
110 Store garbage into DIREND[0 .. strlen (dir_suffix)]. */
112 static bool
113 dir_check (char *dir, char *dirend)
115 strcpy (dirend, dir_suffix);
116 return file_accessible (dir);
119 #if !((HAVE_CANONICALIZE_FILE_NAME && FUNC_REALPATH_WORKS) \
120 || GNULIB_CANONICALIZE_LGPL)
121 /* Return the canonical absolute name of file NAME. A canonical name
122 does not contain any ".", ".." components nor any repeated file name
123 separators ('/') or symlinks. All components must exist.
124 The result is malloc'd. */
126 char *
127 canonicalize_file_name (const char *name)
129 return canonicalize_filename_mode (name, CAN_EXISTING);
131 #endif /* !HAVE_CANONICALIZE_FILE_NAME */
133 static bool
134 multiple_bits_set (canonicalize_mode_t i)
136 return (i & (i - 1)) != 0;
139 /* Return true if we've already seen the triple, <FILENAME, dev, ino>.
140 If *HT is not initialized, initialize it. */
141 static bool
142 seen_triple (Hash_table **ht, char const *filename, struct stat const *st)
144 if (*ht == NULL)
146 idx_t initial_capacity = 7;
147 *ht = hash_initialize (initial_capacity,
148 NULL,
149 triple_hash,
150 triple_compare_ino_str,
151 triple_free);
152 if (*ht == NULL)
153 xalloc_die ();
156 if (seen_file (*ht, filename, st))
157 return true;
159 record_file (*ht, filename, st);
160 return false;
163 /* Scratch buffers used by canonicalize_filename_mode_stk and managed
164 by __realpath. */
165 struct realpath_bufs
167 struct scratch_buffer rname;
168 struct scratch_buffer extra;
169 struct scratch_buffer link;
172 static char *
173 canonicalize_filename_mode_stk (const char *name, canonicalize_mode_t can_mode,
174 struct realpath_bufs *bufs)
176 char *dest;
177 char const *start;
178 char const *end;
179 Hash_table *ht = NULL;
180 bool logical = (can_mode & CAN_NOLINKS) != 0;
181 int num_links = 0;
183 canonicalize_mode_t can_exist = can_mode & CAN_MODE_MASK;
184 if (multiple_bits_set (can_exist))
186 errno = EINVAL;
187 return NULL;
190 if (name == NULL)
192 errno = EINVAL;
193 return NULL;
196 if (name[0] == '\0')
198 errno = ENOENT;
199 return NULL;
202 char *rname = bufs->rname.data;
203 bool end_in_extra_buffer = false;
204 bool failed = true;
206 /* This is always zero for Posix hosts, but can be 2 for MS-Windows
207 and MS-DOS X:/foo/bar file names. */
208 idx_t prefix_len = FILE_SYSTEM_PREFIX_LEN (name);
210 if (!IS_ABSOLUTE_FILE_NAME (name))
212 while (!getcwd (bufs->rname.data, bufs->rname.length))
214 switch (errno)
216 case ERANGE:
217 if (scratch_buffer_grow (&bufs->rname))
218 break;
219 FALLTHROUGH;
220 case ENOMEM:
221 xalloc_die ();
223 default:
224 dest = rname;
225 goto error;
227 rname = bufs->rname.data;
229 dest = rawmemchr (rname, '\0');
230 start = name;
231 prefix_len = FILE_SYSTEM_PREFIX_LEN (rname);
233 else
235 dest = mempcpy (rname, name, prefix_len);
236 *dest++ = '/';
237 if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
239 if (prefix_len == 0 /* implies ISSLASH (name[0]) */
240 && ISSLASH (name[1]) && !ISSLASH (name[2]))
242 *dest++ = '/';
243 #if defined _WIN32 && !defined __CYGWIN__
244 /* For UNC file names '\\server\path\to\file', extend the prefix
245 to include the server: '\\server\'. */
247 idx_t i;
248 for (i = 2; name[i] != '\0' && !ISSLASH (name[i]); )
249 i++;
250 if (name[i] != '\0' /* implies ISSLASH (name[i]) */
251 && i + 1 < bufs->rname.length)
253 prefix_len = i;
254 memcpy (dest, name + 2, i - 2 + 1);
255 dest += i - 2 + 1;
257 else
259 /* Either name = '\\server'; this is an invalid file name.
260 Or name = '\\server\...' and server is more than
261 bufs->rname.length - 4 bytes long. In either
262 case, stop the UNC processing. */
265 #endif
267 *dest = '\0';
269 start = name + prefix_len;
272 for ( ; *start; start = end)
274 /* Skip sequence of multiple file name separators. */
275 while (ISSLASH (*start))
276 ++start;
278 /* Find end of component. */
279 for (end = start; *end && !ISSLASH (*end); ++end)
280 /* Nothing. */;
282 /* Length of this file name component; it can be zero if a file
283 name ends in '/'. */
284 idx_t startlen = end - start;
286 if (startlen == 0)
287 break;
288 else if (startlen == 1 && start[0] == '.')
289 /* nothing */;
290 else if (startlen == 2 && start[0] == '.' && start[1] == '.')
292 /* Back up to previous component, ignore if at root already. */
293 if (dest > rname + prefix_len + 1)
294 for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest)
295 continue;
296 if (DOUBLE_SLASH_IS_DISTINCT_ROOT
297 && dest == rname + 1 && !prefix_len
298 && ISSLASH (*dest) && !ISSLASH (dest[1]))
299 dest++;
301 else
303 if (!ISSLASH (dest[-1]))
304 *dest++ = '/';
306 while (rname + bufs->rname.length - dest
307 < startlen + sizeof dir_suffix)
309 idx_t dest_offset = dest - rname;
310 if (!scratch_buffer_grow_preserve (&bufs->rname))
311 xalloc_die ();
312 rname = bufs->rname.data;
313 dest = rname + dest_offset;
316 dest = mempcpy (dest, start, startlen);
317 *dest = '\0';
319 char *buf;
320 ssize_t n = -1;
321 if (!logical)
323 while (true)
325 buf = bufs->link.data;
326 idx_t bufsize = bufs->link.length;
327 n = readlink (rname, buf, bufsize - 1);
328 if (n < bufsize - 1)
329 break;
330 if (!scratch_buffer_grow (&bufs->link))
331 xalloc_die ();
334 if (0 <= n)
336 /* A physical traversal and RNAME is a symbolic link. */
338 if (num_links < 20)
339 num_links++;
340 else if (*start)
342 /* Enough symlinks have been seen that it is time to
343 worry about being in a symlink cycle.
344 Get the device and inode of the parent directory, as
345 pre-2017 POSIX says this info is not reliable for
346 symlinks. */
347 struct stat st;
348 dest[- startlen] = '\0';
349 if (stat (*rname ? rname : ".", &st) != 0)
350 goto error;
351 dest[- startlen] = *start;
353 /* Detect loops. We cannot use the cycle-check module here,
354 since it's possible to encounter the same parent
355 directory more than once in a given traversal. However,
356 encountering the same (parentdir, START) pair twice does
357 indicate a loop. */
358 if (seen_triple (&ht, start, &st))
360 if (can_exist == CAN_MISSING)
361 continue;
362 errno = ELOOP;
363 goto error;
367 buf[n] = '\0';
369 char *extra_buf = bufs->extra.data;
370 idx_t end_idx;
371 if (end_in_extra_buffer)
372 end_idx = end - extra_buf;
373 size_t len = strlen (end);
374 if (INT_ADD_OVERFLOW (len, n))
375 xalloc_die ();
376 while (bufs->extra.length <= len + n)
378 if (!scratch_buffer_grow_preserve (&bufs->extra))
379 xalloc_die ();
380 extra_buf = bufs->extra.data;
382 if (end_in_extra_buffer)
383 end = extra_buf + end_idx;
385 /* Careful here, end may be a pointer into extra_buf... */
386 memmove (&extra_buf[n], end, len + 1);
387 name = end = memcpy (extra_buf, buf, n);
388 end_in_extra_buffer = true;
390 if (IS_ABSOLUTE_FILE_NAME (buf))
392 idx_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf);
394 dest = mempcpy (rname, buf, pfxlen);
395 *dest++ = '/'; /* It's an absolute symlink */
396 if (DOUBLE_SLASH_IS_DISTINCT_ROOT)
398 if (ISSLASH (buf[1]) && !ISSLASH (buf[2]) && !pfxlen)
399 *dest++ = '/';
400 *dest = '\0';
402 /* Install the new prefix to be in effect hereafter. */
403 prefix_len = pfxlen;
405 else
407 /* Back up to previous component, ignore if at root
408 already: */
409 if (dest > rname + prefix_len + 1)
410 for (--dest; dest > rname && !ISSLASH (dest[-1]); --dest)
411 continue;
412 if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1
413 && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len)
414 dest++;
417 else if (! (can_exist == CAN_MISSING
418 || (suffix_requires_dir_check (end)
419 ? dir_check (rname, dest)
420 : !logical
421 ? errno == EINVAL
422 : *end || file_accessible (rname))
423 || (can_exist == CAN_ALL_BUT_LAST
424 && errno == ENOENT
425 && !end[strspn (end, SLASHES)])))
426 goto error;
429 if (dest > rname + prefix_len + 1 && ISSLASH (dest[-1]))
430 --dest;
431 if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && !prefix_len
432 && ISSLASH (*dest) && !ISSLASH (dest[1]))
433 dest++;
434 failed = false;
436 error:
437 if (ht)
438 hash_free (ht);
440 if (failed)
441 return NULL;
443 *dest++ = '\0';
444 char *result = malloc (dest - rname);
445 if (!result)
446 xalloc_die ();
447 return memcpy (result, rname, dest - rname);
450 /* Return the canonical absolute name of file NAME, while treating
451 missing elements according to CAN_MODE. A canonical name
452 does not contain any ".", ".." components nor any repeated file name
453 separators ('/') or, depending on other CAN_MODE flags, symlinks.
454 Whether components must exist or not depends on canonicalize mode.
455 The result is malloc'd. */
457 char *
458 canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode)
460 struct realpath_bufs bufs;
461 scratch_buffer_init (&bufs.rname);
462 scratch_buffer_init (&bufs.extra);
463 scratch_buffer_init (&bufs.link);
464 char *result = canonicalize_filename_mode_stk (name, can_mode, &bufs);
465 scratch_buffer_free (&bufs.link);
466 scratch_buffer_free (&bufs.extra);
467 scratch_buffer_free (&bufs.rname);
468 return result;