CI: Update Upload Artifact Action.
[xz.git] / src / xz / file_io.c
blob4a2c8392fa68f9387557076909b83a7644659d12
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file file_io.c
4 /// \brief File opening, unlinking, and closing
5 //
6 // Author: Lasse Collin
7 //
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
11 ///////////////////////////////////////////////////////////////////////////////
13 #include "private.h"
15 #include <fcntl.h>
17 #ifdef TUKLIB_DOSLIKE
18 # include <io.h>
19 #else
20 # include <poll.h>
21 static bool warn_fchown;
22 #endif
24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
25 # include <sys/time.h>
26 #elif defined(HAVE__FUTIME)
27 # include <sys/utime.h>
28 #elif defined(HAVE_UTIME)
29 # include <utime.h>
30 #endif
32 #ifdef HAVE_CAP_RIGHTS_LIMIT
33 # include <sys/capsicum.h>
34 #endif
36 #ifdef HAVE_LINUX_LANDLOCK_H
37 # include <linux/landlock.h>
38 # include <sys/syscall.h>
39 #endif
41 #include "tuklib_open_stdxxx.h"
#ifdef _MSC_VER
// MSVC doesn't provide these POSIX types, so define them here.
#	ifdef _WIN64
typedef __int64 ssize_t;
#	else
typedef int ssize_t;
#	endif

typedef int mode_t;

// Map the POSIX permission bits and file type tests to the names
// used by the Microsoft C runtime.
#	define S_IRUSR _S_IREAD
#	define S_IWUSR _S_IWRITE
#	define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR)
#	define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG)

// Map the POSIX function names to the underscore-prefixed variants.
#	define setmode _setmode
#	define open _open
#	define close _close
#	define lseek _lseeki64
#	define unlink _unlink

// The casts are to silence warnings.
// The sizes are known to be small enough.
#	define read(fd, buf, size) _read(fd, buf, (unsigned int)(size))
#	define write(fd, buf, size) _write(fd, buf, (unsigned int)(size))
#endif

// Flags that don't exist on this platform become no-ops so that they
// can be used unconditionally with open().
#if !defined(O_BINARY)
#	define O_BINARY 0
#endif

#if !defined(O_NOCTTY)
#	define O_NOCTTY 0
#endif

// True if the errno value e is EAGAIN or EWOULDBLOCK. When the two
// constants have the same value, only one comparison is emitted, which
// silences a warning from gcc -Wlogical-op.
#if EAGAIN == EWOULDBLOCK
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) \
		((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif
/// Return values of io_wait()
typedef enum {
	/// Reading or writing is possible.
	IO_WAIT_MORE = 0,

	/// Error or user_abort
	IO_WAIT_ERROR = 1,

	/// poll() timed out
	IO_WAIT_TIMEOUT = 2,
} io_wait_ret;
/// When true, decompression tries to produce sparse output files.
/// io_no_sparse() turns this off.
static bool try_sparse = true;

#ifdef ENABLE_SANDBOX
/// Set by io_allow_sandbox() once the conditions for sandboxing
/// (described in main()) have been met.
static bool sandbox_allowed = false;
#endif

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input as they were before O_NONBLOCK
/// was added. Saved by io_open_src() and restored by io_close_src()
/// when restore_stdin_flags is true.
static int stdin_flags;
static bool restore_stdin_flags = false;

/// Original file status flags of standard output. Saved by
/// io_open_dest() and restored by io_close_dest() when
/// restore_stdout_flags is true.
static int stdout_flags;
static bool restore_stdout_flags = false;

/// Self-pipe used together with the user_abort variable to avoid
/// race conditions with signal handling. Index 0 is polled in io_wait()
/// and index 1 is written to by io_write_to_user_abort_pipe().
static int user_abort_pipe[2];
#endif
118 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
121 extern void
122 io_init(void)
124 // Make sure that stdin, stdout, and stderr are connected to
125 // a valid file descriptor. Exit immediately with exit code ERROR
126 // if we cannot make the file descriptors valid. Maybe we should
127 // print an error message, but our stderr could be screwed anyway.
128 tuklib_open_stdxxx(E_ERROR);
130 #ifndef TUKLIB_DOSLIKE
131 // If fchown() fails setting the owner, we warn about it only if
132 // we are root.
133 warn_fchown = geteuid() == 0;
135 // Create a pipe for the self-pipe trick.
136 if (pipe(user_abort_pipe))
137 message_fatal(_("Error creating a pipe: %s"),
138 strerror(errno));
140 // Make both ends of the pipe non-blocking.
141 for (unsigned i = 0; i < 2; ++i) {
142 int flags = fcntl(user_abort_pipe[i], F_GETFL);
143 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL,
144 flags | O_NONBLOCK) == -1)
145 message_fatal(_("Error creating a pipe: %s"),
146 strerror(errno));
148 #endif
150 #ifdef __DJGPP__
151 // Avoid doing useless things when statting files.
152 // This isn't important but doesn't hurt.
153 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
154 #endif
156 return;
160 #ifndef TUKLIB_DOSLIKE
161 extern void
162 io_write_to_user_abort_pipe(void)
164 // If the write() fails, it's probably due to the pipe being full.
165 // Failing in that case is fine. If the reason is something else,
166 // there's not much we can do since this is called in a signal
167 // handler. So ignore the errors and try to avoid warnings with
168 // GCC and glibc when _FORTIFY_SOURCE=2 is used.
169 uint8_t b = '\0';
170 const ssize_t ret = write(user_abort_pipe[1], &b, 1);
171 (void)ret;
172 return;
174 #endif
177 extern void
178 io_no_sparse(void)
180 try_sparse = false;
181 return;
185 #ifdef ENABLE_SANDBOX
186 extern void
187 io_allow_sandbox(void)
189 sandbox_allowed = true;
190 return;
194 /// Enables operating-system-specific sandbox if it is possible.
195 /// src_fd is the file descriptor of the input file.
196 static void
197 io_sandbox_enter(int src_fd)
199 if (!sandbox_allowed) {
200 // This message is more often annoying than useful so
201 // it's commented out. It can be useful when developing
202 // the sandboxing code.
203 //message(V_DEBUG, _("Sandbox is disabled due "
204 // "to incompatible command line arguments"));
205 return;
208 const char dummy_str[] = "x";
210 // Try to ensure that both libc and xz locale files have been
211 // loaded when NLS is enabled.
212 snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL));
214 // Try to ensure that iconv data files needed for handling multibyte
215 // characters have been loaded. This is needed at least with glibc.
216 tuklib_mbstr_width(dummy_str, NULL);
218 #ifdef HAVE_CAP_RIGHTS_LIMIT
219 // Capsicum needs FreeBSD 10.2 or later.
220 cap_rights_t rights;
222 if (cap_enter())
223 goto error;
225 if (cap_rights_limit(src_fd, cap_rights_init(&rights,
226 CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK)))
227 goto error;
229 if (src_fd != STDIN_FILENO && cap_rights_limit(
230 STDIN_FILENO, cap_rights_clear(&rights)))
231 goto error;
233 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights,
234 CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP,
235 CAP_WRITE, CAP_SEEK)))
236 goto error;
238 if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights,
239 CAP_WRITE)))
240 goto error;
242 if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights,
243 CAP_EVENT)))
244 goto error;
246 if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights,
247 CAP_WRITE)))
248 goto error;
250 #elif defined(HAVE_PLEDGE)
251 // pledge() was introduced in OpenBSD 5.9.
253 // main() unconditionally calls pledge() with fairly relaxed
254 // promises which work in all situations. Here we make the
255 // sandbox more strict.
256 if (pledge("stdio", ""))
257 goto error;
259 (void)src_fd;
261 #elif defined(HAVE_LINUX_LANDLOCK_H)
262 int landlock_abi = syscall(SYS_landlock_create_ruleset,
263 (void *)NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
265 if (landlock_abi > 0) {
266 // We support ABI versions 1-3.
267 if (landlock_abi > 3)
268 landlock_abi = 3;
270 // We want to set all supported flags in handled_access_fs.
271 // This way the ruleset will initially forbid access to all
272 // actions that the available Landlock ABI version supports.
273 // Exceptions can be added using landlock_add_rule(2) to
274 // allow certain actions on certain files or directories.
276 // The same flag values are used on all archs. ABI v2 and v3
277 // both add one new flag.
279 // First in ABI v1: LANDLOCK_ACCESS_FS_EXECUTE = 1ULL << 0
280 // Last in ABI v1: LANDLOCK_ACCESS_FS_MAKE_SYM = 1ULL << 12
281 // Last in ABI v2: LANDLOCK_ACCESS_FS_REFER = 1ULL << 13
282 // Last in ABI v3: LANDLOCK_ACCESS_FS_TRUNCATE = 1ULL << 14
284 // This makes it simple to set the mask based on the ABI
285 // version and we don't need to care which flags are #defined
286 // in the installed <linux/landlock.h>.
287 const struct landlock_ruleset_attr attr = {
288 .handled_access_fs = (1ULL << (12 + landlock_abi)) - 1
291 const int ruleset_fd = syscall(SYS_landlock_create_ruleset,
292 &attr, sizeof(attr), 0U);
293 if (ruleset_fd < 0)
294 goto error;
296 // All files we need should have already been opened. Thus,
297 // we don't need to add any rules using landlock_add_rule(2)
298 // before activating the sandbox.
300 // NOTE: It's possible that the hack at the beginning of this
301 // function isn't be good enough. It tries to get translations
302 // and libc-specific files loaded but if it's not good enough
303 // then perhaps a Landlock rule to allow reading from /usr
304 // and/or the xz installation prefix would be needed.
306 // prctl(PR_SET_NO_NEW_PRIVS, ...) was already called in
307 // main() so we don't do it here again.
308 if (syscall(SYS_landlock_restrict_self, ruleset_fd, 0U) != 0)
309 goto error;
312 (void)src_fd;
314 #else
315 # error ENABLE_SANDBOX is defined but no sandboxing method was found.
316 #endif
318 // This message is annoying in xz -lvv.
319 //message(V_DEBUG, _("Sandbox was successfully enabled"));
320 return;
322 error:
323 #ifdef HAVE_CAP_RIGHTS_LIMIT
324 // If a kernel is configured without capability mode support or
325 // used in an emulator that does not implement the capability
326 // system calls, then the Capsicum system calls will fail and set
327 // errno to ENOSYS. In that case xz will silently run without
328 // the sandbox.
329 if (errno == ENOSYS)
330 return;
331 #endif
332 message_fatal(_("Failed to enable the sandbox"));
334 #endif // ENABLE_SANDBOX
337 #ifndef TUKLIB_DOSLIKE
338 /// \brief Waits for input or output to become available or for a signal
340 /// This uses the self-pipe trick to avoid a race condition that can occur
341 /// if a signal is caught after user_abort has been checked but before e.g.
342 /// read() has been called. In that situation read() could block unless
343 /// non-blocking I/O is used. With non-blocking I/O something like select()
344 /// or poll() is needed to avoid a busy-wait loop, and the same race condition
345 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
346 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is
347 /// old and very portable.
348 static io_wait_ret
349 io_wait(file_pair *pair, int timeout, bool is_reading)
351 struct pollfd pfd[2];
353 if (is_reading) {
354 pfd[0].fd = pair->src_fd;
355 pfd[0].events = POLLIN;
356 } else {
357 pfd[0].fd = pair->dest_fd;
358 pfd[0].events = POLLOUT;
361 pfd[1].fd = user_abort_pipe[0];
362 pfd[1].events = POLLIN;
364 while (true) {
365 const int ret = poll(pfd, 2, timeout);
367 if (user_abort)
368 return IO_WAIT_ERROR;
370 if (ret == -1) {
371 if (errno == EINTR || errno == EAGAIN)
372 continue;
374 message_error(_("%s: poll() failed: %s"),
375 is_reading ? pair->src_name
376 : pair->dest_name,
377 strerror(errno));
378 return IO_WAIT_ERROR;
381 if (ret == 0)
382 return IO_WAIT_TIMEOUT;
384 if (pfd[0].revents != 0)
385 return IO_WAIT_MORE;
388 #endif
391 /// \brief Unlink a file
393 /// This tries to verify that the file being unlinked really is the file that
394 /// we want to unlink by verifying device and inode numbers. There's still
395 /// a small unavoidable race, but this is much better than nothing (the file
396 /// could have been moved/replaced even hours earlier).
397 static void
398 io_unlink(const char *name, const struct stat *known_st)
400 #if defined(TUKLIB_DOSLIKE)
401 // On DOS-like systems, st_ino is meaningless, so don't bother
402 // testing it. Just silence a compiler warning.
403 (void)known_st;
404 #else
405 struct stat new_st;
407 // If --force was used, use stat() instead of lstat(). This way
408 // (de)compressing symlinks works correctly. However, it also means
409 // that xz cannot detect if a regular file foo is renamed to bar
410 // and then a symlink foo -> bar is created. Because of stat()
411 // instead of lstat(), xz will think that foo hasn't been replaced
412 // with another file. Thus, xz will remove foo even though it no
413 // longer is the same file that xz used when it started compressing.
414 // Probably it's not too bad though, so this doesn't need a more
415 // complex fix.
416 const int stat_ret = opt_force
417 ? stat(name, &new_st) : lstat(name, &new_st);
419 if (stat_ret
420 # ifdef __VMS
421 // st_ino is an array, and we don't want to
422 // compare st_dev at all.
423 || memcmp(&new_st.st_ino, &known_st->st_ino,
424 sizeof(new_st.st_ino)) != 0
425 # else
426 // Typical POSIX-like system
427 || new_st.st_dev != known_st->st_dev
428 || new_st.st_ino != known_st->st_ino
429 # endif
431 // TRANSLATORS: When compression or decompression finishes,
432 // and xz is going to remove the source file, xz first checks
433 // if the source file still exists, and if it does, does its
434 // device and inode numbers match what xz saw when it opened
435 // the source file. If these checks fail, this message is
436 // shown, %s being the filename, and the file is not deleted.
437 // The check for device and inode numbers is there, because
438 // it is possible that the user has put a new file in place
439 // of the original file, and in that case it obviously
440 // shouldn't be removed.
441 message_warning(_("%s: File seems to have been moved, "
442 "not removing"), name);
443 else
444 #endif
445 // There's a race condition between lstat() and unlink()
446 // but at least we have tried to avoid removing wrong file.
447 if (unlink(name))
448 message_warning(_("%s: Cannot remove: %s"),
449 name, strerror(errno));
451 return;
455 /// \brief Copies owner/group and permissions
457 /// \todo ACL and EA support
459 static void
460 io_copy_attrs(const file_pair *pair)
462 // Skip chown and chmod on Windows.
463 #ifndef TUKLIB_DOSLIKE
464 // This function is more tricky than you may think at first.
465 // Blindly copying permissions may permit users to access the
466 // destination file who didn't have permission to access the
467 // source file.
469 // Try changing the owner of the file. If we aren't root or the owner
470 // isn't already us, fchown() probably doesn't succeed. We warn
471 // about failing fchown() only if we are root.
472 if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
473 && warn_fchown)
474 message_warning(_("%s: Cannot set the file owner: %s"),
475 pair->dest_name, strerror(errno));
477 mode_t mode;
479 // With BSD semantics the new dest file may have a group that
480 // does not belong to the user. If the src file has the same gid
481 // nothing has to be done. Nevertheless OpenBSD fchown(2) fails
482 // in this case which seems to be POSIX compliant. As there is
483 // nothing to do, skip the system call.
484 if (pair->dest_st.st_gid != pair->src_st.st_gid
485 && fchown(pair->dest_fd, (uid_t)(-1),
486 pair->src_st.st_gid)) {
487 message_warning(_("%s: Cannot set the file group: %s"),
488 pair->dest_name, strerror(errno));
489 // We can still safely copy some additional permissions:
490 // 'group' must be at least as strict as 'other' and
491 // also vice versa.
493 // NOTE: After this, the owner of the source file may
494 // get additional permissions. This shouldn't be too bad,
495 // because the owner would have had permission to chmod
496 // the original file anyway.
497 mode = ((pair->src_st.st_mode & 0070) >> 3)
498 & (pair->src_st.st_mode & 0007);
499 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
500 } else {
501 // Drop the setuid, setgid, and sticky bits.
502 mode = pair->src_st.st_mode & 0777;
505 if (fchmod(pair->dest_fd, mode))
506 message_warning(_("%s: Cannot set the file permissions: %s"),
507 pair->dest_name, strerror(errno));
508 #endif
510 // Copy the timestamps. We have several possible ways to do this, of
511 // which some are better in both security and precision.
513 // First, get the nanosecond part of the timestamps. As of writing,
514 // it's not standardized by POSIX, and there are several names for
515 // the same thing in struct stat.
516 long atime_nsec;
517 long mtime_nsec;
519 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
520 // GNU and Solaris
521 atime_nsec = pair->src_st.st_atim.tv_nsec;
522 mtime_nsec = pair->src_st.st_mtim.tv_nsec;
524 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
525 // BSD
526 atime_nsec = pair->src_st.st_atimespec.tv_nsec;
527 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
529 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
530 // GNU and BSD without extensions
531 atime_nsec = pair->src_st.st_atimensec;
532 mtime_nsec = pair->src_st.st_mtimensec;
534 # elif defined(HAVE_STRUCT_STAT_ST_UATIME)
535 // Tru64
536 atime_nsec = pair->src_st.st_uatime * 1000;
537 mtime_nsec = pair->src_st.st_umtime * 1000;
539 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
540 // UnixWare
541 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
542 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
544 # else
545 // Safe fallback
546 atime_nsec = 0;
547 mtime_nsec = 0;
548 # endif
550 // Construct a structure to hold the timestamps and call appropriate
551 // function to set the timestamps.
552 #if defined(HAVE_FUTIMENS)
553 // Use nanosecond precision.
554 struct timespec tv[2];
555 tv[0].tv_sec = pair->src_st.st_atime;
556 tv[0].tv_nsec = atime_nsec;
557 tv[1].tv_sec = pair->src_st.st_mtime;
558 tv[1].tv_nsec = mtime_nsec;
560 (void)futimens(pair->dest_fd, tv);
562 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
563 // Use microsecond precision.
564 struct timeval tv[2];
565 tv[0].tv_sec = pair->src_st.st_atime;
566 tv[0].tv_usec = atime_nsec / 1000;
567 tv[1].tv_sec = pair->src_st.st_mtime;
568 tv[1].tv_usec = mtime_nsec / 1000;
570 # if defined(HAVE_FUTIMES)
571 (void)futimes(pair->dest_fd, tv);
572 # elif defined(HAVE_FUTIMESAT)
573 (void)futimesat(pair->dest_fd, NULL, tv);
574 # else
575 // Argh, no function to use a file descriptor to set the timestamp.
576 (void)utimes(pair->dest_name, tv);
577 # endif
579 #elif defined(HAVE__FUTIME)
580 // Use one-second precision with Windows-specific _futime().
581 // We could use utime() too except that for some reason the
582 // timestamp will get reset at close(). With _futime() it works.
583 // This struct cannot be const as _futime() takes a non-const pointer.
584 struct _utimbuf buf = {
585 .actime = pair->src_st.st_atime,
586 .modtime = pair->src_st.st_mtime,
589 // Avoid warnings.
590 (void)atime_nsec;
591 (void)mtime_nsec;
593 (void)_futime(pair->dest_fd, &buf);
595 #elif defined(HAVE_UTIME)
596 // Use one-second precision. utime() doesn't support using file
597 // descriptor either. Some systems have broken utime() prototype
598 // so don't make this const.
599 struct utimbuf buf = {
600 .actime = pair->src_st.st_atime,
601 .modtime = pair->src_st.st_mtime,
604 // Avoid warnings.
605 (void)atime_nsec;
606 (void)mtime_nsec;
608 (void)utime(pair->dest_name, &buf);
609 #endif
611 return;
615 /// Opens the source file. Returns false on success, true on error.
616 static bool
617 io_open_src_real(file_pair *pair)
619 // There's nothing to open when reading from stdin.
620 if (pair->src_name == stdin_filename) {
621 pair->src_fd = STDIN_FILENO;
622 #ifdef TUKLIB_DOSLIKE
623 setmode(STDIN_FILENO, O_BINARY);
624 #else
625 // Try to set stdin to non-blocking mode. It won't work
626 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such
627 // case we proceed as if stdin were non-blocking anyway
628 // (in case of /dev/null it will be in practice). The
629 // same applies to stdout in io_open_dest_real().
630 stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
631 if (stdin_flags == -1) {
632 message_error(_("Error getting the file status flags "
633 "from standard input: %s"),
634 strerror(errno));
635 return true;
638 if ((stdin_flags & O_NONBLOCK) == 0
639 && fcntl(STDIN_FILENO, F_SETFL,
640 stdin_flags | O_NONBLOCK) != -1)
641 restore_stdin_flags = true;
642 #endif
643 #ifdef HAVE_POSIX_FADVISE
644 // It will fail if stdin is a pipe and that's fine.
645 (void)posix_fadvise(STDIN_FILENO, 0, 0,
646 opt_mode == MODE_LIST
647 ? POSIX_FADV_RANDOM
648 : POSIX_FADV_SEQUENTIAL);
649 #endif
650 return false;
653 // Symlinks are not followed unless writing to stdout or --force
654 // or --keep was used.
655 const bool follow_symlinks
656 = opt_stdout || opt_force || opt_keep_original;
658 // We accept only regular files if we are writing the output
659 // to disk too. bzip2 allows overriding this with --force but
660 // gzip and xz don't.
661 const bool reg_files_only = !opt_stdout;
663 // Flags for open()
664 int flags = O_RDONLY | O_BINARY | O_NOCTTY;
666 #ifndef TUKLIB_DOSLIKE
667 // Use non-blocking I/O:
668 // - It prevents blocking when opening FIFOs and some other
669 // special files, which is good if we want to accept only
670 // regular files.
671 // - It can help avoiding some race conditions with signal handling.
672 flags |= O_NONBLOCK;
673 #endif
675 #if defined(O_NOFOLLOW)
676 if (!follow_symlinks)
677 flags |= O_NOFOLLOW;
678 #elif !defined(TUKLIB_DOSLIKE)
679 // Some POSIX-like systems lack O_NOFOLLOW (it's not required
680 // by POSIX). Check for symlinks with a separate lstat() on
681 // these systems.
682 if (!follow_symlinks) {
683 struct stat st;
684 if (lstat(pair->src_name, &st)) {
685 message_error(_("%s: %s"), pair->src_name,
686 strerror(errno));
687 return true;
689 } else if (S_ISLNK(st.st_mode)) {
690 message_warning(_("%s: Is a symbolic link, "
691 "skipping"), pair->src_name);
692 return true;
695 #else
696 // Avoid warnings.
697 (void)follow_symlinks;
698 #endif
700 // Try to open the file. Signals have been blocked so EINTR shouldn't
701 // be possible.
702 pair->src_fd = open(pair->src_name, flags);
704 if (pair->src_fd == -1) {
705 // Signals (that have a signal handler) have been blocked.
706 assert(errno != EINTR);
708 #ifdef O_NOFOLLOW
709 // Give an understandable error message if the reason
710 // for failing was that the file was a symbolic link.
712 // Note that at least Linux, OpenBSD, Solaris, and Darwin
713 // use ELOOP to indicate that O_NOFOLLOW was the reason
714 // that open() failed. Because there may be
715 // directories in the pathname, ELOOP may occur also
716 // because of a symlink loop in the directory part.
717 // So ELOOP doesn't tell us what actually went wrong,
718 // and this stupidity went into POSIX-1.2008 too.
720 // FreeBSD associates EMLINK with O_NOFOLLOW and
721 // Tru64 uses ENOTSUP. We use these directly here
722 // and skip the lstat() call and the associated race.
723 // I want to hear if there are other kernels that
724 // fail with something else than ELOOP with O_NOFOLLOW.
725 bool was_symlink = false;
727 # if defined(__FreeBSD__) || defined(__DragonFly__)
728 if (errno == EMLINK)
729 was_symlink = true;
731 # elif defined(__digital__) && defined(__unix__)
732 if (errno == ENOTSUP)
733 was_symlink = true;
735 # elif defined(__NetBSD__)
736 if (errno == EFTYPE)
737 was_symlink = true;
739 # else
740 if (errno == ELOOP && !follow_symlinks) {
741 const int saved_errno = errno;
742 struct stat st;
743 if (lstat(pair->src_name, &st) == 0
744 && S_ISLNK(st.st_mode))
745 was_symlink = true;
747 errno = saved_errno;
749 # endif
751 if (was_symlink)
752 message_warning(_("%s: Is a symbolic link, "
753 "skipping"), pair->src_name);
754 else
755 #endif
756 // Something else than O_NOFOLLOW failing
757 // (assuming that the race conditions didn't
758 // confuse us).
759 message_error(_("%s: %s"), pair->src_name,
760 strerror(errno));
762 return true;
765 // Stat the source file. We need the result also when we copy
766 // the permissions, and when unlinking.
768 // NOTE: Use stat() instead of fstat() with DJGPP, because
769 // then we have a better chance to get st_ino value that can
770 // be used in io_open_dest_real() to prevent overwriting the
771 // source file.
772 #ifdef __DJGPP__
773 if (stat(pair->src_name, &pair->src_st))
774 goto error_msg;
775 #else
776 if (fstat(pair->src_fd, &pair->src_st))
777 goto error_msg;
778 #endif
780 if (S_ISDIR(pair->src_st.st_mode)) {
781 message_warning(_("%s: Is a directory, skipping"),
782 pair->src_name);
783 goto error;
786 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
787 message_warning(_("%s: Not a regular file, skipping"),
788 pair->src_name);
789 goto error;
792 #ifndef TUKLIB_DOSLIKE
793 if (reg_files_only && !opt_force && !opt_keep_original) {
794 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
795 // gzip rejects setuid and setgid files even
796 // when --force was used. bzip2 doesn't check
797 // for them, but calls fchown() after fchmod(),
798 // and many systems automatically drop setuid
799 // and setgid bits there.
801 // We accept setuid and setgid files if
802 // --force or --keep was used. We drop these bits
803 // explicitly in io_copy_attr().
804 message_warning(_("%s: File has setuid or "
805 "setgid bit set, skipping"),
806 pair->src_name);
807 goto error;
810 if (pair->src_st.st_mode & S_ISVTX) {
811 message_warning(_("%s: File has sticky bit "
812 "set, skipping"),
813 pair->src_name);
814 goto error;
817 if (pair->src_st.st_nlink > 1) {
818 message_warning(_("%s: Input file has more "
819 "than one hard link, "
820 "skipping"), pair->src_name);
821 goto error;
825 // If it is something else than a regular file, wait until
826 // there is input available. This way reading from FIFOs
827 // will work when open() is used with O_NONBLOCK.
828 if (!S_ISREG(pair->src_st.st_mode)) {
829 signals_unblock();
830 const io_wait_ret ret = io_wait(pair, -1, true);
831 signals_block();
833 if (ret != IO_WAIT_MORE)
834 goto error;
836 #endif
838 #ifdef HAVE_POSIX_FADVISE
839 // It will fail with some special files like FIFOs but that is fine.
840 (void)posix_fadvise(pair->src_fd, 0, 0,
841 opt_mode == MODE_LIST
842 ? POSIX_FADV_RANDOM
843 : POSIX_FADV_SEQUENTIAL);
844 #endif
846 return false;
848 error_msg:
849 message_error(_("%s: %s"), pair->src_name, strerror(errno));
850 error:
851 (void)close(pair->src_fd);
852 return true;
856 extern file_pair *
857 io_open_src(const char *src_name)
859 if (src_name[0] == '\0') {
860 message_error(_("Empty filename, skipping"));
861 return NULL;
864 // Since we have only one file open at a time, we can use
865 // a statically allocated structure.
866 static file_pair pair;
868 // This implicitly also initializes src_st.st_size to zero
869 // which is expected to be <= 0 by default. fstat() isn't
870 // called when reading from standard input but src_st.st_size
871 // is still read.
872 pair = (file_pair){
873 .src_name = src_name,
874 .dest_name = NULL,
875 .src_fd = -1,
876 .dest_fd = -1,
877 .src_eof = false,
878 .src_has_seen_input = false,
879 .flush_needed = false,
880 .dest_try_sparse = false,
881 .dest_pending_sparse = 0,
884 // Block the signals, for which we have a custom signal handler, so
885 // that we don't need to worry about EINTR.
886 signals_block();
887 const bool error = io_open_src_real(&pair);
888 signals_unblock();
890 #ifdef ENABLE_SANDBOX
891 if (!error)
892 io_sandbox_enter(pair.src_fd);
893 #endif
895 return error ? NULL : &pair;
899 /// \brief Closes source file of the file_pair structure
901 /// \param pair File whose src_fd should be closed
902 /// \param success If true, the file will be removed from the disk if
903 /// closing succeeds and --keep hasn't been used.
904 static void
905 io_close_src(file_pair *pair, bool success)
907 #ifndef TUKLIB_DOSLIKE
908 if (restore_stdin_flags) {
909 assert(pair->src_fd == STDIN_FILENO);
911 restore_stdin_flags = false;
913 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
914 message_error(_("Error restoring the status flags "
915 "to standard input: %s"),
916 strerror(errno));
918 #endif
920 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
921 // Close the file before possibly unlinking it. On DOS-like
922 // systems this is always required since unlinking will fail
923 // if the file is open. On POSIX systems it usually works
924 // to unlink open files, but in some cases it doesn't and
925 // one gets EBUSY in errno.
927 // xz 5.2.2 and older unlinked the file before closing it
928 // (except on DOS-like systems). The old code didn't handle
929 // EBUSY and could fail e.g. on some CIFS shares. The
930 // advantage of unlinking before closing is negligible
931 // (avoids a race between close() and stat()/lstat() and
932 // unlink()), so let's keep this simple.
933 (void)close(pair->src_fd);
935 if (success && !opt_keep_original)
936 io_unlink(pair->src_name, &pair->src_st);
939 return;
943 static bool
944 io_open_dest_real(file_pair *pair)
946 if (opt_stdout || pair->src_fd == STDIN_FILENO) {
947 // We don't modify or free() this.
948 pair->dest_name = (char *)"(stdout)";
949 pair->dest_fd = STDOUT_FILENO;
950 #ifdef TUKLIB_DOSLIKE
951 setmode(STDOUT_FILENO, O_BINARY);
952 #else
953 // Try to set O_NONBLOCK if it isn't already set.
954 // If it fails, we assume that stdout is non-blocking
955 // in practice. See the comments in io_open_src_real()
956 // for similar situation with stdin.
958 // NOTE: O_APPEND may be unset later in this function
959 // and it relies on stdout_flags being set here.
960 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
961 if (stdout_flags == -1) {
962 message_error(_("Error getting the file status flags "
963 "from standard output: %s"),
964 strerror(errno));
965 return true;
968 if ((stdout_flags & O_NONBLOCK) == 0
969 && fcntl(STDOUT_FILENO, F_SETFL,
970 stdout_flags | O_NONBLOCK) != -1)
971 restore_stdout_flags = true;
972 #endif
973 } else {
974 pair->dest_name = suffix_get_dest_name(pair->src_name);
975 if (pair->dest_name == NULL)
976 return true;
978 #ifdef __DJGPP__
979 struct stat st;
980 if (stat(pair->dest_name, &st) == 0) {
981 // Check that it isn't a special file like "prn".
982 if (st.st_dev == -1) {
983 message_error("%s: Refusing to write to "
984 "a DOS special file",
985 pair->dest_name);
986 free(pair->dest_name);
987 return true;
990 // Check that we aren't overwriting the source file.
991 if (st.st_dev == pair->src_st.st_dev
992 && st.st_ino == pair->src_st.st_ino) {
993 message_error("%s: Output file is the same "
994 "as the input file",
995 pair->dest_name);
996 free(pair->dest_name);
997 return true;
1000 #endif
1002 // If --force was used, unlink the target file first.
1003 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
1004 message_error(_("%s: Cannot remove: %s"),
1005 pair->dest_name, strerror(errno));
1006 free(pair->dest_name);
1007 return true;
1010 // Open the file.
1011 int flags = O_WRONLY | O_BINARY | O_NOCTTY
1012 | O_CREAT | O_EXCL;
1013 #ifndef TUKLIB_DOSLIKE
1014 flags |= O_NONBLOCK;
1015 #endif
1016 const mode_t mode = S_IRUSR | S_IWUSR;
1017 pair->dest_fd = open(pair->dest_name, flags, mode);
1019 if (pair->dest_fd == -1) {
1020 message_error(_("%s: %s"), pair->dest_name,
1021 strerror(errno));
1022 free(pair->dest_name);
1023 return true;
1027 if (fstat(pair->dest_fd, &pair->dest_st)) {
1028 // If fstat() really fails, we have a safe fallback here.
1029 #if defined(__VMS)
1030 pair->dest_st.st_ino[0] = 0;
1031 pair->dest_st.st_ino[1] = 0;
1032 pair->dest_st.st_ino[2] = 0;
1033 #else
1034 pair->dest_st.st_dev = 0;
1035 pair->dest_st.st_ino = 0;
1036 #endif
1038 #if defined(TUKLIB_DOSLIKE) && !defined(__DJGPP__)
1039 // Check that the output file is a regular file. We open with O_EXCL
1040 // but that doesn't prevent open()/_open() on Windows from opening
1041 // files like "con" or "nul".
1043 // With DJGPP this check is done with stat() even before opening
1044 // the output file. That method or a variant of it doesn't work on
1045 // Windows because on Windows stat()/_stat64() sets st.st_mode so
1046 // that S_ISREG(st.st_mode) will be true even for special files.
1047 // With fstat()/_fstat64() it works.
1048 else if (pair->dest_fd != STDOUT_FILENO
1049 && !S_ISREG(pair->dest_st.st_mode)) {
1050 message_error("%s: Destination is not a regular file",
1051 pair->dest_name);
1053 // dest_fd needs to be reset to -1 to keep io_close() working.
1054 (void)close(pair->dest_fd);
1055 pair->dest_fd = -1;
1057 free(pair->dest_name);
1058 return true;
1060 #elif !defined(TUKLIB_DOSLIKE)
1061 else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
1062 // When writing to standard output, we need to be extra
1063 // careful:
1064 // - It may be connected to something else than
1065 // a regular file.
1066 // - We aren't necessarily writing to a new empty file
1067 // or to the end of an existing file.
1068 // - O_APPEND may be active.
1070 // TODO: I'm keeping this disabled for DOS-like systems
1071 // for now. FAT doesn't support sparse files, but NTFS
1072 // does, so maybe this should be enabled on Windows after
1073 // some testing.
1074 if (pair->dest_fd == STDOUT_FILENO) {
1075 if (!S_ISREG(pair->dest_st.st_mode))
1076 return false;
1078 if (stdout_flags & O_APPEND) {
1079 // Creating a sparse file is not possible
1080 // when O_APPEND is active (it's used by
1081 // shell's >> redirection). As I understand
1082 // it, it is safe to temporarily disable
1083 // O_APPEND in xz, because if someone
1084 // happened to write to the same file at the
1085 // same time, results would be bad anyway
1086 // (users shouldn't assume that xz uses any
1087 // specific block size when writing data).
1089 // The write position may be something else
1090 // than the end of the file, so we must fix
1091 // it to start writing at the end of the file
1092 // to imitate O_APPEND.
1093 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
1094 return false;
1096 // Construct the new file status flags.
1097 // If O_NONBLOCK was set earlier in this
1098 // function, it must be kept here too.
1099 int flags = stdout_flags & ~O_APPEND;
1100 if (restore_stdout_flags)
1101 flags |= O_NONBLOCK;
1103 // If this fcntl() fails, we continue but won't
1104 // try to create sparse output. The original
1105 // flags will still be restored if needed (to
1106 // unset O_NONBLOCK) when the file is finished.
1107 if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
1108 return false;
1110 // Disabling O_APPEND succeeded. Mark
1111 // that the flags should be restored
1112 // in io_close_dest(). (This may have already
1113 // been set when enabling O_NONBLOCK.)
1114 restore_stdout_flags = true;
1116 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
1117 != pair->dest_st.st_size) {
1118 // Writing won't start exactly at the end
1119 // of the file. We cannot use sparse output,
1120 // because it would probably corrupt the file.
1121 return false;
1125 pair->dest_try_sparse = true;
1127 #endif
1129 return false;
1133 extern bool
1134 io_open_dest(file_pair *pair)
1136 signals_block();
1137 const bool ret = io_open_dest_real(pair);
1138 signals_unblock();
1139 return ret;
1143 /// \brief Closes destination file of the file_pair structure
1145 /// \param pair File whose dest_fd should be closed
1146 /// \param success If false, the file will be removed from the disk.
1148 /// \return Zero if closing succeeds. On error, -1 is returned and
1149 /// error message printed.
1150 static bool
1151 io_close_dest(file_pair *pair, bool success)
1153 #ifndef TUKLIB_DOSLIKE
1154 // If io_open_dest() has disabled O_APPEND, restore it here.
1155 if (restore_stdout_flags) {
1156 assert(pair->dest_fd == STDOUT_FILENO);
1158 restore_stdout_flags = false;
1160 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
1161 message_error(_("Error restoring the O_APPEND flag "
1162 "to standard output: %s"),
1163 strerror(errno));
1164 return true;
1167 #endif
1169 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
1170 return false;
1172 if (close(pair->dest_fd)) {
1173 message_error(_("%s: Closing the file failed: %s"),
1174 pair->dest_name, strerror(errno));
1176 // Closing destination file failed, so we cannot trust its
1177 // contents. Get rid of junk:
1178 io_unlink(pair->dest_name, &pair->dest_st);
1179 free(pair->dest_name);
1180 return true;
1183 // If the operation using this file wasn't successful, we git rid
1184 // of the junk file.
1185 if (!success)
1186 io_unlink(pair->dest_name, &pair->dest_st);
1188 free(pair->dest_name);
1190 return false;
1194 extern void
1195 io_close(file_pair *pair, bool success)
1197 // Take care of sparseness at the end of the output file.
1198 if (success && pair->dest_try_sparse
1199 && pair->dest_pending_sparse > 0) {
1200 // Seek forward one byte less than the size of the pending
1201 // hole, then write one zero-byte. This way the file grows
1202 // to its correct size. An alternative would be to use
1203 // ftruncate() but that isn't portable enough (e.g. it
1204 // doesn't work with FAT on Linux; FAT isn't that important
1205 // since it doesn't support sparse files anyway, but we don't
1206 // want to create corrupt files on it).
1207 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
1208 SEEK_CUR) == -1) {
1209 message_error(_("%s: Seeking failed when trying "
1210 "to create a sparse file: %s"),
1211 pair->dest_name, strerror(errno));
1212 success = false;
1213 } else {
1214 const uint8_t zero[1] = { '\0' };
1215 if (io_write_buf(pair, zero, 1))
1216 success = false;
1220 signals_block();
1222 // Copy the file attributes. We need to skip this if destination
1223 // file isn't open or it is standard output.
1224 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
1225 io_copy_attrs(pair);
1227 // Close the destination first. If it fails, we must not remove
1228 // the source file!
1229 if (io_close_dest(pair, success))
1230 success = false;
1232 // Close the source file, and unlink it if the operation using this
1233 // file pair was successful and we haven't requested to keep the
1234 // source file.
1235 io_close_src(pair, success);
1237 signals_unblock();
1239 return;
1243 extern void
1244 io_fix_src_pos(file_pair *pair, size_t rewind_size)
1246 assert(rewind_size <= IO_BUFFER_SIZE);
1248 if (rewind_size > 0) {
1249 // This doesn't need to work on unseekable file descriptors,
1250 // so just ignore possible errors.
1251 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
1254 return;
1258 extern size_t
1259 io_read(file_pair *pair, io_buf *buf, size_t size)
1261 assert(size <= IO_BUFFER_SIZE);
1263 size_t pos = 0;
1265 while (pos < size) {
1266 const ssize_t amount = read(
1267 pair->src_fd, buf->u8 + pos, size - pos);
1269 if (amount == 0) {
1270 pair->src_eof = true;
1271 break;
1274 if (amount == -1) {
1275 if (errno == EINTR) {
1276 if (user_abort)
1277 return SIZE_MAX;
1279 continue;
1282 #ifndef TUKLIB_DOSLIKE
1283 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1284 // Disable the flush-timeout if no input has
1285 // been seen since the previous flush and thus
1286 // there would be nothing to flush after the
1287 // timeout expires (avoids busy waiting).
1288 const int timeout = pair->src_has_seen_input
1289 ? mytime_get_flush_timeout()
1290 : -1;
1292 switch (io_wait(pair, timeout, true)) {
1293 case IO_WAIT_MORE:
1294 continue;
1296 case IO_WAIT_ERROR:
1297 return SIZE_MAX;
1299 case IO_WAIT_TIMEOUT:
1300 pair->flush_needed = true;
1301 return pos;
1303 default:
1304 message_bug();
1307 #endif
1309 message_error(_("%s: Read error: %s"),
1310 pair->src_name, strerror(errno));
1312 return SIZE_MAX;
1315 pos += (size_t)(amount);
1317 if (!pair->src_has_seen_input) {
1318 pair->src_has_seen_input = true;
1319 mytime_set_flush_time();
1323 return pos;
1327 extern bool
1328 io_seek_src(file_pair *pair, uint64_t pos)
1330 // Caller must not attempt to seek past the end of the input file
1331 // (seeking to 100 in a 100-byte file is seeking to the end of
1332 // the file, not past the end of the file, and thus that is allowed).
1334 // This also validates that pos can be safely cast to off_t.
1335 if (pos > (uint64_t)(pair->src_st.st_size))
1336 message_bug();
1338 if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) {
1339 message_error(_("%s: Error seeking the file: %s"),
1340 pair->src_name, strerror(errno));
1341 return true;
1344 pair->src_eof = false;
1346 return false;
1350 extern bool
1351 io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos)
1353 // Using lseek() and read() is more portable than pread() and
1354 // for us it is as good as real pread().
1355 if (io_seek_src(pair, pos))
1356 return true;
1358 const size_t amount = io_read(pair, buf, size);
1359 if (amount == SIZE_MAX)
1360 return true;
1362 if (amount != size) {
1363 message_error(_("%s: Unexpected end of file"),
1364 pair->src_name);
1365 return true;
1368 return false;
1372 static bool
1373 is_sparse(const io_buf *buf)
1375 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
1377 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
1378 if (buf->u64[i] != 0)
1379 return false;
1381 return true;
1385 static bool
1386 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
1388 assert(size <= IO_BUFFER_SIZE);
1390 while (size > 0) {
1391 const ssize_t amount = write(pair->dest_fd, buf, size);
1392 if (amount == -1) {
1393 if (errno == EINTR) {
1394 if (user_abort)
1395 return true;
1397 continue;
1400 #ifndef TUKLIB_DOSLIKE
1401 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1402 if (io_wait(pair, -1, false) == IO_WAIT_MORE)
1403 continue;
1405 return true;
1407 #endif
1409 // Handle broken pipe specially. gzip and bzip2
1410 // don't print anything on SIGPIPE. In addition,
1411 // gzip --quiet uses exit status 2 (warning) on
1412 // broken pipe instead of whatever raise(SIGPIPE)
1413 // would make it return. It is there to hide "Broken
1414 // pipe" message on some old shells (probably old
1415 // GNU bash).
1417 // We don't do anything special with --quiet, which
1418 // is what bzip2 does too. If we get SIGPIPE, we
1419 // will handle it like other signals by setting
1420 // user_abort, and get EPIPE here.
1421 if (errno != EPIPE)
1422 message_error(_("%s: Write error: %s"),
1423 pair->dest_name, strerror(errno));
1425 return true;
1428 buf += (size_t)(amount);
1429 size -= (size_t)(amount);
1432 return false;
1436 extern bool
1437 io_write(file_pair *pair, const io_buf *buf, size_t size)
1439 assert(size <= IO_BUFFER_SIZE);
1441 if (pair->dest_try_sparse) {
1442 // Check if the block is sparse (contains only zeros). If it
1443 // sparse, we just store the amount and return. We will take
1444 // care of actually skipping over the hole when we hit the
1445 // next data block or close the file.
1447 // Since io_close() requires that dest_pending_sparse > 0
1448 // if the file ends with sparse block, we must also return
1449 // if size == 0 to avoid doing the lseek().
1450 if (size == IO_BUFFER_SIZE) {
1451 // Even if the block was sparse, treat it as non-sparse
1452 // if the pending sparse amount is large compared to
1453 // the size of off_t. In practice this only matters
1454 // on 32-bit systems where off_t isn't always 64 bits.
1455 const off_t pending_max
1456 = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
1457 if (is_sparse(buf) && pair->dest_pending_sparse
1458 < pending_max) {
1459 pair->dest_pending_sparse += (off_t)(size);
1460 return false;
1462 } else if (size == 0) {
1463 return false;
1466 // This is not a sparse block. If we have a pending hole,
1467 // skip it now.
1468 if (pair->dest_pending_sparse > 0) {
1469 if (lseek(pair->dest_fd, pair->dest_pending_sparse,
1470 SEEK_CUR) == -1) {
1471 message_error(_("%s: Seeking failed when "
1472 "trying to create a sparse "
1473 "file: %s"), pair->dest_name,
1474 strerror(errno));
1475 return true;
1478 pair->dest_pending_sparse = 0;
1482 return io_write_buf(pair, buf->u8, size);