1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief File opening, unlinking, and closing
6 // Author: Lasse Collin
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
11 ///////////////////////////////////////////////////////////////////////////////
21 static bool warn_fchown
;
24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
25 # include <sys/time.h>
26 #elif defined(HAVE__FUTIME)
27 # include <sys/utime.h>
28 #elif defined(HAVE_UTIME)
32 #ifdef HAVE_CAP_RIGHTS_LIMIT
33 # include <sys/capsicum.h>
36 #ifdef HAVE_LINUX_LANDLOCK_H
37 # include <linux/landlock.h>
38 # include <sys/syscall.h>
41 #include "tuklib_open_stdxxx.h"
45 typedef __int64 ssize_t
;
51 # define S_IRUSR _S_IREAD
52 # define S_IWUSR _S_IWRITE
54 # define setmode _setmode
57 # define lseek _lseeki64
58 # define unlink _unlink
60 // The casts are to silence warnings.
61 // The sizes are known to be small enough.
62 # define read(fd, buf, size) _read(fd, buf, (unsigned int)(size))
63 # define write(fd, buf, size) _write(fd, buf, (unsigned int)(size))
65 # define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR)
66 # define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG)
77 // Using this macro to silence a warning from gcc -Wlogical-op.
78 #if EAGAIN == EWOULDBLOCK
79 # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
81 # define IS_EAGAIN_OR_EWOULDBLOCK(e) \
82 ((e) == EAGAIN || (e) == EWOULDBLOCK)
87 IO_WAIT_MORE
, // Reading or writing is possible.
88 IO_WAIT_ERROR
, // Error or user_abort
89 IO_WAIT_TIMEOUT
, // poll() timed out
93 /// If true, try to create sparse files when decompressing.
94 static bool try_sparse
= true;
97 /// True if the conditions for sandboxing (described in main()) have been met.
98 static bool sandbox_allowed
= false;
101 #ifndef TUKLIB_DOSLIKE
102 /// File status flags of standard input. This is used by io_open_src()
103 /// and io_close_src().
104 static int stdin_flags
;
105 static bool restore_stdin_flags
= false;
107 /// Original file status flags of standard output. This is used by
108 /// io_open_dest() and io_close_dest() to save and restore the flags.
109 static int stdout_flags
;
110 static bool restore_stdout_flags
= false;
112 /// Self-pipe used together with the user_abort variable to avoid
113 /// race conditions with signal handling.
114 static int user_abort_pipe
[2];
118 static bool io_write_buf(file_pair
*pair
, const uint8_t *buf
, size_t size
);
124 // Make sure that stdin, stdout, and stderr are connected to
125 // a valid file descriptor. Exit immediately with exit code ERROR
126 // if we cannot make the file descriptors valid. Maybe we should
127 // print an error message, but our stderr could be screwed anyway.
128 tuklib_open_stdxxx(E_ERROR
);
130 #ifndef TUKLIB_DOSLIKE
131 // If fchown() fails setting the owner, we warn about it only if
133 warn_fchown
= geteuid() == 0;
135 // Create a pipe for the self-pipe trick.
136 if (pipe(user_abort_pipe
))
137 message_fatal(_("Error creating a pipe: %s"),
140 // Make both ends of the pipe non-blocking.
141 for (unsigned i
= 0; i
< 2; ++i
) {
142 int flags
= fcntl(user_abort_pipe
[i
], F_GETFL
);
143 if (flags
== -1 || fcntl(user_abort_pipe
[i
], F_SETFL
,
144 flags
| O_NONBLOCK
) == -1)
145 message_fatal(_("Error creating a pipe: %s"),
151 // Avoid doing useless things when statting files.
152 // This isn't important but doesn't hurt.
153 _djstat_flags
= _STAT_EXEC_EXT
| _STAT_EXEC_MAGIC
| _STAT_DIRSIZE
;
160 #ifndef TUKLIB_DOSLIKE
162 io_write_to_user_abort_pipe(void)
164 // If the write() fails, it's probably due to the pipe being full.
165 // Failing in that case is fine. If the reason is something else,
166 // there's not much we can do since this is called in a signal
167 // handler. So ignore the errors and try to avoid warnings with
168 // GCC and glibc when _FORTIFY_SOURCE=2 is used.
170 const ssize_t ret
= write(user_abort_pipe
[1], &b
, 1);
185 #ifdef ENABLE_SANDBOX
187 io_allow_sandbox(void)
189 sandbox_allowed
= true;
194 /// Enables operating-system-specific sandbox if it is possible.
195 /// src_fd is the file descriptor of the input file.
197 io_sandbox_enter(int src_fd
)
199 if (!sandbox_allowed
) {
200 // This message is more often annoying than useful so
201 // it's commented out. It can be useful when developing
202 // the sandboxing code.
203 //message(V_DEBUG, _("Sandbox is disabled due "
204 // "to incompatible command line arguments"));
208 const char dummy_str
[] = "x";
210 // Try to ensure that both libc and xz locale files have been
211 // loaded when NLS is enabled.
212 snprintf(NULL
, 0, "%s%s", _(dummy_str
), strerror(EINVAL
));
214 // Try to ensure that iconv data files needed for handling multibyte
215 // characters have been loaded. This is needed at least with glibc.
216 tuklib_mbstr_width(dummy_str
, NULL
);
218 #ifdef HAVE_CAP_RIGHTS_LIMIT
219 // Capsicum needs FreeBSD 10.2 or later.
225 if (cap_rights_limit(src_fd
, cap_rights_init(&rights
,
226 CAP_EVENT
, CAP_FCNTL
, CAP_LOOKUP
, CAP_READ
, CAP_SEEK
)))
229 if (src_fd
!= STDIN_FILENO
&& cap_rights_limit(
230 STDIN_FILENO
, cap_rights_clear(&rights
)))
233 if (cap_rights_limit(STDOUT_FILENO
, cap_rights_init(&rights
,
234 CAP_EVENT
, CAP_FCNTL
, CAP_FSTAT
, CAP_LOOKUP
,
235 CAP_WRITE
, CAP_SEEK
)))
238 if (cap_rights_limit(STDERR_FILENO
, cap_rights_init(&rights
,
242 if (cap_rights_limit(user_abort_pipe
[0], cap_rights_init(&rights
,
246 if (cap_rights_limit(user_abort_pipe
[1], cap_rights_init(&rights
,
250 #elif defined(HAVE_PLEDGE)
251 // pledge() was introduced in OpenBSD 5.9.
253 // main() unconditionally calls pledge() with fairly relaxed
254 // promises which work in all situations. Here we make the
255 // sandbox more strict.
256 if (pledge("stdio", ""))
261 #elif defined(HAVE_LINUX_LANDLOCK_H)
262 int landlock_abi
= syscall(SYS_landlock_create_ruleset
,
263 (void *)NULL
, 0, LANDLOCK_CREATE_RULESET_VERSION
);
265 if (landlock_abi
> 0) {
266 // We support ABI versions 1-3.
267 if (landlock_abi
> 3)
270 // We want to set all supported flags in handled_access_fs.
271 // This way the ruleset will initially forbid access to all
272 // actions that the available Landlock ABI version supports.
273 // Exceptions can be added using landlock_add_rule(2) to
274 // allow certain actions on certain files or directories.
276 // The same flag values are used on all archs. ABI v2 and v3
277 // both add one new flag.
279 // First in ABI v1: LANDLOCK_ACCESS_FS_EXECUTE = 1ULL << 0
280 // Last in ABI v1: LANDLOCK_ACCESS_FS_MAKE_SYM = 1ULL << 12
281 // Last in ABI v2: LANDLOCK_ACCESS_FS_REFER = 1ULL << 13
282 // Last in ABI v3: LANDLOCK_ACCESS_FS_TRUNCATE = 1ULL << 14
284 // This makes it simple to set the mask based on the ABI
285 // version and we don't need to care which flags are #defined
286 // in the installed <linux/landlock.h>.
287 const struct landlock_ruleset_attr attr
= {
288 .handled_access_fs
= (1ULL << (12 + landlock_abi
)) - 1
291 const int ruleset_fd
= syscall(SYS_landlock_create_ruleset
,
292 &attr
, sizeof(attr
), 0U);
296 // All files we need should have already been opened. Thus,
297 // we don't need to add any rules using landlock_add_rule(2)
298 // before activating the sandbox.
300 // NOTE: It's possible that the hack at the beginning of this
301 // function isn't good enough. It tries to get translations
302 // and libc-specific files loaded but if it's not good enough
303 // then perhaps a Landlock rule to allow reading from /usr
304 // and/or the xz installation prefix would be needed.
306 // prctl(PR_SET_NO_NEW_PRIVS, ...) was already called in
307 // main() so we don't do it here again.
308 if (syscall(SYS_landlock_restrict_self
, ruleset_fd
, 0U) != 0)
315 # error ENABLE_SANDBOX is defined but no sandboxing method was found.
318 // This message is annoying in xz -lvv.
319 //message(V_DEBUG, _("Sandbox was successfully enabled"));
323 #ifdef HAVE_CAP_RIGHTS_LIMIT
324 // If a kernel is configured without capability mode support or
325 // used in an emulator that does not implement the capability
326 // system calls, then the Capsicum system calls will fail and set
327 // errno to ENOSYS. In that case xz will silently run without
332 message_fatal(_("Failed to enable the sandbox"));
334 #endif // ENABLE_SANDBOX
337 #ifndef TUKLIB_DOSLIKE
338 /// \brief Waits for input or output to become available or for a signal
340 /// This uses the self-pipe trick to avoid a race condition that can occur
341 /// if a signal is caught after user_abort has been checked but before e.g.
342 /// read() has been called. In that situation read() could block unless
343 /// non-blocking I/O is used. With non-blocking I/O something like select()
344 /// or poll() is needed to avoid a busy-wait loop, and the same race condition
345 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
346 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is
347 /// old and very portable.
349 io_wait(file_pair
*pair
, int timeout
, bool is_reading
)
351 struct pollfd pfd
[2];
354 pfd
[0].fd
= pair
->src_fd
;
355 pfd
[0].events
= POLLIN
;
357 pfd
[0].fd
= pair
->dest_fd
;
358 pfd
[0].events
= POLLOUT
;
361 pfd
[1].fd
= user_abort_pipe
[0];
362 pfd
[1].events
= POLLIN
;
365 const int ret
= poll(pfd
, 2, timeout
);
368 return IO_WAIT_ERROR
;
371 if (errno
== EINTR
|| errno
== EAGAIN
)
374 message_error(_("%s: poll() failed: %s"),
375 is_reading
? pair
->src_name
378 return IO_WAIT_ERROR
;
382 return IO_WAIT_TIMEOUT
;
384 if (pfd
[0].revents
!= 0)
391 /// \brief Unlink a file
393 /// This tries to verify that the file being unlinked really is the file that
394 /// we want to unlink by verifying device and inode numbers. There's still
395 /// a small unavoidable race, but this is much better than nothing (the file
396 /// could have been moved/replaced even hours earlier).
398 io_unlink(const char *name
, const struct stat
*known_st
)
400 #if defined(TUKLIB_DOSLIKE)
401 // On DOS-like systems, st_ino is meaningless, so don't bother
402 // testing it. Just silence a compiler warning.
407 // If --force was used, use stat() instead of lstat(). This way
408 // (de)compressing symlinks works correctly. However, it also means
409 // that xz cannot detect if a regular file foo is renamed to bar
410 // and then a symlink foo -> bar is created. Because of stat()
411 // instead of lstat(), xz will think that foo hasn't been replaced
412 // with another file. Thus, xz will remove foo even though it no
413 // longer is the same file that xz used when it started compressing.
414 // Probably it's not too bad though, so this doesn't need a more
416 const int stat_ret
= opt_force
417 ? stat(name
, &new_st
) : lstat(name
, &new_st
);
421 // st_ino is an array, and we don't want to
422 // compare st_dev at all.
423 || memcmp(&new_st
.st_ino
, &known_st
->st_ino
,
424 sizeof(new_st
.st_ino
)) != 0
426 // Typical POSIX-like system
427 || new_st
.st_dev
!= known_st
->st_dev
428 || new_st
.st_ino
!= known_st
->st_ino
431 // TRANSLATORS: When compression or decompression finishes,
432 // and xz is going to remove the source file, xz first checks
433 // if the source file still exists, and if it does, does its
434 // device and inode numbers match what xz saw when it opened
435 // the source file. If these checks fail, this message is
436 // shown, %s being the filename, and the file is not deleted.
437 // The check for device and inode numbers is there, because
438 // it is possible that the user has put a new file in place
439 // of the original file, and in that case it obviously
440 // shouldn't be removed.
441 message_warning(_("%s: File seems to have been moved, "
442 "not removing"), name
);
445 // There's a race condition between lstat() and unlink()
446 // but at least we have tried to avoid removing the wrong file.
448 message_warning(_("%s: Cannot remove: %s"),
449 name
, strerror(errno
));
455 /// \brief Copies owner/group and permissions
457 /// \todo ACL and EA support
460 io_copy_attrs(const file_pair
*pair
)
462 // Skip chown and chmod on Windows.
463 #ifndef TUKLIB_DOSLIKE
464 // This function is more tricky than you may think at first.
465 // Blindly copying permissions may permit users to access the
466 // destination file who didn't have permission to access the
469 // Try changing the owner of the file. If we aren't root or the owner
470 // isn't already us, fchown() probably doesn't succeed. We warn
471 // about failing fchown() only if we are root.
472 if (fchown(pair
->dest_fd
, pair
->src_st
.st_uid
, (gid_t
)(-1))
474 message_warning(_("%s: Cannot set the file owner: %s"),
475 pair
->dest_name
, strerror(errno
));
479 // With BSD semantics the new dest file may have a group that
480 // does not belong to the user. If the src file has the same gid
481 // nothing has to be done. Nevertheless OpenBSD fchown(2) fails
482 // in this case which seems to be POSIX compliant. As there is
483 // nothing to do, skip the system call.
484 if (pair
->dest_st
.st_gid
!= pair
->src_st
.st_gid
485 && fchown(pair
->dest_fd
, (uid_t
)(-1),
486 pair
->src_st
.st_gid
)) {
487 message_warning(_("%s: Cannot set the file group: %s"),
488 pair
->dest_name
, strerror(errno
));
489 // We can still safely copy some additional permissions:
490 // 'group' must be at least as strict as 'other' and
493 // NOTE: After this, the owner of the source file may
494 // get additional permissions. This shouldn't be too bad,
495 // because the owner would have had permission to chmod
496 // the original file anyway.
497 mode
= ((pair
->src_st
.st_mode
& 0070) >> 3)
498 & (pair
->src_st
.st_mode
& 0007);
499 mode
= (pair
->src_st
.st_mode
& 0700) | (mode
<< 3) | mode
;
501 // Drop the setuid, setgid, and sticky bits.
502 mode
= pair
->src_st
.st_mode
& 0777;
505 if (fchmod(pair
->dest_fd
, mode
))
506 message_warning(_("%s: Cannot set the file permissions: %s"),
507 pair
->dest_name
, strerror(errno
));
510 // Copy the timestamps. We have several possible ways to do this, of
511 // which some are better in both security and precision.
513 // First, get the nanosecond part of the timestamps. As of writing,
514 // it's not standardized by POSIX, and there are several names for
515 // the same thing in struct stat.
519 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
521 atime_nsec
= pair
->src_st
.st_atim
.tv_nsec
;
522 mtime_nsec
= pair
->src_st
.st_mtim
.tv_nsec
;
524 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
526 atime_nsec
= pair
->src_st
.st_atimespec
.tv_nsec
;
527 mtime_nsec
= pair
->src_st
.st_mtimespec
.tv_nsec
;
529 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
530 // GNU and BSD without extensions
531 atime_nsec
= pair
->src_st
.st_atimensec
;
532 mtime_nsec
= pair
->src_st
.st_mtimensec
;
534 # elif defined(HAVE_STRUCT_STAT_ST_UATIME)
536 atime_nsec
= pair
->src_st
.st_uatime
* 1000;
537 mtime_nsec
= pair
->src_st
.st_umtime
* 1000;
539 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
541 atime_nsec
= pair
->src_st
.st_atim
.st__tim
.tv_nsec
;
542 mtime_nsec
= pair
->src_st
.st_mtim
.st__tim
.tv_nsec
;
550 // Construct a structure to hold the timestamps and call appropriate
551 // function to set the timestamps.
552 #if defined(HAVE_FUTIMENS)
553 // Use nanosecond precision.
554 struct timespec tv
[2];
555 tv
[0].tv_sec
= pair
->src_st
.st_atime
;
556 tv
[0].tv_nsec
= atime_nsec
;
557 tv
[1].tv_sec
= pair
->src_st
.st_mtime
;
558 tv
[1].tv_nsec
= mtime_nsec
;
560 (void)futimens(pair
->dest_fd
, tv
);
562 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
563 // Use microsecond precision.
564 struct timeval tv
[2];
565 tv
[0].tv_sec
= pair
->src_st
.st_atime
;
566 tv
[0].tv_usec
= atime_nsec
/ 1000;
567 tv
[1].tv_sec
= pair
->src_st
.st_mtime
;
568 tv
[1].tv_usec
= mtime_nsec
/ 1000;
570 # if defined(HAVE_FUTIMES)
571 (void)futimes(pair
->dest_fd
, tv
);
572 # elif defined(HAVE_FUTIMESAT)
573 (void)futimesat(pair
->dest_fd
, NULL
, tv
);
575 // Argh, no function to use a file descriptor to set the timestamp.
576 (void)utimes(pair
->dest_name
, tv
);
579 #elif defined(HAVE__FUTIME)
580 // Use one-second precision with Windows-specific _futime().
581 // We could use utime() too except that for some reason the
582 // timestamp will get reset at close(). With _futime() it works.
583 // This struct cannot be const as _futime() takes a non-const pointer.
584 struct _utimbuf buf
= {
585 .actime
= pair
->src_st
.st_atime
,
586 .modtime
= pair
->src_st
.st_mtime
,
593 (void)_futime(pair
->dest_fd
, &buf
);
595 #elif defined(HAVE_UTIME)
596 // Use one-second precision. utime() doesn't support using file
597 // descriptor either. Some systems have broken utime() prototype
598 // so don't make this const.
599 struct utimbuf buf
= {
600 .actime
= pair
->src_st
.st_atime
,
601 .modtime
= pair
->src_st
.st_mtime
,
608 (void)utime(pair
->dest_name
, &buf
);
615 /// Opens the source file. Returns false on success, true on error.
617 io_open_src_real(file_pair
*pair
)
619 // There's nothing to open when reading from stdin.
620 if (pair
->src_name
== stdin_filename
) {
621 pair
->src_fd
= STDIN_FILENO
;
622 #ifdef TUKLIB_DOSLIKE
623 setmode(STDIN_FILENO
, O_BINARY
);
625 // Try to set stdin to non-blocking mode. It won't work
626 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such
627 // case we proceed as if stdin were non-blocking anyway
628 // (in case of /dev/null it will be in practice). The
629 // same applies to stdout in io_open_dest_real().
630 stdin_flags
= fcntl(STDIN_FILENO
, F_GETFL
);
631 if (stdin_flags
== -1) {
632 message_error(_("Error getting the file status flags "
633 "from standard input: %s"),
638 if ((stdin_flags
& O_NONBLOCK
) == 0
639 && fcntl(STDIN_FILENO
, F_SETFL
,
640 stdin_flags
| O_NONBLOCK
) != -1)
641 restore_stdin_flags
= true;
643 #ifdef HAVE_POSIX_FADVISE
644 // It will fail if stdin is a pipe and that's fine.
645 (void)posix_fadvise(STDIN_FILENO
, 0, 0,
646 opt_mode
== MODE_LIST
648 : POSIX_FADV_SEQUENTIAL
);
653 // Symlinks are not followed unless writing to stdout or --force
654 // or --keep was used.
655 const bool follow_symlinks
656 = opt_stdout
|| opt_force
|| opt_keep_original
;
658 // We accept only regular files if we are writing the output
659 // to disk too. bzip2 allows overriding this with --force but
660 // gzip and xz don't.
661 const bool reg_files_only
= !opt_stdout
;
664 int flags
= O_RDONLY
| O_BINARY
| O_NOCTTY
;
666 #ifndef TUKLIB_DOSLIKE
667 // Use non-blocking I/O:
668 // - It prevents blocking when opening FIFOs and some other
669 // special files, which is good if we want to accept only
671 // - It can help avoiding some race conditions with signal handling.
675 #if defined(O_NOFOLLOW)
676 if (!follow_symlinks
)
678 #elif !defined(TUKLIB_DOSLIKE)
679 // Some POSIX-like systems lack O_NOFOLLOW (it's not required
680 // by POSIX). Check for symlinks with a separate lstat() on
682 if (!follow_symlinks
) {
684 if (lstat(pair
->src_name
, &st
)) {
685 message_error(_("%s: %s"), pair
->src_name
,
689 } else if (S_ISLNK(st
.st_mode
)) {
690 message_warning(_("%s: Is a symbolic link, "
691 "skipping"), pair
->src_name
);
697 (void)follow_symlinks
;
700 // Try to open the file. Signals have been blocked so EINTR shouldn't
702 pair
->src_fd
= open(pair
->src_name
, flags
);
704 if (pair
->src_fd
== -1) {
705 // Signals (that have a signal handler) have been blocked.
706 assert(errno
!= EINTR
);
709 // Give an understandable error message if the reason
710 // for failing was that the file was a symbolic link.
712 // Note that at least Linux, OpenBSD, Solaris, and Darwin
713 // use ELOOP to indicate that O_NOFOLLOW was the reason
714 // that open() failed. Because there may be
715 // directories in the pathname, ELOOP may occur also
716 // because of a symlink loop in the directory part.
717 // So ELOOP doesn't tell us what actually went wrong,
718 // and this stupidity went into POSIX-1.2008 too.
720 // FreeBSD associates EMLINK with O_NOFOLLOW and
721 // Tru64 uses ENOTSUP. We use these directly here
722 // and skip the lstat() call and the associated race.
723 // I want to hear if there are other kernels that
724 // fail with something else than ELOOP with O_NOFOLLOW.
725 bool was_symlink
= false;
727 # if defined(__FreeBSD__) || defined(__DragonFly__)
731 # elif defined(__digital__) && defined(__unix__)
732 if (errno
== ENOTSUP
)
735 # elif defined(__NetBSD__)
740 if (errno
== ELOOP
&& !follow_symlinks
) {
741 const int saved_errno
= errno
;
743 if (lstat(pair
->src_name
, &st
) == 0
744 && S_ISLNK(st
.st_mode
))
752 message_warning(_("%s: Is a symbolic link, "
753 "skipping"), pair
->src_name
);
756 // Something else than O_NOFOLLOW failing
757 // (assuming that the race conditions didn't
759 message_error(_("%s: %s"), pair
->src_name
,
765 // Stat the source file. We need the result also when we copy
766 // the permissions, and when unlinking.
768 // NOTE: Use stat() instead of fstat() with DJGPP, because
769 // then we have a better chance to get st_ino value that can
770 // be used in io_open_dest_real() to prevent overwriting the
773 if (stat(pair
->src_name
, &pair
->src_st
))
776 if (fstat(pair
->src_fd
, &pair
->src_st
))
780 if (S_ISDIR(pair
->src_st
.st_mode
)) {
781 message_warning(_("%s: Is a directory, skipping"),
786 if (reg_files_only
&& !S_ISREG(pair
->src_st
.st_mode
)) {
787 message_warning(_("%s: Not a regular file, skipping"),
792 #ifndef TUKLIB_DOSLIKE
793 if (reg_files_only
&& !opt_force
&& !opt_keep_original
) {
794 if (pair
->src_st
.st_mode
& (S_ISUID
| S_ISGID
)) {
795 // gzip rejects setuid and setgid files even
796 // when --force was used. bzip2 doesn't check
797 // for them, but calls fchown() after fchmod(),
798 // and many systems automatically drop setuid
799 // and setgid bits there.
801 // We accept setuid and setgid files if
802 // --force or --keep was used. We drop these bits
803 // explicitly in io_copy_attrs().
804 message_warning(_("%s: File has setuid or "
805 "setgid bit set, skipping"),
810 if (pair
->src_st
.st_mode
& S_ISVTX
) {
811 message_warning(_("%s: File has sticky bit "
817 if (pair
->src_st
.st_nlink
> 1) {
818 message_warning(_("%s: Input file has more "
819 "than one hard link, "
820 "skipping"), pair
->src_name
);
825 // If it is something else than a regular file, wait until
826 // there is input available. This way reading from FIFOs
827 // will work when open() is used with O_NONBLOCK.
828 if (!S_ISREG(pair
->src_st
.st_mode
)) {
830 const io_wait_ret ret
= io_wait(pair
, -1, true);
833 if (ret
!= IO_WAIT_MORE
)
838 #ifdef HAVE_POSIX_FADVISE
839 // It will fail with some special files like FIFOs but that is fine.
840 (void)posix_fadvise(pair
->src_fd
, 0, 0,
841 opt_mode
== MODE_LIST
843 : POSIX_FADV_SEQUENTIAL
);
849 message_error(_("%s: %s"), pair
->src_name
, strerror(errno
));
851 (void)close(pair
->src_fd
);
857 io_open_src(const char *src_name
)
859 if (src_name
[0] == '\0') {
860 message_error(_("Empty filename, skipping"));
864 // Since we have only one file open at a time, we can use
865 // a statically allocated structure.
866 static file_pair pair
;
868 // This implicitly also initializes src_st.st_size to zero
869 // which is expected to be <= 0 by default. fstat() isn't
870 // called when reading from standard input but src_st.st_size
873 .src_name
= src_name
,
878 .src_has_seen_input
= false,
879 .flush_needed
= false,
880 .dest_try_sparse
= false,
881 .dest_pending_sparse
= 0,
884 // Block the signals, for which we have a custom signal handler, so
885 // that we don't need to worry about EINTR.
887 const bool error
= io_open_src_real(&pair
);
890 #ifdef ENABLE_SANDBOX
892 io_sandbox_enter(pair
.src_fd
);
895 return error
? NULL
: &pair
;
899 /// \brief Closes source file of the file_pair structure
901 /// \param pair File whose src_fd should be closed
902 /// \param success If true, the file will be removed from the disk if
903 /// closing succeeds and --keep hasn't been used.
905 io_close_src(file_pair
*pair
, bool success
)
907 #ifndef TUKLIB_DOSLIKE
908 if (restore_stdin_flags
) {
909 assert(pair
->src_fd
== STDIN_FILENO
);
911 restore_stdin_flags
= false;
913 if (fcntl(STDIN_FILENO
, F_SETFL
, stdin_flags
) == -1)
914 message_error(_("Error restoring the status flags "
915 "to standard input: %s"),
920 if (pair
->src_fd
!= STDIN_FILENO
&& pair
->src_fd
!= -1) {
921 // Close the file before possibly unlinking it. On DOS-like
922 // systems this is always required since unlinking will fail
923 // if the file is open. On POSIX systems it usually works
924 // to unlink open files, but in some cases it doesn't and
925 // one gets EBUSY in errno.
927 // xz 5.2.2 and older unlinked the file before closing it
928 // (except on DOS-like systems). The old code didn't handle
929 // EBUSY and could fail e.g. on some CIFS shares. The
930 // advantage of unlinking before closing is negligible
931 // (avoids a race between close() and stat()/lstat() and
932 // unlink()), so let's keep this simple.
933 (void)close(pair
->src_fd
);
935 if (success
&& !opt_keep_original
)
936 io_unlink(pair
->src_name
, &pair
->src_st
);
944 io_open_dest_real(file_pair
*pair
)
946 if (opt_stdout
|| pair
->src_fd
== STDIN_FILENO
) {
947 // We don't modify or free() this.
948 pair
->dest_name
= (char *)"(stdout)";
949 pair
->dest_fd
= STDOUT_FILENO
;
950 #ifdef TUKLIB_DOSLIKE
951 setmode(STDOUT_FILENO
, O_BINARY
);
953 // Try to set O_NONBLOCK if it isn't already set.
954 // If it fails, we assume that stdout is non-blocking
955 // in practice. See the comments in io_open_src_real()
956 // for similar situation with stdin.
958 // NOTE: O_APPEND may be unset later in this function
959 // and it relies on stdout_flags being set here.
960 stdout_flags
= fcntl(STDOUT_FILENO
, F_GETFL
);
961 if (stdout_flags
== -1) {
962 message_error(_("Error getting the file status flags "
963 "from standard output: %s"),
968 if ((stdout_flags
& O_NONBLOCK
) == 0
969 && fcntl(STDOUT_FILENO
, F_SETFL
,
970 stdout_flags
| O_NONBLOCK
) != -1)
971 restore_stdout_flags
= true;
974 pair
->dest_name
= suffix_get_dest_name(pair
->src_name
);
975 if (pair
->dest_name
== NULL
)
980 if (stat(pair
->dest_name
, &st
) == 0) {
981 // Check that it isn't a special file like "prn".
982 if (st
.st_dev
== -1) {
983 message_error("%s: Refusing to write to "
984 "a DOS special file",
986 free(pair
->dest_name
);
990 // Check that we aren't overwriting the source file.
991 if (st
.st_dev
== pair
->src_st
.st_dev
992 && st
.st_ino
== pair
->src_st
.st_ino
) {
993 message_error("%s: Output file is the same "
996 free(pair
->dest_name
);
1002 // If --force was used, unlink the target file first.
1003 if (opt_force
&& unlink(pair
->dest_name
) && errno
!= ENOENT
) {
1004 message_error(_("%s: Cannot remove: %s"),
1005 pair
->dest_name
, strerror(errno
));
1006 free(pair
->dest_name
);
1011 int flags
= O_WRONLY
| O_BINARY
| O_NOCTTY
1013 #ifndef TUKLIB_DOSLIKE
1014 flags
|= O_NONBLOCK
;
1016 const mode_t mode
= S_IRUSR
| S_IWUSR
;
1017 pair
->dest_fd
= open(pair
->dest_name
, flags
, mode
);
1019 if (pair
->dest_fd
== -1) {
1020 message_error(_("%s: %s"), pair
->dest_name
,
1022 free(pair
->dest_name
);
1027 if (fstat(pair
->dest_fd
, &pair
->dest_st
)) {
1028 // If fstat() really fails, we have a safe fallback here.
1030 pair
->dest_st
.st_ino
[0] = 0;
1031 pair
->dest_st
.st_ino
[1] = 0;
1032 pair
->dest_st
.st_ino
[2] = 0;
1034 pair
->dest_st
.st_dev
= 0;
1035 pair
->dest_st
.st_ino
= 0;
1038 #if defined(TUKLIB_DOSLIKE) && !defined(__DJGPP__)
1039 // Check that the output file is a regular file. We open with O_EXCL
1040 // but that doesn't prevent open()/_open() on Windows from opening
1041 // files like "con" or "nul".
1043 // With DJGPP this check is done with stat() even before opening
1044 // the output file. That method or a variant of it doesn't work on
1045 // Windows because on Windows stat()/_stat64() sets st.st_mode so
1046 // that S_ISREG(st.st_mode) will be true even for special files.
1047 // With fstat()/_fstat64() it works.
1048 else if (pair
->dest_fd
!= STDOUT_FILENO
1049 && !S_ISREG(pair
->dest_st
.st_mode
)) {
1050 message_error("%s: Destination is not a regular file",
1053 // dest_fd needs to be reset to -1 to keep io_close() working.
1054 (void)close(pair
->dest_fd
);
1057 free(pair
->dest_name
);
1060 #elif !defined(TUKLIB_DOSLIKE)
1061 else if (try_sparse
&& opt_mode
== MODE_DECOMPRESS
) {
1062 // When writing to standard output, we need to be extra
1064 // - It may be connected to something else than
1066 // - We aren't necessarily writing to a new empty file
1067 // or to the end of an existing file.
1068 // - O_APPEND may be active.
1070 // TODO: I'm keeping this disabled for DOS-like systems
1071 // for now. FAT doesn't support sparse files, but NTFS
1072 // does, so maybe this should be enabled on Windows after
1074 if (pair
->dest_fd
== STDOUT_FILENO
) {
1075 if (!S_ISREG(pair
->dest_st
.st_mode
))
1078 if (stdout_flags
& O_APPEND
) {
1079 // Creating a sparse file is not possible
1080 // when O_APPEND is active (it's used by
1081 // shell's >> redirection). As I understand
1082 // it, it is safe to temporarily disable
1083 // O_APPEND in xz, because if someone
1084 // happened to write to the same file at the
1085 // same time, results would be bad anyway
1086 // (users shouldn't assume that xz uses any
1087 // specific block size when writing data).
1089 // The write position may be something else
1090 // than the end of the file, so we must fix
1091 // it to start writing at the end of the file
1092 // to imitate O_APPEND.
1093 if (lseek(STDOUT_FILENO
, 0, SEEK_END
) == -1)
1096 // Construct the new file status flags.
1097 // If O_NONBLOCK was set earlier in this
1098 // function, it must be kept here too.
1099 int flags
= stdout_flags
& ~O_APPEND
;
1100 if (restore_stdout_flags
)
1101 flags
|= O_NONBLOCK
;
1103 // If this fcntl() fails, we continue but won't
1104 // try to create sparse output. The original
1105 // flags will still be restored if needed (to
1106 // unset O_NONBLOCK) when the file is finished.
1107 if (fcntl(STDOUT_FILENO
, F_SETFL
, flags
) == -1)
1110 // Disabling O_APPEND succeeded. Mark
1111 // that the flags should be restored
1112 // in io_close_dest(). (This may have already
1113 // been set when enabling O_NONBLOCK.)
1114 restore_stdout_flags
= true;
1116 } else if (lseek(STDOUT_FILENO
, 0, SEEK_CUR
)
1117 != pair
->dest_st
.st_size
) {
1118 // Writing won't start exactly at the end
1119 // of the file. We cannot use sparse output,
1120 // because it would probably corrupt the file.
1125 pair
->dest_try_sparse
= true;
1134 io_open_dest(file_pair
*pair
)
1137 const bool ret
= io_open_dest_real(pair
);
1143 /// \brief Closes destination file of the file_pair structure
1145 /// \param pair File whose dest_fd should be closed
1146 /// \param success If false, the file will be removed from the disk.
1148 /// \return Zero if closing succeeds. On error, -1 is returned and
1149 /// error message printed.
1151 io_close_dest(file_pair
*pair
, bool success
)
1153 #ifndef TUKLIB_DOSLIKE
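1154 // If io_open_dest() has modified the file status flags of standard
1154 // output (enabled O_NONBLOCK and/or disabled O_APPEND), restore the
1154 // original flags here.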
1155 if (restore_stdout_flags
) {
1156 assert(pair
->dest_fd
== STDOUT_FILENO
);
1158 restore_stdout_flags
= false;
1160 if (fcntl(STDOUT_FILENO
, F_SETFL
, stdout_flags
) == -1) {
1161 message_error(_("Error restoring the O_APPEND flag "
1162 "to standard output: %s"),
1169 if (pair
->dest_fd
== -1 || pair
->dest_fd
== STDOUT_FILENO
)
1172 if (close(pair
->dest_fd
)) {
1173 message_error(_("%s: Closing the file failed: %s"),
1174 pair
->dest_name
, strerror(errno
));
1176 // Closing destination file failed, so we cannot trust its
1177 // contents. Get rid of junk:
1178 io_unlink(pair
->dest_name
, &pair
->dest_st
);
1179 free(pair
->dest_name
);
1183 // If the operation using this file wasn't successful, we get rid
1184 // of the junk file.
1186 io_unlink(pair
->dest_name
, &pair
->dest_st
);
1188 free(pair
->dest_name
);
/// Finishes a file pair: completes a pending sparse hole at the end of the
/// destination, copies attributes via io_copy_attrs() when the destination
/// is a real open file, then calls io_close_dest() followed by
/// io_close_src().
///
/// \param pair File pair to finish
/// \param success Whether the operation on this pair succeeded; it gates
/// the sparse-tail handling and the attribute copy, and is
/// passed on to io_close_dest() and io_close_src().
// NOTE(review): several original lines are missing from this listing
// (embedded numbering has gaps), including the statements that run when
// the seek, the write or io_close_dest() report failure.
1195 io_close(file_pair
*pair
, bool success
)
1197 // Take care of sparseness at the end of the output file.
1198 if (success
&& pair
->dest_try_sparse
1199 && pair
->dest_pending_sparse
> 0) {
1200 // Seek forward one byte less than the size of the pending
1201 // hole, then write one zero-byte. This way the file grows
1202 // to its correct size. An alternative would be to use
1203 // ftruncate() but that isn't portable enough (e.g. it
1204 // doesn't work with FAT on Linux; FAT isn't that important
1205 // since it doesn't support sparse files anyway, but we don't
1206 // want to create corrupt files on it).
1207 if (lseek(pair
->dest_fd
, pair
->dest_pending_sparse
- 1,
1209 message_error(_("%s: Seeking failed when trying "
1210 "to create a sparse file: %s"),
1211 pair
->dest_name
, strerror(errno
));
// The single zero byte that terminates the hole is written with
// io_write_buf().
1214 const uint8_t zero
[1] = { '\0' };
1215 if (io_write_buf(pair
, zero
, 1))
1222 // Copy the file attributes. We need to skip this if destination
1223 // file isn't open or it is standard output.
1224 if (success
&& pair
->dest_fd
!= -1 && pair
->dest_fd
!= STDOUT_FILENO
)
1225 io_copy_attrs(pair
);
1227 // Close the destination first. If it fails, we must not remove
// NOTE(review): the rest of the sentence above (embedded line 1228) is
// missing from this listing.
1229 if (io_close_dest(pair
, success
))
1232 // Close the source file, and unlink it if the operation using this
1233 // file pair was successful and we haven't requested to keep the
// NOTE(review): the end of the sentence above (embedded line 1234) is
// missing from this listing.
1235 io_close_src(pair
, success
);
/// Moves the file position of pair->src_fd backwards by rewind_size bytes
/// relative to the current position.
///
/// \param pair File pair whose src_fd is repositioned
/// \param rewind_size Number of bytes to step back; asserted to be at most
/// IO_BUFFER_SIZE. Zero means no seek is attempted.
1244 io_fix_src_pos(file_pair
*pair
, size_t rewind_size
)
1246 assert(rewind_size
<= IO_BUFFER_SIZE
);
1248 if (rewind_size
> 0) {
1249 // This doesn't need to work on unseekable file descriptors,
1250 // so just ignore possible errors.
// rewind_size is converted to off_t before negation; the lseek() result is
// explicitly discarded with the (void) cast.
1251 (void)lseek(pair
->src_fd
, -(off_t
)(rewind_size
), SEEK_CUR
);
/// Reads from pair->src_fd into buf->u8, repeating read() while fewer than
/// `size` bytes have been collected.
///
/// \param pair File pair; src_fd is read, and src_eof, flush_needed and
/// src_has_seen_input may be updated
/// \param buf Destination buffer (its u8 member is filled)
/// \param size Number of bytes wanted; asserted to be at most
/// IO_BUFFER_SIZE
// NOTE(review): the declaration of `pos`, the conditions on `amount`, most
// switch cases and all return statements are missing from this listing
// (embedded numbering has gaps), so the exact return values are not
// documented here.
1259 io_read(file_pair
*pair
, io_buf
*buf
, size_t size
)
1261 assert(size
<= IO_BUFFER_SIZE
);
1265 while (pos
< size
) {
// Each call targets the still-unfilled tail of the buffer and asks only
// for the bytes that are still missing.
1266 const ssize_t amount
= read(
1267 pair
->src_fd
, buf
->u8
+ pos
, size
- pos
);
// NOTE(review): the condition guarding this assignment (embedded line
// 1269) is not visible in this listing.
1270 pair
->src_eof
= true;
// read() was interrupted by a signal; the handling statements are not
// visible in this listing.
1275 if (errno
== EINTR
) {
1282 #ifndef TUKLIB_DOSLIKE
// The read would block: wait with io_wait() instead of failing.
1283 if (IS_EAGAIN_OR_EWOULDBLOCK(errno
)) {
1284 // Disable the flush-timeout if no input has
1285 // been seen since the previous flush and thus
1286 // there would be nothing to flush after the
1287 // timeout expires (avoids busy waiting).
// NOTE(review): the alternative branch of this conditional expression
// (embedded line 1290) is not visible in this listing.
1288 const int timeout
= pair
->src_has_seen_input
1289 ? mytime_get_flush_timeout()
1292 switch (io_wait(pair
, timeout
, true)) {
// The wait timed out: record that a flush is needed.
1299 case IO_WAIT_TIMEOUT
:
1300 pair
->flush_needed
= true;
// Errors not handled above are reported with the source name and
// strerror(errno).
1309 message_error(_("%s: Read error: %s"),
1310 pair
->src_name
, strerror(errno
));
// Account for the bytes just read; amount is cast to size_t for the
// addition.
1315 pos
+= (size_t)(amount
);
// On the first data seen since src_has_seen_input was last false, set the
// flag and call mytime_set_flush_time().
1317 if (!pair
->src_has_seen_input
) {
1318 pair
->src_has_seen_input
= true;
1319 mytime_set_flush_time();
/// Seeks pair->src_fd to the absolute offset `pos` and clears
/// pair->src_eof afterwards.
///
/// \param pair File pair whose source descriptor is repositioned
/// \param pos Absolute offset; compared against pair->src_st.st_size
/// before seeking
// NOTE(review): the statement executed when pos is too large (embedded
// lines 1336-1337) and the return statements are missing from this listing.
1328 io_seek_src(file_pair
*pair
, uint64_t pos
)
1330 // Caller must not attempt to seek past the end of the input file
1331 // (seeking to 100 in a 100-byte file is seeking to the end of
1332 // the file, not past the end of the file, and thus that is allowed).
1334 // This also validates that pos can be safely cast to off_t.
1335 if (pos
> (uint64_t)(pair
->src_st
.st_size
))
// A failing lseek() is reported with the source name and strerror(errno).
1338 if (lseek(pair
->src_fd
, (off_t
)(pos
), SEEK_SET
) == -1) {
1339 message_error(_("%s: Error seeking the file: %s"),
1340 pair
->src_name
, strerror(errno
));
// The end-of-file marker is reset once the position has been changed.
1344 pair
->src_eof
= false;
/// Positioned read built from io_seek_src() followed by io_read().
///
/// \param pair File pair to read from
/// \param buf Buffer handed to io_read()
/// \param size Number of bytes requested; reading fewer is reported as
/// an unexpected end of file
/// \param pos Absolute offset handed to io_seek_src()
// NOTE(review): the return statements and the trailing argument of the
// message_error() call are missing from this listing.
1351 io_pread(file_pair
*pair
, io_buf
*buf
, size_t size
, uint64_t pos
)
1353 // Using lseek() and read() is more portable than pread() and
1354 // for us it is as good as real pread().
1355 if (io_seek_src(pair
, pos
))
1358 const size_t amount
= io_read(pair
, buf
, size
);
// SIZE_MAX from io_read() is checked separately, before the short-read
// comparison below.
1359 if (amount
== SIZE_MAX
)
1362 if (amount
!= size
) {
1363 message_error(_("%s: Unexpected end of file"),
/// Scans every element of buf->u64 and tests each one against zero.
///
/// \param buf Buffer to inspect (read-only)
// NOTE(review): the return statements are missing from this listing;
// presumably the result says whether all words were zero. Confirm against
// the full source.
1373 is_sparse(const io_buf
*buf
)
// The word-wise scan relies on the buffer size being a whole number of
// uint64_t elements.
1375 assert(IO_BUFFER_SIZE
% sizeof(uint64_t) == 0);
1377 for (size_t i
= 0; i
< ARRAY_SIZE(buf
->u64
); ++i
)
1378 if (buf
->u64
[i
] != 0)
/// Writes `size` bytes starting at `buf` to pair->dest_fd with write().
///
/// \param pair File pair whose dest_fd is written to
/// \param buf Data to write
/// \param size Number of bytes to write; asserted to be at most
/// IO_BUFFER_SIZE
// NOTE(review): the loop header, the test on `amount`, the EPIPE-specific
// code and the return statements are missing from this listing (embedded
// numbering has gaps).
1386 io_write_buf(file_pair
*pair
, const uint8_t *buf
, size_t size
)
1388 assert(size
<= IO_BUFFER_SIZE
);
1391 const ssize_t amount
= write(pair
->dest_fd
, buf
, size
);
// write() was interrupted by a signal; the handling statements are not
// visible in this listing.
1393 if (errno
== EINTR
) {
1400 #ifndef TUKLIB_DOSLIKE
// The write would block: wait without a timeout (-1) until io_wait()
// reports IO_WAIT_MORE.
1401 if (IS_EAGAIN_OR_EWOULDBLOCK(errno
)) {
1402 if (io_wait(pair
, -1, false) == IO_WAIT_MORE
)
1409 // Handle broken pipe specially. gzip and bzip2
1410 // don't print anything on SIGPIPE. In addition,
1411 // gzip --quiet uses exit status 2 (warning) on
1412 // broken pipe instead of whatever raise(SIGPIPE)
1413 // would make it return. It is there to hide "Broken
1414 // pipe" message on some old shells (probably old
1417 // We don't do anything special with --quiet, which
1418 // is what bzip2 does too. If we get SIGPIPE, we
1419 // will handle it like other signals by setting
1420 // user_abort, and get EPIPE here.
1422 message_error(_("%s: Write error: %s"),
1423 pair
->dest_name
, strerror(errno
));
// Step past the bytes write() accepted, so `buf`/`size` describe only the
// data that is still unwritten.
1428 buf
+= (size_t)(amount
);
1429 size
-= (size_t)(amount
);
/// Writes a buffer to the destination. When pair->dest_try_sparse is set,
/// all-zero full-size blocks are accumulated in pair->dest_pending_sparse
/// instead of being written.
///
/// \param pair File pair to write to
/// \param buf Buffer holding the data (its u8 member is written)
/// \param size Number of valid bytes in buf; asserted to be at most
/// IO_BUFFER_SIZE
///
/// \return On the non-sparse path, the result of io_write_buf().
// NOTE(review): the remainder of the pending_max comparison, the SEEK
// argument of lseek() and the early return statements are missing from
// this listing (embedded numbering has gaps).
1437 io_write(file_pair
*pair
, const io_buf
*buf
, size_t size
)
1439 assert(size
<= IO_BUFFER_SIZE
);
1441 if (pair
->dest_try_sparse
) {
1442 // Check if the block is sparse (contains only zeros). If it is
1443 // sparse, we just store the amount and return. We will take
1444 // care of actually skipping over the hole when we hit the
1445 // next data block or close the file.
1447 // Since io_close() requires that dest_pending_sparse > 0
1448 // if the file ends with sparse block, we must also return
1449 // if size == 0 to avoid doing the lseek().
// Only completely filled buffers are considered for the sparse check.
1450 if (size
== IO_BUFFER_SIZE
) {
1451 // Even if the block was sparse, treat it as non-sparse
1452 // if the pending sparse amount is large compared to
1453 // the size of off_t. In practice this only matters
1454 // on 32-bit systems where off_t isn't always 64 bits.
// pending_max is 1 shifted left by (bit width of off_t - 2).
1455 const off_t pending_max
1456 = (off_t
)(1) << (sizeof(off_t
) * CHAR_BIT
- 2);
1457 if (is_sparse(buf
) && pair
->dest_pending_sparse
// The block is not written; its length is added to the pending hole.
1459 pair
->dest_pending_sparse
+= (off_t
)(size
);
1462 } else if (size
== 0) {
1466 // This is not a sparse block. If we have a pending hole,
// NOTE(review): the rest of the sentence above (embedded line 1467) is
// missing from this listing.
1468 if (pair
->dest_pending_sparse
> 0) {
1469 if (lseek(pair
->dest_fd
, pair
->dest_pending_sparse
,
1471 message_error(_("%s: Seeking failed when "
1472 "trying to create a sparse "
1473 "file: %s"), pair
->dest_name
,
// The hole has been dealt with, so the pending count starts over.
1478 pair
->dest_pending_sparse
= 0;
1482 return io_write_buf(pair
, buf
->u8
, size
);