From a21e890f82258c17ee47895fa28bb62937eb1af9 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 4 Oct 2021 15:40:29 +0200 Subject: [PATCH] Add close_range(2) support This is a system call introduced in Linux 5.9. It's typically used to bulk-close file descriptors that a process inherited unintentionally and, for security reasons, doesn't want to pass on to its offspring. Because of that, a sensible upper limit tends to be unknown and users prefer to stay on the safe side by setting it high. This is a bit peculiar because, if unfiltered, the syscall could end up closing descriptors Valgrind uses for its own purposes, resulting in no end of mayhem and suffering. This patch clamps the upper bound to a safe value and then skips over the descriptors Valgrind uses by calling the real system call, possibly several times, with sub-ranges that are safe to close. The call can fail on inverted ranges (first > last) and bad flags -- we handle the first condition ourselves while letting the real call fail on bad flags. https://bugs.kde.org/show_bug.cgi?id=439090 --- NEWS | 1 + coregrind/m_syswrap/priv_syswrap-linux.h | 3 ++ coregrind/m_syswrap/syswrap-amd64-linux.c | 1 + coregrind/m_syswrap/syswrap-arm-linux.c | 1 + coregrind/m_syswrap/syswrap-arm64-linux.c | 1 + coregrind/m_syswrap/syswrap-linux.c | 62 ++++++++++++++++++++++++++++ coregrind/m_syswrap/syswrap-mips32-linux.c | 1 + coregrind/m_syswrap/syswrap-mips64-linux.c | 1 + coregrind/m_syswrap/syswrap-nanomips-linux.c | 1 + coregrind/m_syswrap/syswrap-ppc32-linux.c | 1 + coregrind/m_syswrap/syswrap-ppc64-linux.c | 1 + coregrind/m_syswrap/syswrap-s390x-linux.c | 1 + coregrind/m_syswrap/syswrap-x86-linux.c | 1 + include/vki/vki-linux.h | 3 ++ include/vki/vki-scnums-shared-linux.h | 1 + 15 files changed, 80 insertions(+) diff --git a/NEWS b/NEWS index bd4458dae..112758301 100644 --- a/NEWS +++ b/NEWS @@ -70,6 +70,7 @@ are not entered into bugzilla tend to get forgotten about or ignored. 
have debug information 438871 unhandled instruction bytes: 0xF3 0x49 0xF 0x6F 0x9C 0x24 0x60 0x2 0x0 0x0 439046 valgrind is unusably large when linked with lld +439090 Implement close_range(2) 439326 Valgrind 3.17.0 won't compile with Intel 2021 oneAPI compilers 439590 glibc-2.34 breaks suppressions against obj:*/lib*/libc-2.*so* 440670 unhandled ppc64le-linux syscall: 252 (statfs64) and 253 (fstatfs64) diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h index 110f7c832..baf362f11 100644 --- a/coregrind/m_syswrap/priv_syswrap-linux.h +++ b/coregrind/m_syswrap/priv_syswrap-linux.h @@ -320,6 +320,9 @@ DECL_TEMPLATE(linux, sys_io_uring_setup); DECL_TEMPLATE(linux, sys_io_uring_enter); DECL_TEMPLATE(linux, sys_io_uring_register); +// Linux-specific (new in Linux 5.9) +DECL_TEMPLATE(linux, sys_close_range); + /* --------------------------------------------------------------------- Wrappers for sockets and ipc-ery. These are split into standalone procedures because x86-linux hides them inside multiplexors diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c index 46d6b5beb..5062324a1 100644 --- a/coregrind/m_syswrap/syswrap-amd64-linux.c +++ b/coregrind/m_syswrap/syswrap-amd64-linux.c @@ -875,6 +875,7 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c index 7f41b0749..556dd844b 100644 --- a/coregrind/m_syswrap/syswrap-arm-linux.c +++ b/coregrind/m_syswrap/syswrap-arm-linux.c @@ -1052,6 +1052,7 @@ static SyscallTableEntry syscall_main_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), 
// 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c index 0d717f44b..b87107727 100644 --- a/coregrind/m_syswrap/syswrap-arm64-linux.c +++ b/coregrind/m_syswrap/syswrap-arm64-linux.c @@ -831,6 +831,7 @@ static SyscallTableEntry syscall_main_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c index 818b11ff6..63dd1fb66 100644 --- a/coregrind/m_syswrap/syswrap-linux.c +++ b/coregrind/m_syswrap/syswrap-linux.c @@ -13315,6 +13315,88 @@ PRE(sys_execveat) }
+/* close_range(2): close, or mark close-on-exec, every descriptor in the
+ * inclusive range [first, last].  The request is never forwarded as is,
+ * because the range may cover descriptors Valgrind itself relies on: the
+ * upper bound is clamped below VG_(fd_hard_limit), and the real system
+ * call is issued once per sub-range that avoids the log and XML output
+ * sinks (and stderr while the debug log level is above zero).  An
+ * inverted range is rejected here with EINVAL; the flags are left for
+ * the kernel to validate. */
+PRE(sys_close_range)
+{
+   SysRes res = VG_(mk_SysRes_Success)(0);
+   unsigned int beg, end;
+   /* Narrow both bounds to the declared unsigned int parameter type up
+    * front, so the range checks and the loop work on the same values. */
+   unsigned int first = ARG1;
+   unsigned int last = ARG2;
+
+   FUSE_COMPATIBLE_MAY_BLOCK();
+   PRINT("sys_close_range ( %" FMT_REGWORD "u, %" FMT_REGWORD "u, %"
+         FMT_REGWORD "u )", ARG1, ARG2, ARG3);
+   PRE_REG_READ3(long, "close_range",
+                 unsigned int, first, unsigned int, last,
+                 unsigned int, flags);
+
+   if (first > last) {
+      SET_STATUS_Failure( VKI_EINVAL );
+      return;
+   }
+
+   if (last >= VG_(fd_hard_limit))
+      last = VG_(fd_hard_limit) - 1;
+
+   if (first > last) {
+      SET_STATUS_Success ( 0 );
+      return;
+   }
+
+   beg = end = first;
+   do {
+      if (end > last
+          || (end == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0)
+          || end == VG_(log_output_sink).fd
+          || end == VG_(xml_output_sink).fd) {
+         /* Split the range if it contains a file descriptor we're not
+          * supposed to close.  [beg, end - 1] is the closable run
+          * before it; testing "end > beg" instead of "end - 1 >= beg"
+          * keeps the unsigned subtraction from wrapping when end is 0. */
+         if (end > beg)
+            res = VG_(do_syscall3)(__NR_close_range, (UWord)beg,
+                                   (UWord)(end - 1), ARG3 );
+         beg = end + 1;
+      }
+   } while (end++ <= last);
+
+   /* If it failed along the way, it's presumably the flags being wrong. */
+   SET_STATUS_from_SysRes (res);
+}
+
+/* Post-handler for close_range(2): reports each closed descriptor via
+ * ML_(record_fd_close).  Nothing is recorded when VG_(clo_track_fds) is
+ * off, or when CLOSE_RANGE_CLOEXEC was given, as that flag only marks
+ * the descriptors close-on-exec.  The walk uses the same clamped upper
+ * bound and skips the same protected descriptors as the PRE handler. */
+POST(sys_close_range)
+{
+   unsigned int fd;
+   unsigned int last = ARG2;
+
+   if (!VG_(clo_track_fds)
+       || (ARG3 & VKI_CLOSE_RANGE_CLOEXEC) != 0)
+      return;
+
+   if (last >= VG_(fd_hard_limit))
+      last = VG_(fd_hard_limit) - 1;
+
+   for (fd = ARG1; fd <= last; fd++)
+      if ((fd != 2/*stderr*/ || VG_(debugLog_getLevel)() == 0)
+          && fd != VG_(log_output_sink).fd
+          && fd != VG_(xml_output_sink).fd)
+         ML_(record_fd_close)(fd);
+}
 #undef PRE #undef POST diff --git a/coregrind/m_syswrap/syswrap-mips32-linux.c b/coregrind/m_syswrap/syswrap-mips32-linux.c index f957c6f58..f556e063f 100644 --- a/coregrind/m_syswrap/syswrap-mips32-linux.c +++ b/coregrind/m_syswrap/syswrap-mips32-linux.c @@ -1137,6 +1137,7 @@ static SyscallTableEntry syscall_main_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 LINX_ (__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/coregrind/m_syswrap/syswrap-mips64-linux.c b/coregrind/m_syswrap/syswrap-mips64-linux.c index 94f38366f..41a5404c5 100644 --- a/coregrind/m_syswrap/syswrap-mips64-linux.c +++ b/coregrind/m_syswrap/syswrap-mips64-linux.c @@ -816,6 +816,7 @@ static SyscallTableEntry syscall_main_table[] = { LINXY (__NR_io_uring_enter, sys_io_uring_enter), LINXY (__NR_io_uring_register, sys_io_uring_register), GENX_ (__NR_clone3, sys_ni_syscall), + LINXY (__NR_close_range, sys_close_range), LINX_ (__NR_faccessat2, sys_faccessat2), }; diff --git a/coregrind/m_syswrap/syswrap-nanomips-linux.c b/coregrind/m_syswrap/syswrap-nanomips-linux.c index 102c15550..f9d4b19f4 100644 --- a/coregrind/m_syswrap/syswrap-nanomips-linux.c +++ b/coregrind/m_syswrap/syswrap-nanomips-linux.c @@ -825,6 +825,7 @@ static SyscallTableEntry syscall_main_table[] = { LINXY (__NR_io_uring_enter, sys_io_uring_enter), LINXY (__NR_io_uring_register, sys_io_uring_register), GENX_ (__NR_clone3, sys_ni_syscall), + LINXY
(__NR_close_range, sys_close_range), LINX_ (__NR_faccessat2, sys_faccessat2), }; diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c index 7f9c9fa9b..6263ab845 100644 --- a/coregrind/m_syswrap/syswrap-ppc32-linux.c +++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c @@ -1055,6 +1055,7 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c index 43cde7327..a26b41c32 100644 --- a/coregrind/m_syswrap/syswrap-ppc64-linux.c +++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c @@ -1024,6 +1024,7 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c index 5283c19fe..5c9209859 100644 --- a/coregrind/m_syswrap/syswrap-s390x-linux.c +++ b/coregrind/m_syswrap/syswrap-s390x-linux.c @@ -865,6 +865,7 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c index 418c76975..1d8f45d33 100644 --- a/coregrind/m_syswrap/syswrap-x86-linux.c +++ b/coregrind/m_syswrap/syswrap-x86-linux.c @@ -1646,6 +1646,7 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_io_uring_register, sys_io_uring_register),// 427 GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, 
sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 }; diff --git a/include/vki/vki-linux.h b/include/vki/vki-linux.h index 426d9db92..eb4e01b33 100644 --- a/include/vki/vki-linux.h +++ b/include/vki/vki-linux.h @@ -5369,6 +5369,9 @@ struct vki_itimerspec64 { #define VKI_RLIM64_INFINITY (~0ULL) +#define VKI_CLOSE_RANGE_UNSHARE (1U << 1) +#define VKI_CLOSE_RANGE_CLOEXEC (1U << 2) + /*--------------------------------------------------------------------*/ /*--- end ---*/ /*--------------------------------------------------------------------*/ diff --git a/include/vki/vki-scnums-shared-linux.h b/include/vki/vki-scnums-shared-linux.h index 6c70c9981..fa63c7a9b 100644 --- a/include/vki/vki-scnums-shared-linux.h +++ b/include/vki/vki-scnums-shared-linux.h @@ -40,6 +40,7 @@ #define __NR_fspick 433 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_faccessat2 439 -- 2.11.4.GIT