Hi All,
This series adds capabilities support for clone3 syscall along with set of testcases in morello clone kselftests.
Changes available at: https://git.morello-project.org/Bea/linux/-/tree/morello/clone3_v4 LTP changes: https://git.morello-project.org/Bea/morello-linux-ltp/-/tree/morello/clone3 To run clone3 tests: ./runltp -f syscalls -s clone3
v4: [PATCH 1/3] - fixed commit message referring to the wrong copy routine [PATCH 3/3] - dropped setting default size for clone3 args - dropped stale comment regarding re-using bits from clone_args flags - switched ASSERT_FALSE to ASSERT_EQ when comparing pids in child process - added caching tls value to safely unmap memory - added validation for both clone stack and tls - switch from clone_args-> tls to actual thread data when checking for tag in cloned process v3: [PATCH 1/3]: - updated commit message to reflect actual changes [PATCH 2/3]: - fixed type casting and sizes for copy routines - swapped order of args for clone_args_size_ver [PATCH 3/3]: - added dedicated field for test custom flags instead of 'borrowing' one from clone_args struct - added test for stack before calling munmap in failing test cases - switched to WSTOPPED for waitid call v2: - add copy_struct_from_user_with_ptr variant - drop explicit padding from clone_args struct - switch __alignof__ to sizeof for struct sizing conditions - use __clone_args_size_ver macro when validating struct layout - cache the current compat mode instead of relying on compiler optimizations - drop use of as_user_ptr in favour of explicit casting - use clone_args struct directly for kselftest test fixture - add signalling to better handle dependencies between test threads
Beata Michalska (3): uaccess: Preserve capability tags with copy_struct_from_user_with_ptr fork: clone3: Add support for architectural capabilities kselftests/arm64: morello: Add clone3 test-cases
include/linux/uaccess.h | 60 ++++- include/uapi/linux/sched.h | 30 ++- kernel/fork.c | 139 ++++++++--- .../testing/selftests/arm64/morello/Makefile | 1 + tools/testing/selftests/arm64/morello/clone.c | 222 +++++++++++++++++- 5 files changed, 400 insertions(+), 52 deletions(-)
Introduce a variant of copy_struct_from_user that will make use of copy_from_user_with_ptr as its actual copying routine, in order to preserve capability tags throughout the process.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- include/linux/uaccess.h | 60 ++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 13 deletions(-)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 88b2224e85c3..e17bd4dce0d6 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -356,6 +356,24 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
extern __must_check int check_zeroed_user(const void __user *from, size_t size);
+static __always_inline __must_check int +__copy_struct_from_user_prepare(void *dst, size_t ksize, const void __user *src, + size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + /* Deal with trailing bytes. */ + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + int ret = check_zeroed_user(src + size, rest); + if (ret <= 0) + return ret ?: -E2BIG; + } + return 0; +} + /** * copy_struct_from_user: copy a struct from userspace * @dst: Destination address, in kernel space. This buffer must be @ksize @@ -408,20 +426,36 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, size_t usize) { size_t size = min(ksize, usize); - size_t rest = max(ksize, usize) - size; + int ret;
- /* Deal with trailing bytes. */ - if (usize < ksize) { - memset(dst + size, 0, rest); - } else if (usize > ksize) { - int ret = check_zeroed_user(src + size, rest); - if (ret <= 0) - return ret ?: -E2BIG; - } - /* Copy the interoperable parts of the struct. */ - if (copy_from_user(dst, src, size)) - return -EFAULT; - return 0; + ret = __copy_struct_from_user_prepare(dst, ksize, src, usize); + + return ret ?: (copy_from_user(dst, src, size) ? -EFAULT : 0); +} + +/** + * copy_struct_from_user_with_ptr: copy a struct from userspace + * + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @src: Source address, in userspace. + * @usize: (Alleged) size of @src struct. + * + * Counterpart of copy_struct_from_user that deals with structures, + * members of which can contain user pointers. + * Otherwise, same logic/requirements apply. + */ +static __always_inline __must_check int +copy_struct_from_user_with_ptr(void *dst, size_t ksize, const void __user *src, + size_t usize) +{ + size_t size = min(ksize, usize); + int ret; + + ret = __copy_struct_from_user_prepare(dst, ksize, src, usize); + + return ret ?: (copy_from_user_with_ptr(dst, src, size) ? -EFAULT : 0); }
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size);
The clone3 syscall provides a superset of the functionality compared to its clone forerunner, encapsulating all the various arguments within a single, dedicated clone_args structure.
As it stands now, members of said structure that do represent user pointers, are specified as being of __u64 type (alignment aside). This is problematic for some architectures, where that type cannot actually hold said pointers.
In order to add support for the latter, the relevant clone_args struct members get redefined with the appropriate __kernel_uintptr_t type.
This though, brings its own drawbacks. The structure itself gains revised signature (with implicit paddings) which results in noticeable growth in its size. Furthermore, for affected architectures, as a consequence, clone3 is no longer compat mode agnostic, and as such, requires repurposing the main syscall handler at a cost of marginal overhead though in favour of avoiding code duplication.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- include/uapi/linux/sched.h | 30 +++++--- kernel/fork.c | 139 +++++++++++++++++++++++++++++-------- 2 files changed, 131 insertions(+), 38 deletions(-)
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 3bac0a8ceab2..e3ca56b530d2 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -87,26 +87,36 @@ * * The structure is versioned by size and thus extensible. * New struct members must go at the end of the struct and - * must be properly 64bit aligned. + * must be properly aligned on at least 64bit boundary. + * Mind implicit padding when: + * ((sizeof(__kernel_uintptr_t) > sizeof(__u64)) + * */ + struct clone_args { __aligned_u64 flags; - __aligned_u64 pidfd; - __aligned_u64 child_tid; - __aligned_u64 parent_tid; + __kernel_aligned_uintptr_t pidfd; + __kernel_aligned_uintptr_t child_tid; + __kernel_aligned_uintptr_t parent_tid; __aligned_u64 exit_signal; - __aligned_u64 stack; + __kernel_aligned_uintptr_t stack; __aligned_u64 stack_size; - __aligned_u64 tls; - __aligned_u64 set_tid; + __kernel_aligned_uintptr_t tls; + /* VER0 boundary */ + __kernel_aligned_uintptr_t set_tid; __aligned_u64 set_tid_size; + /* VER1 boundary */ __aligned_u64 cgroup; + /* VER2 boundary */ }; #endif
-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ -#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ -#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ +#define CLONE_ARGS_SIZE_VER0 /* sizeof first published struct */ \ + ((sizeof(__kernel_uintptr_t) > sizeof(__u64)) ? 128 : 64) +#define CLONE_ARGS_SIZE_VER1 /* sizeof second published struct */ \ + ((sizeof(__kernel_uintptr_t) > sizeof(__u64)) ? 152 : 80) +#define CLONE_ARGS_SIZE_VER2 /* sizeof third published struct */ \ + ((sizeof(__kernel_uintptr_t) > sizeof(__u64)) ? 160 : 88)
/* * Scheduling policies diff --git a/kernel/fork.c b/kernel/fork.c index 181771571599..94777ac4d455 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2759,67 +2759,150 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, user_uintptr_t, newsp,
#ifdef __ARCH_WANT_SYS_CLONE3
+#define __clone_args_size_ver(ver, pfx) \ + pfx##CLONE_ARGS_SIZE_VER##ver + +#ifdef CONFIG_COMPAT64 +struct compat_clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; + __aligned_u64 cgroup; +}; + +#define COMPAT_CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +#define COMPAT_CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#define COMPAT_CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ + +#define clone_args_size_ver(args, ver) \ + ((args)->compat_mode ? __clone_args_size_ver(ver, COMPAT_) \ + : __clone_args_size_ver(ver, )) + +#define clone_args_get(args, member) \ + ((args)->compat_mode ? (args)->__compat_args.member \ + : (args)->__args.member) + +#define clone_args_get_user_ptr(args, member) \ + ((args)->compat_mode ? \ + compat_ptr(clone_args_get(args, member)) : \ + (void __user *)(user_uintptr_t)(clone_args_get(args, member))) + +#else /* CONFIG_COMPAT64 */ +#define clone_args_size_ver(args, ver) __clone_args_size_ver(ver, ) +#define clone_args_get(args, member) ((args)->member) +#define clone_args_get_user_ptr(args, member) \ + (void __user *)(user_uintptr_t)(clone_args_get(args, member)) +#endif /* CONFIG_COMPAT64 */ + +static inline void clone_args_validate_static(void) +{ +#define CLONE_ARGS_BUILD_BUG_ON(type, pfx) \ +do { \ + BUILD_BUG_ON(offsetofend(type, tls) != \ + __clone_args_size_ver(0, pfx)); \ + BUILD_BUG_ON(offsetofend(type, set_tid_size) != \ + __clone_args_size_ver(1, pfx)); \ + BUILD_BUG_ON(offsetofend(type, cgroup) != \ + __clone_args_size_ver(2, pfx)); \ + BUILD_BUG_ON(sizeof(type) != __clone_args_size_ver(2, pfx)); \ +} while (0) + + CLONE_ARGS_BUILD_BUG_ON(struct clone_args, ); +#ifdef CONFIG_COMPAT64 + CLONE_ARGS_BUILD_BUG_ON(struct compat_clone_args, COMPAT_); + BUILD_BUG_ON(sizeof(struct clone_args) < sizeof(struct compat_clone_args)); +#endif +} + noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args __user *uargs, size_t usize) { int err; +#ifdef CONFIG_COMPAT64 + struct { + union { + struct clone_args __args; + struct compat_clone_args __compat_args; + }; + bool compat_mode; + } args = { + .compat_mode = in_compat_syscall(), + }; +#else struct clone_args args; +#endif pid_t *kset_tid = kargs->set_tid;
- BUILD_BUG_ON(offsetofend(struct clone_args, tls) != - CLONE_ARGS_SIZE_VER0); - BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) != - CLONE_ARGS_SIZE_VER1); - BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) != - CLONE_ARGS_SIZE_VER2); - BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2); + clone_args_validate_static();
if (unlikely(usize > PAGE_SIZE)) return -E2BIG; - if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) + if (unlikely(usize < clone_args_size_ver(&args, 0))) return -EINVAL;
- err = copy_struct_from_user(&args, sizeof(args), uargs, usize); +#ifdef CONFIG_COMPAT64 + if (args.compat_mode) + err = copy_struct_from_user(&args.__compat_args, + sizeof(args.__compat_args), + uargs, usize); + else + err = copy_struct_from_user_with_ptr(&args.__args, + sizeof(args.__args), + uargs, usize); +#else + err = copy_struct_from_user_with_ptr(&args, sizeof(args), uargs, usize); +#endif + if (err) return err;
- if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL)) + if (unlikely(clone_args_get(&args, set_tid_size) > MAX_PID_NS_LEVEL)) return -EINVAL;
- if (unlikely(!args.set_tid && args.set_tid_size > 0)) + if (unlikely(!clone_args_get(&args, set_tid) && + clone_args_get(&args, set_tid_size) > 0)) return -EINVAL;
- if (unlikely(args.set_tid && args.set_tid_size == 0)) + if (unlikely(clone_args_get(&args, set_tid) && + clone_args_get(&args, set_tid_size) == 0)) return -EINVAL;
/* * Verify that higher 32bits of exit_signal are unset and that * it is a valid signal */ - if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) || - !valid_signal(args.exit_signal))) + if (unlikely((clone_args_get(&args, exit_signal) & ~((u64)CSIGNAL)) || + !valid_signal(clone_args_get(&args, exit_signal)))) return -EINVAL;
- if ((args.flags & CLONE_INTO_CGROUP) && - (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2)) + if ((clone_args_get(&args, flags) & CLONE_INTO_CGROUP) && + (clone_args_get(&args, cgroup) > INT_MAX || + usize < clone_args_size_ver(&args, 2))) return -EINVAL;
*kargs = (struct kernel_clone_args){ - .flags = args.flags, - .pidfd = u64_to_user_ptr(args.pidfd), - .child_tid = u64_to_user_ptr(args.child_tid), - .parent_tid = u64_to_user_ptr(args.parent_tid), - .exit_signal = args.exit_signal, - .stack = args.stack, - .stack_size = args.stack_size, - .tls = args.tls, - .set_tid_size = args.set_tid_size, - .cgroup = args.cgroup, + .flags = clone_args_get(&args, flags), + .pidfd = clone_args_get_user_ptr(&args, pidfd), + .child_tid = clone_args_get_user_ptr(&args, child_tid), + .parent_tid = clone_args_get_user_ptr(&args, parent_tid), + .exit_signal = clone_args_get(&args, exit_signal), + .stack = clone_args_get(&args, stack), + .stack_size = clone_args_get(&args, stack_size), + .tls = clone_args_get(&args, tls), + .set_tid_size = clone_args_get(&args, set_tid_size), + .cgroup = clone_args_get(&args, cgroup), };
- if (args.set_tid && - copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), + if (clone_args_get(&args, set_tid) && + copy_from_user(kset_tid, clone_args_get_user_ptr(&args, set_tid), (kargs->set_tid_size * sizeof(pid_t)))) return -EFAULT;
Add a bunch of test-cases for clone3 syscall, focusing mainly on handling struct clone_args versioning and sizing, alongside the obvious req for respecting capabilities.
Signed-off-by: Beata Michalska beata.michalska@arm.com --- .../testing/selftests/arm64/morello/Makefile | 1 + tools/testing/selftests/arm64/morello/clone.c | 222 +++++++++++++++++- 2 files changed, 222 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/arm64/morello/Makefile b/tools/testing/selftests/arm64/morello/Makefile index 7dbf5b140695..c79e8973dbbf 100644 --- a/tools/testing/selftests/arm64/morello/Makefile +++ b/tools/testing/selftests/arm64/morello/Makefile @@ -38,3 +38,4 @@ $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/freestanding_start.o $(OUTPUT)/freestanding $(CC) $^ -o $@ $(LDFLAGS)
$(OUTPUT)/signal: $(OUTPUT)/signal_common.o +$(OUTPUT)/clone: $(OUTPUT)/signal_common.o diff --git a/tools/testing/selftests/arm64/morello/clone.c b/tools/testing/selftests/arm64/morello/clone.c index 8217840ba504..0590698d5e2b 100644 --- a/tools/testing/selftests/arm64/morello/clone.c +++ b/tools/testing/selftests/arm64/morello/clone.c @@ -1,15 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2021 Arm Limited +#include "freestanding.h" #include <linux/mman.h> #include <linux/resource.h> #include <linux/wait.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/errno.h> +#include <linux/signal.h> +#include <linux/types.h> +#include <limits.h> #include <cheriintrin.h> -#include "freestanding.h" +#include "signal_common.h"
#define STACK_SIZE 1024*1024 +#define TLS_SIZE 4096
#define in_restricted() \ (!(cheri_perms_get(cheri_pcc_get()) & ARM_CAP_PERMISSION_EXECUTIVE)) @@ -17,6 +22,10 @@
#define MAGIC_NR 0x3562 /* whatever really ..... */
+#ifndef MAX_PID_NS_LEVEL +#define MAX_PID_NS_LEVEL 32 +#endif + /* Cloned thread result */ #define CTR_SUCCESS 1 #define CTR_FAILED -1 @@ -256,6 +265,216 @@ TEST(test_wait_rusage) RUN_WITH_FIXTURE(data, CLONE_TH_RUSAGE, wait_single); }
+#define GET_ERRNO(err) (((err) + MAX_ERRNO) ^ MAX_ERRNO) + +#define CUSTOM_CLONE_STACK_INV BIT(1) + +static struct clone3_fixture { + size_t args_size; + struct clone_args args; + int test_flags; + int e_result; +} clone3_data[] = { + /* BEGIN_SECTION: expected failure */ + /* size of clone_args smaller than CLONE_ARGS_SIZE_VER0 */ + { + .args_size = offsetof(struct clone_args, tls), + .e_result = -EINVAL + }, /* @{0} */ + /* invalid set_tid array size */ + { + .args.set_tid_size = MAX_PID_NS_LEVEL + 1, + .e_result = -EINVAL + }, /* @{1} */ + /* Invalid combination of set_tid & set_tid_size */ + { + .args.set_tid_size = 1, + .e_result = -EINVAL + }, /* @{2} */ + /* Invalid exit_signal */ + { + .args.exit_signal = _NSIG + 1, + .e_result = -EINVAL + }, /* @{3} */ + /* Invalid cgroup number */ + { + .args.flags = CLONE_INTO_CGROUP, + .args.cgroup = (__u64)INT_MAX + 1, + .e_result = -EINVAL + }, /* @{4} */ + /* Invalid size for clone_args with cgroup */ + { + .args_size = offsetof(struct clone_args, cgroup), + .args.flags = CLONE_INTO_CGROUP, + .args.cgroup = 1, + .e_result = -EINVAL + }, /* @{5} */ + /* Invalid stack & stack_size combination */ + { + .args.stack_size = STACK_SIZE, + .test_flags = CUSTOM_CLONE_STACK_INV, + .e_result = -EINVAL + }, /* @{6} */ + { + .test_flags = CUSTOM_CLONE_STACK_INV, + .e_result = -EINVAL + }, /* @{7} */ + /* Invalid set_tid entry */ + { + .args.set_tid = (uintptr_t)&(pid_t){1}, + .args.set_tid_size = 1, + .e_result = -EEXIST + }, /* @{8} */ + + /* END_SECTION: expected failure */ + { + .args.flags = CLONE_PIDFD | CLONE_CHILD_SETTID | + CLONE_CHILD_CLEARTID, + .e_result = 0 + }, /* @{9} */ + { + .args.flags = CLONE_PARENT_SETTID | CLONE_CHILD_SETTID, + .e_result = 0 + }, /* @{10} */ + { + .args.flags = CLONE_SETTLS, + .e_result = 0 + }, /* @{11} */ +}; + +static __attribute__((noinline)) void run_child(struct clone_args *args) +{ + static __thread int tls_data; + + if (args->flags & CLONE_CHILD_SETTID) { + pid_t current_pid = getpid(); + + ASSERT_EQ(current_pid, *(pid_t *)args->child_tid); + } + + if (args->flags & CLONE_SETTLS && args->tls) { + ptraddr_t base_addr = cheri_address_get(args->tls); + ptraddr_t ref_addr = cheri_address_get(&tls_data); + size_t length = cheri_length_get(args->tls); + + ASSERT_TRUE(cheri_tag_get(&tls_data)); + ASSERT_TRUE(ref_addr >= base_addr && ref_addr < base_addr + length); + } + + if (args->flags & CLONE_PIDFD) { + sigset_t set; + + ASSERT_EQ(sigemptyset(&set), 0); + ASSERT_EQ(sigaddset(&set, SIGUSR1), 0); + ASSERT_EQ(sigprocmask(SIG_BLOCK, &set, NULL), 0); + + /* Suspend utill parent kicks things back in */ + ASSERT_EQ(syscall(__NR_kill, getpid(), SIGSTOP), 0); + + /* Wait for a signal */ + ASSERT_EQ(rt_sigtimedwait(&set, NULL, 0, sizeof(set)), + SIGUSR1); + } + syscall(__NR_exit, 0); +} + +static inline __attribute__((always_inline)) +void run_clone3(struct clone3_fixture *data) +{ + struct clone_args *args = &(data->args); + int pidfd, parent_tid = 0, child_tid = 0; + siginfo_t wait_si; + int result; + pid_t pid; + void *tls; + + args->pidfd = (uintcap_t)&pidfd; + args->parent_tid = (uintcap_t)&parent_tid; + args->child_tid = (uintcap_t)&child_tid; + + if (!args->exit_signal) + args->exit_signal = SIGCHLD; + + if (!args->stack_size) { + args->stack = (uintcap_t) mmap_verified(NULL, STACK_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0, STACK_REQ_PERMS); + ASSERT_NE(args->stack, 0); + args->stack_size = + data->test_flags & CUSTOM_CLONE_STACK_INV ? 0 : STACK_SIZE; + } + + if (data->e_result) { + result = syscall(__NR_clone3, args, + data->args_size ? data->args_size : + sizeof(struct clone_args)); + ASSERT_EQ(result, data->e_result) { + TH_LOG("Expected: %d while %d was received", + GET_ERRNO(data->e_result), GET_ERRNO(result)); + } + if (args->stack) + munmap((void *)args->stack, STACK_SIZE); + return; + } + + args->flags |= CLONE_VM; + + if (args->flags & CLONE_SETTLS) { + tls = mmap_verified(NULL, TLS_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, + CAP_LOAD_PERMS | CAP_STORE_PERMS); + + ASSERT_NE(tls, NULL); + args->tls = (uintcap_t)cheri_bounds_set(tls, TLS_SIZE); + } + + pid = syscall(__NR_clone3, args, sizeof(struct clone_args)); + + ASSERT_GE(pid, 0); + if (!pid) + run_child(args); + + if (args->flags & CLONE_PIDFD) { + /* Make sure the child here gets a chance to block the signal */ + result = waitid(P_PID, pid, &wait_si, WSTOPPED, NULL); + ASSERT_EQ(result, 0); + result = syscall(__NR_kill, pid, SIGCONT); + ASSERT_EQ(result, 0); + /* Signal handling is not the test target here: valid pidfd is */ + result = syscall(__NR_pidfd_send_signal, pidfd, SIGUSR1, NULL, 0); + ASSERT_EQ(result, 0); + } + + result = waitid(P_PID, pid, &wait_si, WEXITED, NULL); + ASSERT_EQ(result, 0); + + /* child_tid set once the thread gets scheduled */ + if (args->flags & CLONE_PARENT_SETTID && args->flags & CLONE_CHILD_SETTID + && !(args->flags & CLONE_CHILD_CLEARTID)) { + ASSERT_EQ(parent_tid, child_tid); + ASSERT_NE(parent_tid, 0); + } + + if (args->flags & CLONE_CHILD_CLEARTID) + ASSERT_EQ(child_tid, 0); + + munmap((void *)args->stack, STACK_SIZE); + if (args->flags & CLONE_SETTLS) + /* unmap TLS with a capability cached prior to setting the bounds */ + munmap((void *)tls, TLS_SIZE); +} + +TEST(test_clone3) +{ + int ncount = sizeof(clone3_data)/sizeof(clone3_data[0]); + + for (int i = 0; i < ncount; ++i) { + TH_LOG("Validating clone3 @{%d}", i); + run_clone3(&clone3_data[i]); + } +} + void run_restricted(uintcap_t entry_point) { void *new_stack = allocate_mem_raw(STACK_SIZE); @@ -290,5 +509,6 @@ int main(void) run_restricted((uintcap_t)test_clone_tls_restricted); test_wait_raw(); test_wait_rusage(); + test_clone3(); return 0; }
On 14/12/2022 12:52, Beata Michalska wrote:
Add a bunch of test-cases for clone3 syscall, focusing mainly on handling struct clone_args versioning and sizing, alongside the obvious req for respecting capabilities.
Signed-off-by: Beata Michalska beata.michalska@arm.com
.../testing/selftests/arm64/morello/Makefile | 1 + tools/testing/selftests/arm64/morello/clone.c | 222 +++++++++++++++++- 2 files changed, 222 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/arm64/morello/Makefile b/tools/testing/selftests/arm64/morello/Makefile index 7dbf5b140695..c79e8973dbbf 100644 --- a/tools/testing/selftests/arm64/morello/Makefile +++ b/tools/testing/selftests/arm64/morello/Makefile @@ -38,3 +38,4 @@ $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/freestanding_start.o $(OUTPUT)/freestanding $(CC) $^ -o $@ $(LDFLAGS) $(OUTPUT)/signal: $(OUTPUT)/signal_common.o +$(OUTPUT)/clone: $(OUTPUT)/signal_common.o diff --git a/tools/testing/selftests/arm64/morello/clone.c b/tools/testing/selftests/arm64/morello/clone.c index 8217840ba504..0590698d5e2b 100644 --- a/tools/testing/selftests/arm64/morello/clone.c +++ b/tools/testing/selftests/arm64/morello/clone.c @@ -1,15 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2021 Arm Limited +#include "freestanding.h" #include <linux/mman.h> #include <linux/resource.h> #include <linux/wait.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/errno.h> +#include <linux/signal.h> +#include <linux/types.h> +#include <limits.h> #include <cheriintrin.h> -#include "freestanding.h" +#include "signal_common.h" #define STACK_SIZE 1024*1024 +#define TLS_SIZE 4096 #define in_restricted() \ (!(cheri_perms_get(cheri_pcc_get()) & ARM_CAP_PERMISSION_EXECUTIVE)) @@ -17,6 +22,10 @@ #define MAGIC_NR 0x3562 /* whatever really ..... */ +#ifndef MAX_PID_NS_LEVEL +#define MAX_PID_NS_LEVEL 32 +#endif
/* Cloned thread result */ #define CTR_SUCCESS 1 #define CTR_FAILED -1 @@ -256,6 +265,216 @@ TEST(test_wait_rusage) RUN_WITH_FIXTURE(data, CLONE_TH_RUSAGE, wait_single); } +#define GET_ERRNO(err) (((err) + MAX_ERRNO) ^ MAX_ERRNO)
+#define CUSTOM_CLONE_STACK_INV BIT(1)
+static struct clone3_fixture {
- size_t args_size;
- struct clone_args args;
- int test_flags;
- int e_result;
+} clone3_data[] = {
- /* BEGIN_SECTION: expected failure */
- /* size of clone_args smaller than CLONE_ARGS_SIZE_VER0 */
- {
.args_size = offsetof(struct clone_args, tls),
.e_result = -EINVAL
- }, /* @{0} */
- /* invalid set_tid array size */
- {
.args.set_tid_size = MAX_PID_NS_LEVEL + 1,
.e_result = -EINVAL
- }, /* @{1} */
- /* Invalid combination of set_tid & set_tid_size */
- {
.args.set_tid_size = 1,
.e_result = -EINVAL
- }, /* @{2} */
- /* Invalid exit_signal */
- {
.args.exit_signal = _NSIG + 1,
.e_result = -EINVAL
- }, /* @{3} */
- /* Invalid cgroup number */
- {
.args.flags = CLONE_INTO_CGROUP,
.args.cgroup = (__u64)INT_MAX + 1,
.e_result = -EINVAL
- }, /* @{4} */
- /* Invalid size for clone_args with cgroup */
- {
.args_size = offsetof(struct clone_args, cgroup),
.args.flags = CLONE_INTO_CGROUP,
.args.cgroup = 1,
.e_result = -EINVAL
- }, /* @{5} */
- /* Invalid stack & stack_size combination */
- {
.args.stack_size = STACK_SIZE,
.test_flags = CUSTOM_CLONE_STACK_INV,
.e_result = -EINVAL
- }, /* @{6} */
- {
.test_flags = CUSTOM_CLONE_STACK_INV,
.e_result = -EINVAL
- }, /* @{7} */
- /* Invalid set_tid entry */
- {
.args.set_tid = (uintptr_t)&(pid_t){1},
.args.set_tid_size = 1,
.e_result = -EEXIST
- }, /* @{8} */
- /* END_SECTION: expected failure */
- {
.args.flags = CLONE_PIDFD | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID,
.e_result = 0
- }, /* @{9} */
- {
.args.flags = CLONE_PARENT_SETTID | CLONE_CHILD_SETTID,
.e_result = 0
- }, /* @{10} */
- {
.args.flags = CLONE_SETTLS,
.e_result = 0
- }, /* @{11} */
+};
+static __attribute__((noinline)) void run_child(struct clone_args *args) +{
- static __thread int tls_data;
- if (args->flags & CLONE_CHILD_SETTID) {
pid_t current_pid = getpid();
ASSERT_EQ(current_pid, *(pid_t *)args->child_tid);
- }
- if (args->flags & CLONE_SETTLS && args->tls) {
ptraddr_t base_addr = cheri_address_get(args->tls);
ptraddr_t ref_addr = cheri_address_get(&tls_data);
size_t length = cheri_length_get(args->tls);
ASSERT_TRUE(cheri_tag_get(&tls_data));
ASSERT_TRUE(ref_addr >= base_addr && ref_addr < base_addr + length);
- }
- if (args->flags & CLONE_PIDFD) {
sigset_t set;
ASSERT_EQ(sigemptyset(&set), 0);
ASSERT_EQ(sigaddset(&set, SIGUSR1), 0);
ASSERT_EQ(sigprocmask(SIG_BLOCK, &set, NULL), 0);
/* Suspend utill parent kicks things back in */
ASSERT_EQ(syscall(__NR_kill, getpid(), SIGSTOP), 0);
/* Wait for a signal */
ASSERT_EQ(rt_sigtimedwait(&set, NULL, 0, sizeof(set)),
SIGUSR1);
- }
- syscall(__NR_exit, 0);
+}
+static inline __attribute__((always_inline)) +void run_clone3(struct clone3_fixture *data) +{
- struct clone_args *args = &(data->args);
- int pidfd, parent_tid = 0, child_tid = 0;
- siginfo_t wait_si;
- int result;
- pid_t pid;
- void *tls;
- args->pidfd = (uintcap_t)&pidfd;
- args->parent_tid = (uintcap_t)&parent_tid;
- args->child_tid = (uintcap_t)&child_tid;
- if (!args->exit_signal)
args->exit_signal = SIGCHLD;
- if (!args->stack_size) {
args->stack = (uintcap_t) mmap_verified(NULL, STACK_SIZE,
PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE,
-1, 0, STACK_REQ_PERMS);
ASSERT_NE(args->stack, 0);
args->stack_size =
data->test_flags & CUSTOM_CLONE_STACK_INV ? 0 : STACK_SIZE;
- }
- if (data->e_result) {
result = syscall(__NR_clone3, args,
data->args_size ? data->args_size :
sizeof(struct clone_args));
I think it would be clearer to have an if (!data->args_size) above and set a default value there, this way you can keep using it in the clone3 call below too.
ASSERT_EQ(result, data->e_result) {
TH_LOG("Expected: %d while %d was received",
GET_ERRNO(data->e_result), GET_ERRNO(result));
}
if (args->stack)
munmap((void *)args->stack, STACK_SIZE);
return;
- }
- args->flags |= CLONE_VM;
- if (args->flags & CLONE_SETTLS) {
tls = mmap_verified(NULL, TLS_SIZE, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0,
CAP_LOAD_PERMS | CAP_STORE_PERMS);
Nit: these lines are not aligned any more.
ASSERT_NE(tls, NULL);
args->tls = (uintcap_t)cheri_bounds_set(tls, TLS_SIZE);
- }
- pid = syscall(__NR_clone3, args, sizeof(struct clone_args));
- ASSERT_GE(pid, 0);
- if (!pid)
run_child(args);
- if (args->flags & CLONE_PIDFD) {
/* Make sure the child here gets a chance to block the signal */
result = waitid(P_PID, pid, &wait_si, WSTOPPED, NULL);
ASSERT_EQ(result, 0);
result = syscall(__NR_kill, pid, SIGCONT);
ASSERT_EQ(result, 0);
/* Signal handling is not the test target here: valid pidfd is */
result = syscall(__NR_pidfd_send_signal, pidfd, SIGUSR1, NULL, 0);
ASSERT_EQ(result, 0);
- }
- result = waitid(P_PID, pid, &wait_si, WEXITED, NULL);
- ASSERT_EQ(result, 0);
- /* child_tid set once the thread gets scheduled */
- if (args->flags & CLONE_PARENT_SETTID && args->flags & CLONE_CHILD_SETTID
&& !(args->flags & CLONE_CHILD_CLEARTID)) {
ASSERT_EQ(parent_tid, child_tid);
ASSERT_NE(parent_tid, 0);
- }
- if (args->flags & CLONE_CHILD_CLEARTID)
ASSERT_EQ(child_tid, 0);
- munmap((void *)args->stack, STACK_SIZE);
- if (args->flags & CLONE_SETTLS)
/* unmap TLS with a capability cached prior to setting the bounds */
munmap((void *)tls, TLS_SIZE);
Nit: no need for the cast any more.
All these are really minor and I'm happy with the series otherwise, so if you like I can make the changes myself when merging.
Kevin
+}
+TEST(test_clone3) +{
- int ncount = sizeof(clone3_data)/sizeof(clone3_data[0]);
- for (int i = 0; i < ncount; ++i) {
TH_LOG("Validating clone3 @{%d}", i);
run_clone3(&clone3_data[i]);
- }
+}
void run_restricted(uintcap_t entry_point) { void *new_stack = allocate_mem_raw(STACK_SIZE); @@ -290,5 +509,6 @@ int main(void) run_restricted((uintcap_t)test_clone_tls_restricted); test_wait_raw(); test_wait_rusage();
- test_clone3(); return 0;
}
linux-morello@op-lists.linaro.org