argv and envp strings can be large enough that a capability giving exclusive access to them cannot be created: their bounds would not be exactly representable and would extend beyond the strings.
When allocating the strings on the stack, align those that need an adjusted base and pad them as required for alignment or for a representable length. Handle this potential padding in fs/binfmt_elf by detecting the extra NUL bytes.
This is done irrespective of the ABI of the calling or loaded binary. Indeed, at the point where the strings are put on the stack during `execve` we can only know the ABI of the calling binary, not that of the binary being loaded. Thus the compat path will be slightly slower and the strings will use more memory, as they are laid out as if capabilities with exact bounds were to be created for them. However, this should only happen in the rare cases where argv or envp strings are more than a couple of thousand characters long, and even then the memory impact should stay minimal.
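To make the layout easier to picture, here is a minimal sketch of the computation involved (illustrative only, not part of the patch: the function name is made up, and it mirrors the new fs/exec.c helper below using only the existing cheri_representable_length() and cheri_representable_alignment_mask() helpers from <linux/cheri.h> and ALIGN_DOWN() from <linux/align.h>):

	#include <linux/align.h>	/* ALIGN_DOWN() */
	#include <linux/cheri.h>	/* cheri_representable_*() */

	/*
	 * Sketch: 'p' is the current top of the string area (the stack grows
	 * down and strings are copied last to first).  Place the next string
	 * of 'len' bytes so that its base is representably aligned; the
	 * zero-filled gap between base + len and the old 'p' is the padding
	 * that fs/binfmt_elf later detects and skips.
	 */
	static unsigned long place_string(unsigned long p, size_t len)
	{
		size_t repr_len = cheri_representable_length(len);
		size_t repr_align = ~cheri_representable_alignment_mask(len) + 1;
		unsigned long base = ALIGN_DOWN(p - repr_len, repr_align);

		return base + len;	/* new 'p': end of the string's own bytes */
	}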
Add a helper function to put already-created capabilities on the stack during ELF setup.
Additionally, align the newly created stack to a multiple of the maximum argument length (MAX_ARG_STRLEN) to make sure the relocated arguments remain representable at their new location in a PCuABI kernel: no string is longer than MAX_ARG_STRLEN, so no string needs a base alignment larger than that, and shifting the whole area by a multiple of it preserves the alignment established above.
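A short sketch of why this alignment suffices (again illustrative only and not part of the patch; MAX_ARG_STRLEN is the existing limit from <linux/binfmts.h> and the function name is made up):

	#include <linux/align.h>	/* IS_ALIGNED() */
	#include <linux/binfmts.h>	/* MAX_ARG_STRLEN */
	#include <linux/cheri.h>	/* cheri_representable_alignment_mask() */
	#include <linux/types.h>

	/*
	 * Sketch: a string of at most MAX_ARG_STRLEN bytes never needs a base
	 * alignment larger than MAX_ARG_STRLEN, so shifting a representably
	 * aligned base by any multiple of MAX_ARG_STRLEN (as setup_arg_pages()
	 * now does for the whole string area) keeps it representably aligned.
	 */
	static bool shift_keeps_alignment(unsigned long base, size_t len,
					  unsigned long shift)
	{
		/* Power-of-two alignment needed for an exactly representable base. */
		unsigned long align = ~cheri_representable_alignment_mask(len) + 1;

		if (!IS_ALIGNED(shift, MAX_ARG_STRLEN) || !IS_ALIGNED(base, align))
			return false;

		/* 'align' is at most MAX_ARG_STRLEN, so it also divides 'shift'. */
		return IS_ALIGNED(base + shift, align);
	}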
Signed-off-by: Teo Couprie Diaz <teo.coupriediaz@arm.com>
---
 fs/binfmt_elf.c | 88 +++++++++++++++++++++++++++++++++++++++++--------
 fs/exec.c       | 55 +++++++++++++++++++++++++++++--
 2 files changed, 126 insertions(+), 17 deletions(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6455313d7f36..8e2a704ac723 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -151,6 +151,71 @@ static int padzero(unsigned long elf_bss)
 	}
 	return 0;
 }
+/*
+ * Put a pointer or capability to a string on the stack and return the
+ * effective size of the string on the stack, or an error.
+ * When building a PCuABI kernel, this takes any padding into account and
+ * creates an exactly representable capability in purecap.
+ */
+static long put_str_ptr(char __user *ustr,
+			elf_stack_item_t __user *stack_item)
+{
+	size_t len;
+#if defined(CONFIG_CHERI_PURECAP_UABI)
+	char c;
+	size_t pad_len;
+	char __user *str_ptr = ustr;
+	len = strnlen_user(ustr, MAX_ARG_STRLEN);
+
+	if (!len || len > MAX_ARG_STRLEN)
+		return -EINVAL;
+
+	/*
+	 * The start of the string should always be properly aligned, but its
+	 * representable length might differ. Get the representable length
+	 * from the same length that was used during allocation: the length
+	 * of the original string.
+	 * This accounts for the padding due to the length change, but not
+	 * for the alignment padding, so we might not end up at the start of
+	 * the next argument. If we do not, take the slow path below to walk
+	 * through the padding.
+	 */
+	len = cheri_representable_length(len);
+
+#if (ELF_COMPAT == 0)
+	str_ptr = cheri_perms_and(str_ptr,
+			(CHERI_PERM_GLOBAL | CHERI_PERM_STORE | CHERI_PERM_LOAD));
+	str_ptr = cheri_bounds_set_exact(str_ptr, len);
+#endif
+	if (elf_stack_put_user(str_ptr, stack_item++))
+		return -EFAULT;
+
+	/*
+	 * If there is a zero right after the end of the argument, the
+	 * argument alignment was adjusted in order to create a representable
+	 * capability in purecap, even if we are not loading a purecap
+	 * binary. This padding is added at the end, so find the real end by
+	 * walking through the padding.
+	 */
+	for (pad_len = 0; len + pad_len < MAX_ARG_STRLEN; pad_len++) {
+		if (get_user(c, ustr + len + pad_len))
+			return -EFAULT;
+		if (c != '\0')
+			break;
+	}
+	ustr += pad_len;
+	len += pad_len;
+#else /* CONFIG_CHERI_PURECAP_UABI */
+	len = strnlen_user(ustr, MAX_ARG_STRLEN);
+	if (!len || len > MAX_ARG_STRLEN)
+		return -EINVAL;
+
+	if (elf_stack_put_user(ustr, stack_item++))
+		return -EFAULT;
+#endif /* CONFIG_CHERI_PURECAP_UABI */
+
+	return len;
+}
 
 /* Let's use some macros to make this stack manipulation a little clearer */
 #ifdef CONFIG_STACK_GROWSUP
@@ -213,6 +278,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
 #if defined(CONFIG_CHERI_PURECAP_UABI) && (ELF_COMPAT == 0)
 	elf_stack_item_t *mm_at_argv, *mm_at_envp;
 #endif
+	long str_ret = 0;
 
 	/*
 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
@@ -397,13 +463,10 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
 	/* In PCuABI, this derives a capability from SP pointing to arg_start */
 	ustr = sp + (mm->arg_start - user_ptr_addr(sp));
 	while (argc-- > 0) {
-		size_t len;
-		if (elf_stack_put_user(ustr, stack_item++))
-			return -EFAULT;
-		len = strnlen_user(ustr, MAX_ARG_STRLEN);
-		if (!len || len > MAX_ARG_STRLEN)
-			return -EINVAL;
-		ustr += len;
+		str_ret = put_str_ptr(ustr, stack_item++);
+		if (str_ret < 0)
+			return str_ret;
+		ustr += str_ret;
 	}
 	if (elf_stack_put_user(0, stack_item++))
 		return -EFAULT;
@@ -415,13 +478,10 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
 #endif
 	mm->env_start = user_ptr_addr(ustr);
 	while (envc-- > 0) {
-		size_t len;
-		if (elf_stack_put_user(ustr, stack_item++))
-			return -EFAULT;
-		len = strnlen_user(ustr, MAX_ARG_STRLEN);
-		if (!len || len > MAX_ARG_STRLEN)
-			return -EINVAL;
-		ustr += len;
+		str_ret = put_str_ptr(ustr, stack_item++);
+		if (str_ret < 0)
+			return str_ret;
+		ustr += str_ret;
 	}
 	if (elf_stack_put_user(0, stack_item++))
 		return -EFAULT;
diff --git a/fs/exec.c b/fs/exec.c
index 892cd911549d..310f043ae286 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,6 +66,7 @@
 #include <linux/coredump.h>
 #include <linux/time_namespace.h>
 #include <linux/user_events.h>
+#include <linux/cheri.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -76,6 +77,12 @@
 
 #include <trace/events/sched.h>
 
+#ifdef CONFIG_CHERI_PURECAP_UABI
+#define STACK_ALIGN_SIZE MAX_ARG_STRLEN
+#else
+#define STACK_ALIGN_SIZE PAGE_SIZE
+#endif
+
 static int bprm_creds_from_file(struct linux_binprm *bprm);
 
 int suid_dumpable = 0;
@@ -515,6 +522,37 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
 	return 0;
 }
 
+/*
+ * Compute a new value for p that allows creating an exactly
+ * representable capability, with all its padding after it.
+ */
+size_t align_p_for_representable_bounds(unsigned long p, size_t len)
+{
+#if defined(CONFIG_CHERI_PURECAP_UABI)
+	size_t repr_len = 0, repr_align;
+
+	repr_len = cheri_representable_length(len);
+	repr_align = ~cheri_representable_alignment_mask(len) + 1;
+	/*
+	 * We want the capability base to be aligned. As we work from the
+	 * last to the first argument, we can place the start of the string
+	 * so that it is aligned for representability.
+	 * All padding then goes after it, both for the representable
+	 * length and for the alignment, filling the space left before the
+	 * previous argument we put on the stack.
+	 *
+	 * This is slightly different from the usual way representable
+	 * capabilities are created: normally we cannot change the address
+	 * the capability points to, so there can be padding between the
+	 * capability base and the capability value.
+	 * Here we can decide where to place the string, so we build a
+	 * capability with equal base and value, with all padding after.
+	 */
+	p = ALIGN_DOWN(p - repr_len, repr_align) + len;
+#endif
+	return p;
+}
+
 /*
  * 'copy_strings()' copies argument/environment strings from the old
  * processes's memory to the new process's stack. The call to get_user_pages()
@@ -530,7 +568,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
 
 	while (argc-- > 0) {
 		const char __user *str;
-		int len;
+		size_t len;
 		unsigned long pos;
 
 		ret = -EFAULT;
@@ -542,6 +580,17 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
 		if (!len)
 			goto out;
 
+		/*
+		 * Handle all strings as if they were for purecap binaries,
+		 * as we only know the calling process's compat status, which
+		 * might differ from that of the binary to be exec'ed.
+		 *
+		 * Start directly at the end of the string, with all the
+		 * padding already skipped.
+		 * No-op when !defined(CONFIG_CHERI_PURECAP_UABI).
+		 */
+		bprm->p = align_p_for_representable_bounds(bprm->p, len);
+
 		ret = -E2BIG;
 		if (!valid_arg_len(bprm, len))
 			goto out;
@@ -775,14 +824,14 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	if (vma->vm_end - vma->vm_start > stack_base)
 		return -ENOMEM;
 
-	stack_base = PAGE_ALIGN(stack_top - stack_base);
+	stack_base = ALIGN(stack_top - stack_base, STACK_ALIGN_SIZE);
 
 	stack_shift = vma->vm_start - stack_base;
 	mm->arg_start = bprm->p - stack_shift;
 	bprm->p = vma->vm_end - stack_shift;
 #else
 	stack_top = arch_align_stack(stack_top);
-	stack_top = PAGE_ALIGN(stack_top);
+	stack_top = ALIGN(stack_top, STACK_ALIGN_SIZE);
 
 	if (unlikely(stack_top < mmap_min_addr) ||
 	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))