Capability permission and bounds constraints are added as per the PCuABI specification. The mincore() syscall does not need the VMem permission, and any one of the RWX memory permissions is sufficient, so the standard capability_owns_range() interface is not used here.

Also, as mincore() allows the address range not to span whole pages, checking only a single byte at the page intersection is sufficient.
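For instance, with a capability whose bounds cover only the object itself, a caller may legitimately page-align the start of the query downwards, as in the following userspace sketch (hypothetical and illustrative only, not part of this patch; it assumes a purecap toolchain providing <cheriintrin.h>, and query_residency(), buf, sz and vec are made-up names):

  #include <cheriintrin.h>
  #include <sys/mman.h>
  #include <unistd.h>

  static int query_residency(void *buf, size_t sz, unsigned char *vec)
  {
          size_t page = (size_t)sysconf(_SC_PAGESIZE);
          size_t off = cheri_address_get(buf) % page;

          /*
           * The queried range [buf - off, buf + sz) spans whole pages,
           * but the capability bounds still cover only [buf, buf + sz).
           * The kernel therefore only requires the bounds to intersect
           * the first and the last page of the range.
           */
          return mincore((char *)buf - off, sz + off, vec);
  }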
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
---
 mm/mincore.c | 46 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)
diff --git a/mm/mincore.c b/mm/mincore.c
index 3a307bfa91c4..dfa6b5b9c3d3 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -19,6 +19,7 @@
 #include <linux/hugetlb.h>
 #include <linux/pgtable.h>
 
+#include <linux/cap_addr_mgmt.h>
 #include <linux/uaccess.h>
 
 #include "swap.h"
@@ -184,15 +185,19 @@ static const struct mm_walk_ops mincore_walk_ops = {
  * all the arguments, we hold the mmap semaphore: we should
  * just return the amount of info we're asked for.
  */
-static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
+static long do_mincore(user_uintptr_t user_addr, unsigned long pages, unsigned char *vec)
 {
 	struct vm_area_struct *vma;
 	unsigned long end;
+	unsigned long addr = (ptraddr_t)user_addr;
 	int err;
 
 	vma = vma_lookup(current->mm, addr);
 	if (!vma)
 		return -ENOMEM;
+	/* Check if the capability range is valid with mmap lock. */
+	if (!reserv_vma_match_capability(vma, user_addr))
+		return -ERESERVATION;
 	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
 	if (!can_do_mincore(vma)) {
 		unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
@@ -229,14 +234,16 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
  *		mapped
  *    -EAGAIN - A kernel resource was temporarily unavailable.
  */
-SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
+SYSCALL_DEFINE3(mincore, user_uintptr_t, user_start, size_t, len,
 		unsigned char __user *, vec)
 {
 	long retval;
 	unsigned long pages;
 	unsigned char *tmp;
-
-	start = untagged_addr(start);
+	unsigned long start = untagged_addr((ptraddr_t)user_start);
+#ifdef CONFIG_CHERI_PURECAP_UABI
+	unsigned long cap_start, cap_len;
+#endif
 
 	/* Check the start address: needs to be page-aligned.. */
 	if (start & ~PAGE_MASK)
@@ -253,6 +260,35 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
 	if (!access_ok(vec, pages))
 		return -EFAULT;
 
+#ifdef CONFIG_CHERI_PURECAP_UABI
+	if (is_compat_task())
+		goto skip_pcuabi_checks;
+	/*
+	 * mincore syscall does not need VMem permission so as to allow ordinary pages.
+	 * Also at least one of the standard memory permissions RWX will help to reject
+	 * non memory capabilities.
+	 */
+	user_start = cheri_address_set(user_start, start);
+	if (cheri_is_invalid(user_start) || cheri_is_sealed(user_start) ||
+	    !(CHERI_PERM_GLOBAL & cheri_perms_get(user_start)) ||
+	    !((CHERI_PERM_LOAD | CHERI_PERM_STORE | CHERI_PERM_EXECUTE)
+	      & cheri_perms_get(user_start)))
+		return -EINVAL;
+	/*
+	 * mincore syscall can be invoked as:
+	 * mincore(align_down(p, PAGE_SIZE), sz + (p.addr % PAGE_SIZE), vec)
+	 * Hence, the capability might not consider the increased range due to
+	 * alignment. In this scenario, check only the single byte at the page
+	 * intersection.
+	 */
+	cap_start = cheri_base_get(user_start);
+	cap_len = cheri_length_get(user_start);
+	if ((start + PAGE_SIZE <= cap_start) ||
+	    (cap_start + cap_len < start + len - offset_in_page(len)))
+		return -EINVAL;
+skip_pcuabi_checks:
+#endif
+
 	tmp = (void *) __get_free_page(GFP_USER);
 	if (!tmp)
 		return -EAGAIN;
@@ -264,7 +300,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
 	 * the temporary buffer size.
 	 */
 	mmap_read_lock(current->mm);
-	retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
+	retval = do_mincore(user_start, min(pages, PAGE_SIZE), tmp);
 	mmap_read_unlock(current->mm);
 
 	if (retval <= 0)
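For completeness, the behaviour these checks produce can be exercised from userspace roughly as follows (a hypothetical purecap test sketch, again assuming a CHERI toolchain with <cheriintrin.h>; not part of the patch):

  #include <assert.h>
  #include <cheriintrin.h>
  #include <errno.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
          size_t page = (size_t)sysconf(_SC_PAGESIZE);
          unsigned char vec[1];
          void *buf = mmap(NULL, page, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

          assert(buf != MAP_FAILED);

          /* A normal RW capability from mmap() passes the checks. */
          assert(mincore(buf, page, vec) == 0);

          /*
           * With Load, Store and Execute all removed, the capability no
           * longer carries any memory permission and is rejected.
           */
          void *no_rwx = cheri_perms_and(buf, ~(size_t)(CHERI_PERM_LOAD |
                          CHERI_PERM_STORE | CHERI_PERM_EXECUTE));
          assert(mincore(no_rwx, page, vec) == -1 && errno == EINVAL);

          return 0;
  }

Sealed or tag-cleared capabilities would be rejected with -EINVAL in the same way, per the permission checks in the third hunk above.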