Use the recently introduced PCuABI reservation interfaces to verify the address range for the madvise syscall.
The do_madvise() function is also used by DAMON's virtual address monitoring, whose address ranges may not satisfy the reservation range criteria, so add a parameter to skip the reservation checks.
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com> --- include/linux/mm.h | 3 ++- io_uring/advise.c | 2 +- mm/damon/vaddr.c | 2 +- mm/madvise.c | 27 +++++++++++++++++++++++---- 4 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 77e3374c65e0..c75c12d54fc7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3152,7 +3152,8 @@ extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, bool downgrade); extern int do_munmap(struct mm_struct *, user_uintptr_t, size_t, struct list_head *uf); -extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); +extern int do_madvise(struct mm_struct *mm, user_uintptr_t start, size_t len_in, + int behavior, bool reserv_ignore);
#ifdef CONFIG_MMU extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, diff --git a/io_uring/advise.c b/io_uring/advise.c index 952d9289a311..2e43142cf4df 100644 --- a/io_uring/advise.c +++ b/io_uring/advise.c @@ -55,7 +55,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags) WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
/* TODO [PCuABI] - capability checks for uaccess */ - ret = do_madvise(current->mm, user_ptr_addr(ma->addr), ma->len, ma->advice); + ret = do_madvise(current->mm, (user_uintptr_t)ma->addr, ma->len, ma->advice, false); io_req_set_res(req, ret, 0); return IOU_OK; #else diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 1fec16d7263e..fcdd3f4f608f 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -623,7 +623,7 @@ static unsigned long damos_madvise(struct damon_target *target, if (!mm) return 0;
- applied = do_madvise(mm, start, len, behavior) ? 0 : len; + applied = do_madvise(mm, start, len, behavior, true) ? 0 : len; mmput(mm);
return applied; diff --git a/mm/madvise.c b/mm/madvise.c index ad59e1e07ec9..d815f5647678 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -31,6 +31,7 @@ #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> +#include <linux/cap_addr_mgmt.h>
#include <asm/tlb.h>
@@ -1382,13 +1383,16 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ -int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) +int do_madvise(struct mm_struct *mm, user_uintptr_t user_start, size_t len_in, + int behavior, bool reserv_ignore) { unsigned long end; int error; int write; size_t len; struct blk_plug plug; + unsigned long start = (ptraddr_t)user_start; + struct vma_iterator vmi;
if (!madvise_behavior_valid(behavior)) return -EINVAL; @@ -1421,14 +1425,29 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh mmap_read_lock(mm); }
- /* TODO [PCuABI] - capability checks for uaccess */ start = untagged_addr_remote(mm, start); end = start + len;
+#ifdef CONFIG_CHERI_PURECAP_UABI + user_start = cheri_address_set(user_start, start); +#endif + if (!reserv_ignore) { + vma_iter_init(&vmi, current->mm, start); + if (!capability_owns_range(user_start, start, len)) { + error = -EINVAL; + goto out; + } + /* Check if the range exists within the reservation with mmap lock. */ + if (!reserv_vmi_match_capability(&vmi, user_start)) { + error = -ERESERVATION; + goto out; + } + } blk_start_plug(&plug); error = madvise_walk_vmas(mm, start, end, behavior, madvise_vma_behavior); blk_finish_plug(&plug); +out: if (write) mmap_write_unlock(mm); else @@ -1439,7 +1458,7 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
SYSCALL_DEFINE3(madvise, user_uintptr_t, start, size_t, len_in, int, behavior) { - return do_madvise(current->mm, start, len_in, behavior); + return do_madvise(current->mm, start, len_in, behavior, false); }
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, @@ -1494,7 +1513,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
while (iov_iter_count(&iter)) { ret = do_madvise(mm, user_ptr_addr(iter_iov_addr(&iter)), - iter_iov_len(&iter), behavior); + iter_iov_len(&iter), behavior, false); if (ret < 0) break; iov_iter_advance(&iter, iter_iov_len(&iter));