Use the recently introduced PCuABI reservation interfaces to verify the address range passed to the madvise() syscall.
The do_madvise() function is also used by the virtual address monitoring daemon (DAMON, mm/damon/vaddr.c), whose address ranges may not satisfy the reservation range criteria, so add a parameter that lets such callers skip the reservation checks.
Signed-off-by: Amit Daniel Kachhap <amitdaniel.kachhap@arm.com>
---
 include/linux/mm.h |  3 ++-
 io_uring/advise.c  |  2 +-
 mm/damon/vaddr.c   |  2 +-
 mm/madvise.c       | 26 +++++++++++++++++++++-----
 4 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 44a55c3e2c06..f1c70f416eff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3375,7 +3375,8 @@ extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, bool unlock); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); -extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); +extern int do_madvise(struct mm_struct *mm, user_uintptr_t user_ptr, size_t len_in, + int behavior, bool reserv_ignore);
#ifdef CONFIG_MMU extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, diff --git a/io_uring/advise.c b/io_uring/advise.c index 952d9289a311..2e43142cf4df 100644 --- a/io_uring/advise.c +++ b/io_uring/advise.c @@ -55,7 +55,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags) WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
/* TODO [PCuABI] - capability checks for uaccess */ - ret = do_madvise(current->mm, user_ptr_addr(ma->addr), ma->len, ma->advice); + ret = do_madvise(current->mm, (user_uintptr_t)ma->addr, ma->len, ma->advice, false); io_req_set_res(req, ret, 0); return IOU_OK; #else diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index a4d1f63c5b23..3138da113117 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -643,7 +643,7 @@ static unsigned long damos_madvise(struct damon_target *target, if (!mm) return 0;
- applied = do_madvise(mm, start, len, behavior) ? 0 : len; + applied = do_madvise(mm, start, len, behavior, true) ? 0 : len; mmput(mm);
return applied; diff --git a/mm/madvise.c b/mm/madvise.c index d0c8e854636e..3bbb353f5f0b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -31,6 +31,7 @@ #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> +#include <linux/cap_addr_mgmt.h>
#include <asm/tlb.h>
@@ -1394,13 +1395,16 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ -int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) +int do_madvise(struct mm_struct *mm, user_uintptr_t user_ptr, size_t len_in, + int behavior, bool reserv_ignore) { unsigned long end; int error; int write; size_t len; struct blk_plug plug; + unsigned long start = (ptraddr_t)user_ptr; + struct vma_iterator vmi;
if (!madvise_behavior_valid(behavior)) return -EINVAL; @@ -1433,14 +1437,26 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh mmap_read_lock(mm); }
- /* TODO [PCuABI] - capability checks for uaccess */ start = untagged_addr_remote(mm, start); end = start + len;
+ if (!reserv_ignore) { + vma_iter_init(&vmi, current->mm, start); + if (!check_user_ptr_owning(user_ptr, start, len)) { + error = -EINVAL; + goto out; + } + /* Check if the range exists within the reservation with mmap lock. */ + if (!reserv_vmi_cap_within_reserv(&vmi, user_ptr, true)) { + error = -ERESERVATION; + goto out; + } + } blk_start_plug(&plug); error = madvise_walk_vmas(mm, start, end, behavior, madvise_vma_behavior); blk_finish_plug(&plug); +out: if (write) mmap_write_unlock(mm); else @@ -1449,9 +1465,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh return error; }
-SYSCALL_DEFINE3(madvise, user_uintptr_t, start, size_t, len_in, int, behavior) +SYSCALL_DEFINE3(madvise, user_uintptr_t, user_ptr, size_t, len_in, int, behavior) { - return do_madvise(current->mm, start, len_in, behavior); + return do_madvise(current->mm, user_ptr, len_in, behavior, false); }
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, @@ -1506,7 +1522,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
while (iov_iter_count(&iter)) { ret = do_madvise(mm, user_ptr_addr(iter_iov_addr(&iter)), - iter_iov_len(&iter), behavior); + iter_iov_len(&iter), behavior, false); if (ret < 0) break; iov_iter_advance(&iter, iter_iov_len(&iter));