On 03/05/2023 16:17, Luca Vizzarro wrote:
Whenever a GUP call is made, the page address for the lookup is a 64-bit long raw pointer. When working in PCuABI, this means that the metadata of the capability gets discarded, hence any access made by the GUP is not checked in hardware.
This commit introduces explicit capability checks whenever a call to the current mm through the GUP functions is made.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com
Hello,
this patch adds explicit capability checks needed in conjuction with GUP calls. Submitting only for review and not merging.
In order for this patch to work Kevin's user pointer helpers patch is required, in addition to a modification that I have suggested in his series thread.
Another essential required change is actually porting the futex_waitv syscall to PCuABI. Otherwise this patch will be a BREAKING change and LTP will fail. LTP was ran successfully against this patch ONLY with the minimum required changes for futex_waitv to work, which are still a WIP.
The ticket related to this patch is #7: https://git.morello-project.org/morello/kernel/linux/-/issues/7
This patch can be found on the following branch, which contains some futex_waitv changes, Kevin's series and its modification: https://git.morello-project.org/Sevenarth/linux/-/commits/morello/gup-checks
Best, Luca
drivers/usb/core/devio.c | 8 ++++++-- io_uring/kbuf.c | 5 ++++- io_uring/net.c | 4 +++- io_uring/rsrc.c | 7 ++++++- kernel/bpf/helpers.c | 4 +++- kernel/futex/core.c | 11 ++++++++--- lib/iov_iter.c | 21 ++++++++++++++++++--- mm/gup.c | 8 ++++++-- 8 files changed, 54 insertions(+), 14 deletions(-)
I haven't reviewed the patch thoroughly but it's a good start. Just a few comments below on what caught my eyes. I also second Tudor's suggestions, especially in terms of splitting the patches (there's no reason not to split them by subsystem here).
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index cb37f6d2010a..4d3249ba343a 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1584,8 +1584,12 @@ find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb) { struct usb_memory *usbm = NULL, *iter; unsigned long flags;
- /* TODO [PCuABI] - capability checks for uaccess */
- unsigned long uurb_start = user_ptr_addr(uurb->buffer);
- unsigned long uurb_start;
- if (!check_user_ptr_rw(uurb->buffer, uurb->buffer_length))
return ERR_PTR(-EFAULT);
- uurb_start = user_ptr_addr(uurb->buffer);
spin_lock_irqsave(&ps->lock, flags); list_for_each_entry(iter, &ps->memory_list, memlist) { diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 70056c27d778..d6e6227caab0 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -587,7 +587,10 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) pages_size = io_in_compat64(ctx) ? size_mul(sizeof(struct compat_io_uring_buf), reg.ring_entries) : size_mul(sizeof(struct io_uring_buf), reg.ring_entries);
- /* TODO [PCuABI] - capability checks for uaccess */
- if (!check_user_ptr_rw((void __user *)reg.ring_addr, pages_size))
Maybe a question both for you and Tudor: do we actually have any idea whether the operation that will be performed on the registered buffer is R or W at this point? If not this is a pretty serious issue, as we must not reject say a read-only pointer if the buffer is only ever read from, not written to. The same question applies to all users of io_pin_pages(); for io_import_fixed() it looks the prototype already tells us about that.
return -EFAULT;
- pages = io_pin_pages(reg.ring_addr, pages_size, &nr_pages); if (IS_ERR(pages)) { kfree(free_bl);
diff --git a/io_uring/net.c b/io_uring/net.c index 6fd28a49b671..a8766d53cad8 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1088,7 +1088,9 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) return io_setup_async_addr(req, &__address, issue_flags); if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
/* TODO [PCuABI] - capability checks for uaccess */
if (!check_user_ptr_write(zc->buf, zc->len))
return -EFAULT;
- ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, user_ptr_addr(zc->buf), zc->len); if (unlikely(ret))
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 9e716fef91d7..285938fcf119 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1285,7 +1285,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, return 0; ret = -ENOMEM;
- /* TODO [PCuABI] - capability checks for uaccess */
- if (!check_user_ptr_rw(iov->iov_base, iov->iov_len)) {
ret = -EFAULT;
goto done;
- }
- pages = io_pin_pages(user_ptr_addr(iov->iov_base), iov->iov_len, &nr_pages); if (IS_ERR(pages)) {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a8e76cf06da7..4db910f1758d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -675,7 +675,9 @@ BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size, if (unlikely(!size)) return 0;
- /* TODO [PCuABI] - capability checks for uaccess */
- if (!check_user_ptr_read(user_ptr, size))
return -EFAULT;
- ret = access_process_vm(tsk, user_ptr_addr(user_ptr), dst, size, 0); if (ret == size) return 0;
diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 9613080ccf0c..04289dc13b4a 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -226,8 +226,6 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, struct address_space *mapping; int err, ro = 0;
- /* TODO [PCuABI] - capability checks for uaccess */
- /*
*/
- The futex address must be "naturally" aligned.
@@ -239,6 +237,12 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, if (unlikely(!access_ok(uaddr, sizeof(u32)))) return -EFAULT;
- if (rw == FUTEX_READ && !check_user_ptr_read(uaddr, sizeof(u32)))
return -EFAULT;
- if (rw == FUTEX_WRITE && !check_user_ptr_rw(uaddr, sizeof(u32)))
return -EFAULT;
- if (unlikely(should_fail_futex(fshared))) return -EFAULT;
@@ -413,7 +417,8 @@ int fault_in_user_writeable(u32 __user *uaddr) struct mm_struct *mm = current->mm; int ret;
- /* TODO [PCuABI] - capability checks for uaccess */
- if (!check_user_ptr_rw(uaddr, PAGE_SIZE))
Following up on Tudor's comments, the size to require here is tricky. The purpose of this function is to bring in a (writeable) page in order to retry the atomic operation. This does not however mean that the pointer needs to be able to access the entire page. In fact, AFAICT the pointer we are manipulating here is only ever used to access a futex, i.e. a u32. So the required size should probably be that of a u32.
Maybe some elaboration is required here on why we need a check at all. In itself causing a page fault should have no functional effect in userspace, so from a capability model perspective we don't really need an explicit check. However, it is essential, for another reason: breaking the loop between futex_cmpxchg_value_locked() (or similar) when it returns -EFAULT, and fault_in_user_writeable() when it succeeds. The explicit check ensures that when the atomic operation faults, we figure out here that this was due to the pointer itself and break the loop, irrespective of the underlying page. Probably worth a comment in this function.
return -EFAULT;
mmap_read_lock(mm); ret = fixup_user_fault(mm, user_ptr_addr(uaddr), diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 2d74d8d00ad9..046915cc1562 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1481,10 +1481,16 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { size_t skip; long k;
- bool is_rw = iov_iter_rw(i) != WRITE;
- if (iter_is_ubuf(i)) {
if (is_rw && !check_user_ptr_rw(i->ubuf, *size))
return -EFAULT;
if (!is_rw && !check_user_ptr_read(i->ubuf, *size))
return -EFAULT;
- if (iter_is_ubuf(i))
return user_ptr_addr(i->ubuf) + i->iov_offset;/* TODO [PCuABI] - capability checks for uaccess */
- }
for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { size_t len = i->iov[k].iov_len - skip; @@ -1493,7 +1499,13 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) continue; if (*size > len) *size = len;
/* TODO [PCuABI] - capability checks for uaccess */
if (is_rw && !check_user_ptr_rw(i->iov[k].iov_base + skip,
*size))
return -EFAULT;
if (!is_rw && !check_user_ptr_read(i->iov[k].iov_base + skip,
*size))
return -EFAULT;
- return user_ptr_addr(i->iov[k].iov_base) + skip; } BUG(); // if it had been empty, we wouldn't get called
@@ -1539,6 +1551,9 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, gup_flags |= FOLL_NOFAULT; addr = first_iovec_segment(i, &maxsize);
if (IS_ERR_VALUE(addr))
return addr;
- *start = addr % PAGE_SIZE; addr &= PAGE_MASK; n = want_pages_array(pages, maxsize, *start, maxpages);
diff --git a/mm/gup.c b/mm/gup.c index dc02749eaf9b..197459c58cd1 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1843,11 +1843,15 @@ EXPORT_SYMBOL(fault_in_subpage_writeable); */ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) {
- /* TODO [PCuABI] - capability checks for uaccess */
- unsigned long start = user_ptr_addr(uaddr), end;
- unsigned long start, end; struct mm_struct *mm = current->mm; bool unlocked = false;
- if (!check_user_ptr_read(uaddr, size))
check_user_ptr_write() surely? That's the main reason why you need the change in prototype in the patch adding the helpers AIUI.
Kevin
return 0;
- start = user_ptr_addr(uaddr);
- if (unlikely(size == 0)) return 0; end = PAGE_ALIGN(start + size);