Hello!
Here is patch series v4 incoming for the explicit capability checking series for issue #7[1].
This patch series will be found on my fork at the link in the footnotes[2], as soon as GitLab is fixed.
Kind regards, Luca
[1] https://git.morello-project.org/morello/kernel/linux/-/issues/7 [2] https://git.morello-project.org/Sevenarth/linux/-/commits/morello/gup-checks...
v4: - rebased onto morello/next - rephrased commit descriptions and notes left in the code - signature of first_iovec_segment has been updated to return a pointer instead of an address and the appropriate changes have been made - read+write checks have been combined together in the same if statement - unlikely check has been removed where appropriate - the USB User Request Block buffer is now checked against both write and read permissions according to the transfer direction as indicated by is_in - a leftover from v2 at io_uring/rsrc.c:1249 has been reverted back to original v3: - rebased onto morello/next - amended commit description for "gup: Add explicit capability checks" - refactored mm/gup.c - refactored lib/iov_iter.c - removed bpf patch - moved USB Request Block explicit check to proc_do_submiturb - removed explicit check in get_futex_key - changed prototype of io_uring_cmd_import_fixed and io_import_fixed to use a pointer type and adjusted the relevant castings - fixed io_uring_cmd_import_fixed prototype for !defined(CONFIG_IO_URING) - refactored explicit check in io_uring/kbuf.c:io_register_pbuf_ring(..) - removed explicit check from io_uring/kbuf.c:io_add_buffers(..) - rephrased the no explicit check needed note in io_sqe_buffer_register - reverted "struct io_mapped_ubuf" to use u64 - removed explicit check from io_uring_cmd_prep - updated TODO for the NVMe driver
Luca Vizzarro (7): gup: Add explicit capability checks iov_iter: Add explicit capability checks usb: core: Fix copy of URB from userspace usb: core: Add explicit capability checks futex: Add explicit capability checks io_uring: Add explicit capability checks nvme: Add TODO for PCuABI implementation
drivers/nvme/host/ioctl.c | 1 + drivers/usb/core/devio.c | 8 ++++++-- include/linux/io_uring.h | 6 +++--- include/linux/pagemap.h | 2 +- io_uring/kbuf.c | 26 +++++++++++++------------- io_uring/net.c | 3 +-- io_uring/rsrc.c | 14 ++++++++++++-- io_uring/rsrc.h | 2 +- io_uring/rw.c | 3 +-- io_uring/uring_cmd.c | 2 +- kernel/futex/core.c | 11 ++++++++--- lib/iov_iter.c | 31 ++++++++++++++++++++++++------- mm/gup.c | 6 ++++-- 13 files changed, 76 insertions(+), 39 deletions(-)
With PCuABI we want to make sure that any userspace capability is correctly validated.
In this commit the fault_in_safe_writeable function is attempting to fault in the pointed user page for writing purposes. In order to do so it uses fixup_user_fault, which considers the address and ignores any capability metadata. Therefore, it is necessary to perform an explicit capability check to make sure that the provided pointer has the appropriate bounds and permissions to write to the specified address range.
This also requires to change the signature of the function by dropping the const qualifier to the pointer argument, as check_user_ptr_write() expects a non-const pointer. This is acceptable given that the expectation is for the pointed region of memory to be writeable.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com --- include/linux/pagemap.h | 2 +- mm/gup.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a56308a9d1a4..a77203ae3686 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1094,7 +1094,7 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); */ size_t fault_in_writeable(char __user *uaddr, size_t size); size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); -size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); +size_t fault_in_safe_writeable(char __user *uaddr, size_t size); size_t fault_in_readable(const char __user *uaddr, size_t size);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping, diff --git a/mm/gup.c b/mm/gup.c index 8dc517c42ea8..dcfe02390eea 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1809,13 +1809,15 @@ EXPORT_SYMBOL(fault_in_subpage_writeable); * Returns the number of bytes not faulted in, like copy_to_user() and * copy_from_user(). */ -size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) +size_t fault_in_safe_writeable(char __user *uaddr, size_t size) { - /* TODO [PCuABI] - capability checks for uaccess */ unsigned long start = user_ptr_addr(uaddr), end; struct mm_struct *mm = current->mm; bool unlocked = false;
+ if (unlikely(!check_user_ptr_write(uaddr, size))) + return 0; + if (unlikely(size == 0)) return 0; end = PAGE_ALIGN(start + size);
With PCuABI we want to make sure that any userspace capability is correctly validated.
In this commit explicit capability checks are added in first_iovec_segment to ensure that the pointer received by userspace has the correct bounds and permissions to access such segment.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com --- lib/iov_iter.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 3b082f598162..6d8c41191ed0 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1536,14 +1536,17 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, }
/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ -static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) +static void __user *first_iovec_segment(const struct iov_iter *i, size_t *size) { + bool is_source = iov_iter_rw(i) == ITER_SOURCE; + void __user *seg; size_t skip; long k;
- if (iter_is_ubuf(i)) - /* TODO [PCuABI] - capability checks for uaccess */ - return user_ptr_addr(i->ubuf) + i->iov_offset; + if (iter_is_ubuf(i)) { + seg = i->ubuf + i->iov_offset; + goto ptr_check; + }
for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { const struct iovec *iov = iter_iov(i) + k; @@ -1553,9 +1556,18 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) continue; if (*size > len) *size = len; - return user_ptr_addr(iov->iov_base) + skip; + + seg = iov->iov_base + skip; + goto ptr_check; } BUG(); // if it had been empty, we wouldn't get called + +ptr_check: + if ((is_source && !check_user_ptr_read(seg, *size)) || + (!is_source && !check_user_ptr_write(seg, *size))) + return ERR_USER_PTR(-EFAULT); + + return seg; }
/* must be done on non-empty ITER_BVEC one */ @@ -1599,7 +1611,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT;
- addr = first_iovec_segment(i, &maxsize); + addr = user_ptr_addr(first_iovec_segment(i, &maxsize)); + if (IS_ERR_VALUE(addr)) + return addr; + *start = addr % PAGE_SIZE; addr &= PAGE_MASK; n = want_pages_array(pages, maxsize, *start, maxpages); @@ -2316,7 +2331,9 @@ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT;
- addr = first_iovec_segment(i, &maxsize); + addr = user_ptr_addr(first_iovec_segment(i, &maxsize)); + if (IS_ERR_VALUE(addr)) + return addr; *offset0 = offset = addr % PAGE_SIZE; addr &= PAGE_MASK; maxpages = want_pages_array(pages, maxsize, offset, maxpages);
On 29/08/2023 16:10, Luca Vizzarro wrote:
With PCuABI we want to make sure that any userspace capability is correctly validated.
In this commit explicit capability checks are added in first_iovec_segment to ensure that the pointer received by userspace
s/by/from/
Kevin
has the correct bounds and permissions to access such segment.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com
lib/iov_iter.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 3b082f598162..6d8c41191ed0 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1536,14 +1536,17 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, } /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ -static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) +static void __user *first_iovec_segment(const struct iov_iter *i, size_t *size) {
- bool is_source = iov_iter_rw(i) == ITER_SOURCE;
- void __user *seg; size_t skip; long k;
- if (iter_is_ubuf(i))
/* TODO [PCuABI] - capability checks for uaccess */
return user_ptr_addr(i->ubuf) + i->iov_offset;
- if (iter_is_ubuf(i)) {
seg = i->ubuf + i->iov_offset;
goto ptr_check;
- }
for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { const struct iovec *iov = iter_iov(i) + k; @@ -1553,9 +1556,18 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) continue; if (*size > len) *size = len;
return user_ptr_addr(iov->iov_base) + skip;
seg = iov->iov_base + skip;
} BUG(); // if it had been empty, we wouldn't get calledgoto ptr_check;
+ptr_check:
- if ((is_source && !check_user_ptr_read(seg, *size)) ||
(!is_source && !check_user_ptr_write(seg, *size)))
return ERR_USER_PTR(-EFAULT);
- return seg;
} /* must be done on non-empty ITER_BVEC one */ @@ -1599,7 +1611,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT;
addr = first_iovec_segment(i, &maxsize);
addr = user_ptr_addr(first_iovec_segment(i, &maxsize));
if (IS_ERR_VALUE(addr))
return addr;
- *start = addr % PAGE_SIZE; addr &= PAGE_MASK; n = want_pages_array(pages, maxsize, *start, maxpages);
@@ -2316,7 +2331,9 @@ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, if (i->nofault) gup_flags |= FOLL_NOFAULT;
- addr = first_iovec_segment(i, &maxsize);
- addr = user_ptr_addr(first_iovec_segment(i, &maxsize));
- if (IS_ERR_VALUE(addr))
*offset0 = offset = addr % PAGE_SIZE; addr &= PAGE_MASK; maxpages = want_pages_array(pages, maxsize, offset, maxpages);return addr;
When working with PCuABI, user pointers carry metadata as they are capabilities and they have a tag associated to them, which is placed in another region of memory. The kernel's copy_from_user function does not support them, and as a consequence their tag is not copied in the process.
This commit changes the used function to copy_from_user_with_ptr which exists to solve this problem.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com --- drivers/usb/core/devio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 2eb493e2123e..6972829ec6b5 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1990,7 +1990,7 @@ static int proc_submiturb(struct usb_dev_state *ps, void __user *arg) struct usbdevfs_urb uurb; sigval_t userurb_sigval;
- if (copy_from_user(&uurb, arg, sizeof(uurb))) + if (copy_from_user_with_ptr(&uurb, arg, sizeof(uurb))) return -EFAULT;
memset(&userurb_sigval, 0, sizeof(userurb_sigval));
Whenever a GUP call is made, a user address in the form of a 64-bit integer is used to lookup its corresponding page. When working in PCuABI, this means that the metadata of the user capability gets discarded, hence any access made by the GUP is not checked in hardware.
This commit introduces explicit capability checks whenever a call to the current mm through the GUP functions is made.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com --- drivers/usb/core/devio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6972829ec6b5..e1d93cafb142 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1591,7 +1591,6 @@ find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb) { struct usb_memory *usbm = NULL, *iter; unsigned long flags; - /* TODO [PCuABI] - capability checks for uaccess */ unsigned long uurb_start = user_ptr_addr(uurb->buffer);
spin_lock_irqsave(&ps->lock, flags); @@ -1773,6 +1772,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb ret = -EFAULT; goto error; } + if ((!is_in && !check_user_ptr_read(uurb->buffer, uurb->buffer_length)) || + (is_in && !check_user_ptr_write(uurb->buffer, uurb->buffer_length))) { + ret = -EFAULT; + goto error; + } as = alloc_async(number_of_packets); if (!as) { ret = -ENOMEM;
On 29/08/2023 16:10, Luca Vizzarro wrote:
Whenever a GUP call is made, a user address in the form of a 64-bit integer is used to lookup its corresponding page. When working in PCuABI, this means that the metadata of the user capability gets discarded, hence any access made by the GUP is not checked in hardware.
This commit introduces explicit capability checks whenever a call to the current mm through the GUP functions is made.
That commit still needs an appropriate message (especially since the permission check is not so obvious).
Kevin
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com
drivers/usb/core/devio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6972829ec6b5..e1d93cafb142 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1591,7 +1591,6 @@ find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb) { struct usb_memory *usbm = NULL, *iter; unsigned long flags;
- /* TODO [PCuABI] - capability checks for uaccess */ unsigned long uurb_start = user_ptr_addr(uurb->buffer);
spin_lock_irqsave(&ps->lock, flags); @@ -1773,6 +1772,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb ret = -EFAULT; goto error; }
- if ((!is_in && !check_user_ptr_read(uurb->buffer, uurb->buffer_length)) ||
(is_in && !check_user_ptr_write(uurb->buffer, uurb->buffer_length))) {
ret = -EFAULT;
goto error;
- } as = alloc_async(number_of_packets); if (!as) { ret = -ENOMEM;
When working with PCuABI, a user pointer is a capability carrying metadata such as access bounds and permissions. These are automatically enforced in hardware when a direct access is performed, which triggers a fault if either the bounds or permissions checks failed.
Whenever a futex key is accessed and an access fault occurs, the assumption is that the page containing the futex key must have faulted and the code proceeds to fix it up. Because no distinction is made between a capability fault and a page fault, an explicit capability check needs to be employed to verify which of the two triggered the fault. Therefore unnecessarily avoiding a a user page fault fixup, if the former case happened.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com --- kernel/futex/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 4b79b5e2ad3b..8efd02515654 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -226,8 +226,6 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, struct address_space *mapping; int err, ro = 0;
- /* TODO [PCuABI] - capability checks for uaccess */ - /* * The futex address must be "naturally" aligned. */ @@ -413,7 +411,14 @@ int fault_in_user_writeable(u32 __user *uaddr) struct mm_struct *mm = current->mm; int ret;
- /* TODO [PCuABI] - capability checks for uaccess */ + /* + * When working in PCuABI, we may have reached this point because an + * atomic load/store failed as a consequence of a capability fault + * and not a page fault. To cover this case we should employ an explicit + * check, so that we can fail and return early. + */ + if (!check_user_ptr_rw(uaddr, sizeof(u32))) + return -EFAULT;
mmap_read_lock(mm); ret = fixup_user_fault(mm, user_ptr_addr(uaddr),
On 29/08/2023 16:10, Luca Vizzarro wrote:
When working with PCuABI, a user pointer is a capability carrying metadata such as access bounds and permissions. These are automatically enforced in hardware when a direct access is performed, which triggers a fault if either the bounds or permissions checks failed.
Whenever a futex key is accessed and an access fault occurs, the assumption is that the page containing the futex key must have faulted and the code proceeds to fix it up. Because no distinction is made between a capability fault and a page fault, an explicit capability check needs to be employed to verify which of the two triggered the fault. Therefore unnecessarily avoiding a a user page fault fixup, if the former case happened.
I can't parse that last sentence. The main point we should make is that in the capability fault case, we must bail out as we would otherwise enter an infinite loop: while(1) { get_futex_key() [no capability check] futex_atomic...() [-EFAULT due to capability fault] fault_in_user_writeable() [faults in the user page, has no effect on the capability] }
We should also explain that we make no explicit check in get_futex_key() because it does not actually dereference uaddr, and the futex operations automatically check the capability metadata as you explained in the first paragraph.
Kevin
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com
kernel/futex/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 4b79b5e2ad3b..8efd02515654 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -226,8 +226,6 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, struct address_space *mapping; int err, ro = 0;
- /* TODO [PCuABI] - capability checks for uaccess */
- /*
*/
- The futex address must be "naturally" aligned.
@@ -413,7 +411,14 @@ int fault_in_user_writeable(u32 __user *uaddr) struct mm_struct *mm = current->mm; int ret;
- /* TODO [PCuABI] - capability checks for uaccess */
- /*
* When working in PCuABI, we may have reached this point because an
* atomic load/store failed as a consequence of a capability fault
* and not a page fault. To cover this case we should employ an explicit
* check, so that we can fail and return early.
*/
- if (!check_user_ptr_rw(uaddr, sizeof(u32)))
return -EFAULT;
mmap_read_lock(mm); ret = fixup_user_fault(mm, user_ptr_addr(uaddr),
Whenever a GUP call is made, a user address in the form of a 64-bit integer is used to lookup its corresponding page. When working in PCuABI, this means that the metadata of the user capability gets discarded, hence any access made by the GUP is not checked in hardware.
This commit introduces explicit capability checks whenever a call to the current mm through the GUP functions is made.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com --- include/linux/io_uring.h | 6 +++--- io_uring/kbuf.c | 26 +++++++++++++------------- io_uring/net.c | 3 +-- io_uring/rsrc.c | 14 ++++++++++++-- io_uring/rsrc.h | 2 +- io_uring/rw.c | 3 +-- io_uring/uring_cmd.c | 2 +- 7 files changed, 32 insertions(+), 24 deletions(-)
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 7fe31b2cd02f..d285acebeee2 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -42,7 +42,7 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) }
#if defined(CONFIG_IO_URING) -int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, +int io_uring_cmd_import_fixed(void __user *ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, unsigned issue_flags); @@ -72,8 +72,8 @@ static inline void io_uring_free(struct task_struct *tsk) __io_uring_free(tsk); } #else -static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd) +static inline int io_uring_cmd_import_fixed(void __user *ubuf, unsigned long len, + int rw, struct iov_iter *iter, void *ioucmd) { return -EOPNOTSUPP; } diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 625fba732db0..ba226b0ba7a0 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -561,7 +561,6 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, struct page **pages; int nr_pages;
- /* TODO [PCuABI] - capability checks for uaccess */ pages = io_pin_pages(reg->ring_addr, ring_size, &nr_pages); if (IS_ERR(pages)) return PTR_ERR(pages); @@ -620,12 +619,24 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (copy_io_uring_buf_reg_from_user(ctx, ®, arg)) return -EFAULT;
+ if (!is_power_of_2(reg.ring_entries)) + return -EINVAL; + + /* cannot disambiguate full vs empty due to head/tail size */ + if (reg.ring_entries >= 65536) + return -EINVAL; + + ring_size = reg.ring_entries * (io_in_compat64(ctx) ? + sizeof(struct compat_io_uring_buf) : + sizeof(struct io_uring_buf)); + if (reg.resv[0] || reg.resv[1] || reg.resv[2]) return -EINVAL; if (reg.flags & ~IOU_PBUF_RING_MMAP) return -EINVAL; if (!(reg.flags & IOU_PBUF_RING_MMAP)) { - if (!reg.ring_addr) + if (!reg.ring_addr || + !check_user_ptr_read((void __user *)reg.ring_addr, ring_size)) return -EFAULT; if (reg.ring_addr & ~PAGE_MASK) return -EINVAL; @@ -634,13 +645,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -EINVAL; }
- if (!is_power_of_2(reg.ring_entries)) - return -EINVAL; - - /* cannot disambiguate full vs empty due to head/tail size */ - if (reg.ring_entries >= 65536) - return -EINVAL; - if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { int ret = io_init_bl_list(ctx); if (ret) @@ -658,10 +662,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -ENOMEM; }
- ring_size = reg.ring_entries * (io_in_compat64(ctx) ? - sizeof(struct compat_io_uring_buf) : - sizeof(struct io_uring_buf)); - if (!(reg.flags & IOU_PBUF_RING_MMAP)) ret = io_pin_pbuf_ring(®, ring_size, bl); else diff --git a/io_uring/net.c b/io_uring/net.c index eb4795553bf8..0f3dbe4da64b 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1133,9 +1133,8 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) return io_setup_async_addr(req, &__address, issue_flags);
if (zc->flags & IORING_RECVSEND_FIXED_BUF) { - /* TODO [PCuABI] - capability checks for uaccess */ ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu, - user_ptr_addr(zc->buf), zc->len); + zc->buf, zc->len); if (unlikely(ret)) return ret; msg.sg_from_iter = io_sg_from_iter; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 44abf9c74b86..8bfc755b8766 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1192,7 +1192,13 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, return 0;
ret = -ENOMEM; - /* TODO [PCuABI] - capability checks for uaccess */ + + /* + * When working in PCuABI, explicit user pointer checking is not needed + * during registration as we are already doing it in io_import_fixed. + * Specifically we employ explicit checks before the user data provided + * in the pre-registered buffers is accessed. + */ pages = io_pin_pages(user_ptr_addr(iov->iov_base), iov->iov_len, &nr_pages); if (IS_ERR(pages)) { @@ -1327,8 +1333,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu, - u64 buf_addr, size_t len) + void __user *ubuf, size_t len) { + u64 buf_addr = user_ptr_addr(ubuf); u64 buf_end; size_t offset;
@@ -1339,6 +1346,9 @@ int io_import_fixed(int ddir, struct iov_iter *iter, /* not inside the mapped region */ if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) return -EFAULT; + if (unlikely((ddir == ITER_SOURCE && !check_user_ptr_read(ubuf, len)) || + (ddir == ITER_DEST && !check_user_ptr_write(ubuf, len)))) + return -EFAULT;
/* * Might not be a start of buffer, set size appropriately diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 0a8a95e9b99e..67a4929ec782 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -65,7 +65,7 @@ int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc);
int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu, - u64 buf_addr, size_t len); + void __user *buf_addr, size_t len);
void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx); int io_sqe_buffers_unregister(struct io_ring_ctx *ctx); diff --git a/io_uring/rw.c b/io_uring/rw.c index e2107505649d..7ac10b37dece 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -370,8 +370,7 @@ static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req, ssize_t ret;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - ret = io_import_fixed(ddir, iter, req->imu, - user_ptr_addr(rw->addr), rw->len); + ret = io_import_fixed(ddir, iter, req->imu, rw->addr, rw->len); if (ret) return ERR_PTR(ret); return NULL; diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 5e32db48696d..535bc82a4ef8 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -148,7 +148,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) return IOU_ISSUE_SKIP_COMPLETE; }
-int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, +int io_uring_cmd_import_fixed(void __user *ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
On 29/08/2023 16:10, Luca Vizzarro wrote:
Whenever a GUP call is made, a user address in the form of a 64-bit integer is used to lookup its corresponding page. When working in PCuABI, this means that the metadata of the user capability gets discarded, hence any access made by the GUP is not checked in hardware.
This commit introduces explicit capability checks whenever a call to the current mm through the GUP functions is made.
We need an appropriate commit message here as well. These changes are especially not self-evident, and we should refer to the PCuABI spec to justify them.
Kevin
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com
include/linux/io_uring.h | 6 +++--- io_uring/kbuf.c | 26 +++++++++++++------------- io_uring/net.c | 3 +-- io_uring/rsrc.c | 14 ++++++++++++-- io_uring/rsrc.h | 2 +- io_uring/rw.c | 3 +-- io_uring/uring_cmd.c | 2 +- 7 files changed, 32 insertions(+), 24 deletions(-)
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 7fe31b2cd02f..d285acebeee2 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -42,7 +42,7 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) } #if defined(CONFIG_IO_URING) -int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, +int io_uring_cmd_import_fixed(void __user *ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, unsigned issue_flags); @@ -72,8 +72,8 @@ static inline void io_uring_free(struct task_struct *tsk) __io_uring_free(tsk); } #else -static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
+static inline int io_uring_cmd_import_fixed(void __user *ubuf, unsigned long len,
int rw, struct iov_iter *iter, void *ioucmd)
{ return -EOPNOTSUPP; } diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 625fba732db0..ba226b0ba7a0 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -561,7 +561,6 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, struct page **pages; int nr_pages;
- /* TODO [PCuABI] - capability checks for uaccess */ pages = io_pin_pages(reg->ring_addr, ring_size, &nr_pages); if (IS_ERR(pages)) return PTR_ERR(pages);
@@ -620,12 +619,24 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (copy_io_uring_buf_reg_from_user(ctx, ®, arg)) return -EFAULT;
- if (!is_power_of_2(reg.ring_entries))
return -EINVAL;
- /* cannot disambiguate full vs empty due to head/tail size */
- if (reg.ring_entries >= 65536)
return -EINVAL;
- ring_size = reg.ring_entries * (io_in_compat64(ctx) ?
sizeof(struct compat_io_uring_buf) :
sizeof(struct io_uring_buf));
- if (reg.resv[0] || reg.resv[1] || reg.resv[2]) return -EINVAL; if (reg.flags & ~IOU_PBUF_RING_MMAP) return -EINVAL; if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
if (!reg.ring_addr)
if (!reg.ring_addr ||
if (reg.ring_addr & ~PAGE_MASK) return -EINVAL;!check_user_ptr_read((void __user *)reg.ring_addr, ring_size)) return -EFAULT;
@@ -634,13 +645,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -EINVAL; }
- if (!is_power_of_2(reg.ring_entries))
return -EINVAL;
- /* cannot disambiguate full vs empty due to head/tail size */
- if (reg.ring_entries >= 65536)
return -EINVAL;
- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { int ret = io_init_bl_list(ctx); if (ret)
@@ -658,10 +662,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -ENOMEM; }
- ring_size = reg.ring_entries * (io_in_compat64(ctx) ?
sizeof(struct compat_io_uring_buf) :
sizeof(struct io_uring_buf));
- if (!(reg.flags & IOU_PBUF_RING_MMAP)) ret = io_pin_pbuf_ring(®, ring_size, bl); else
diff --git a/io_uring/net.c b/io_uring/net.c index eb4795553bf8..0f3dbe4da64b 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1133,9 +1133,8 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) return io_setup_async_addr(req, &__address, issue_flags); if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,/* TODO [PCuABI] - capability checks for uaccess */
user_ptr_addr(zc->buf), zc->len);
if (unlikely(ret)) return ret; msg.sg_from_iter = io_sg_from_iter;zc->buf, zc->len);
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 44abf9c74b86..8bfc755b8766 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1192,7 +1192,13 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, return 0; ret = -ENOMEM;
- /* TODO [PCuABI] - capability checks for uaccess */
- /*
* When working in PCuABI, explicit user pointer checking is not needed
* during registration as we are already doing it in io_import_fixed.
* Specifically we employ explicit checks before the user data provided
* in the pre-registered buffers is accessed.
pages = io_pin_pages(user_ptr_addr(iov->iov_base), iov->iov_len, &nr_pages); if (IS_ERR(pages)) {*/
@@ -1327,8 +1333,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu,
u64 buf_addr, size_t len)
void __user *ubuf, size_t len)
{
- u64 buf_addr = user_ptr_addr(ubuf); u64 buf_end; size_t offset;
@@ -1339,6 +1346,9 @@ int io_import_fixed(int ddir, struct iov_iter *iter, /* not inside the mapped region */ if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) return -EFAULT;
- if (unlikely((ddir == ITER_SOURCE && !check_user_ptr_read(ubuf, len)) ||
(ddir == ITER_DEST && !check_user_ptr_write(ubuf, len))))
return -EFAULT;
/* * Might not be a start of buffer, set size appropriately diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 0a8a95e9b99e..67a4929ec782 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -65,7 +65,7 @@ int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc); int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu,
u64 buf_addr, size_t len);
void __user *buf_addr, size_t len);
void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx); int io_sqe_buffers_unregister(struct io_ring_ctx *ctx); diff --git a/io_uring/rw.c b/io_uring/rw.c index e2107505649d..7ac10b37dece 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -370,8 +370,7 @@ static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req, ssize_t ret; if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
ret = io_import_fixed(ddir, iter, req->imu,
user_ptr_addr(rw->addr), rw->len);
if (ret) return ERR_PTR(ret); return NULL;ret = io_import_fixed(ddir, iter, req->imu, rw->addr, rw->len);
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 5e32db48696d..535bc82a4ef8 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -148,7 +148,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) return IOU_ISSUE_SKIP_COMPLETE; } -int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, +int io_uring_cmd_import_fixed(void __user *ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
The prototype of io_uring_cmd_import_fixed has been updated to support PCuABI. This function is used in the nvme module which currently does not support PCuABI.
Signed-off-by: Luca Vizzarro Luca.Vizzarro@arm.com --- drivers/nvme/host/ioctl.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index f15e7330b75a..dcebc571e3f7 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -177,6 +177,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, /* fixedbufs is only for non-vectored io */ if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) return -EINVAL; + /* TODO [PCuABI]: change ubuffer type to void __user * */ ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd); if (ret < 0)
linux-morello@op-lists.linaro.org