Commit 03d89a2de25b ("io_uring: support for user allocated memory for rings/sqes") introduced struct io_{s,c}qring_offsets::user_addr to allow the user to specify ring mappings themselves. As usual, these fields are typed as __u64, although they are meant to hold pointers. This means that only the mapping address can be passed, and not a full pointer in PCuABI.
Move to operating on full pointers and perform the usual adaptations:
* __u64 becomes __kernel_uintptr_t (able to hold a full pointer in any ABI)
* Propagate the value as a user pointer down to the point of use (__io_uaddr_map()).
* To keep the ABI unchanged in compat64, introduce compat handling for io_uring_setup(). struct io_uring_params is both copied in and out, so we need to convert the layout in both directions. Not all fields are necessarily relevant in both cases, but for simplicity's sake and forward-compatibility, all of them are copied.
Finally, because the user memory is accessed via a kernel mapping, the provided pointer needs to be explicitly checked before calling pin_user_pages_fast(). Here again, for simplicity's sake, we require a RW pointer in both cases. Write access may not be strictly required for struct io_sqring_offsets::user_addr, but userspace needs to have a writeable mapping / pointer to be able to make use of it at all, so this should not make much difference in practice.
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
---
This patch is for the branch rebased on 6.7 (currently 6.7-rc5). It was tested against the latest liburing tests with updated Morello patches. The upstream patch mentioned above is part of [1], which landed in 6.5.
The patch is available at the top of my 6.7-based branch:
https://git.morello-project.org/kbrodsky-arm/linux/-/commits/morello/next-6....
[1] https://lore.kernel.org/all/20230513141643.1037620-1-axboe@kernel.dk/
 include/linux/io_uring_compat.h |  37 ++++++++++
 include/uapi/linux/io_uring.h   |   4 +-
 io_uring/io_uring.c             | 118 ++++++++++++++++++++++++++++++--
 3 files changed, 150 insertions(+), 9 deletions(-)
diff --git a/include/linux/io_uring_compat.h b/include/linux/io_uring_compat.h index 12e541f333f8..5d8dc250c015 100644 --- a/include/linux/io_uring_compat.h +++ b/include/linux/io_uring_compat.h @@ -62,6 +62,43 @@ struct compat_io_uring_cqe { __u64 big_cqe[]; };
+struct compat_io_sqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv1; + __u64 user_addr; +}; + +struct compat_io_cqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u32 flags; + __u32 resv1; + __u64 user_addr; +}; + +struct compat_io_uring_params { + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 features; + __u32 wq_fd; + __u32 resv[3]; + struct compat_io_sqring_offsets sq_off; + struct compat_io_cqring_offsets cq_off; +}; + struct compat_io_uring_files_update { __u32 offset; __u32 resv; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index eba3db0a0c3d..0630df584587 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -449,7 +449,7 @@ struct io_sqring_offsets { __u32 dropped; __u32 array; __u32 resv1; - __u64 user_addr; + __kernel_uintptr_t user_addr; };
/* @@ -468,7 +468,7 @@ struct io_cqring_offsets { __u32 cqes; __u32 flags; __u32 resv1; - __u64 user_addr; + __kernel_uintptr_t user_addr; };
/* diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 39dc644f56ae..e77a1460e55c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -208,6 +208,107 @@ static int copy_io_uring_getevents_arg_from_user(struct io_ring_ctx *ctx, return 0; }
+static int get_compat64_io_uring_params(struct io_uring_params *params, + const void __user *user_params) +{ + struct compat_io_uring_params compat_params; + + if (copy_from_user(&compat_params, user_params, sizeof(compat_params))) + return -EFAULT; + + params->sq_entries = compat_params.sq_entries; + params->cq_entries = compat_params.cq_entries; + params->flags = compat_params.flags; + params->sq_thread_cpu = compat_params.sq_thread_cpu; + params->sq_thread_idle = compat_params.sq_thread_idle; + params->features = compat_params.features; + params->wq_fd = compat_params.wq_fd; + memcpy(params->resv, compat_params.resv, sizeof(params->resv)); + + params->sq_off.head = compat_params.sq_off.head; + params->sq_off.tail = compat_params.sq_off.tail; + params->sq_off.ring_mask = compat_params.sq_off.ring_mask; + params->sq_off.ring_entries = compat_params.sq_off.ring_entries; + params->sq_off.flags = compat_params.sq_off.flags; + params->sq_off.dropped = compat_params.sq_off.dropped; + params->sq_off.array = compat_params.sq_off.array; + params->sq_off.resv1 = compat_params.sq_off.resv1; + params->sq_off.user_addr = (__kernel_uintptr_t)compat_ptr(compat_params.sq_off.user_addr); + + params->cq_off.head = compat_params.cq_off.head; + params->cq_off.tail = compat_params.cq_off.tail; + params->cq_off.ring_mask = compat_params.cq_off.ring_mask; + params->cq_off.ring_entries = compat_params.cq_off.ring_entries; + params->cq_off.overflow = compat_params.cq_off.overflow; + params->cq_off.cqes = compat_params.cq_off.cqes; + params->cq_off.flags = compat_params.cq_off.flags; + params->cq_off.resv1 = compat_params.cq_off.resv1; + params->cq_off.user_addr = (__kernel_uintptr_t)compat_ptr(compat_params.cq_off.user_addr); + + return 0; +} + +static int copy_io_uring_params_from_user(struct io_uring_params *params, + const void __user *src) +{ + if (IS_ENABLED(CONFIG_COMPAT64) && in_compat_syscall()) + return get_compat64_io_uring_params(params, src); + if 
(copy_from_user_with_ptr(params, src, sizeof(*params))) + return -EFAULT; + return 0; +} + +static int set_compat64_io_uring_params(void __user *user_params, + const struct io_uring_params *params) +{ + struct compat_io_uring_params compat_params; + + memset(&compat_params, 0, sizeof(compat_params)); + + compat_params.sq_entries = params->sq_entries; + compat_params.cq_entries = params->cq_entries; + compat_params.flags = params->flags; + compat_params.sq_thread_cpu = params->sq_thread_cpu; + compat_params.sq_thread_idle = params->sq_thread_idle; + compat_params.features = params->features; + compat_params.wq_fd = params->wq_fd; + + compat_params.sq_off.head = params->sq_off.head; + compat_params.sq_off.tail = params->sq_off.tail; + compat_params.sq_off.ring_mask = params->sq_off.ring_mask; + compat_params.sq_off.ring_entries = params->sq_off.ring_entries; + compat_params.sq_off.flags = params->sq_off.flags; + compat_params.sq_off.dropped = params->sq_off.dropped; + compat_params.sq_off.array = params->sq_off.array; + compat_params.sq_off.user_addr = (__u64)params->sq_off.user_addr; + + compat_params.cq_off.head = params->cq_off.head; + compat_params.cq_off.tail = params->cq_off.tail; + compat_params.cq_off.ring_mask = params->cq_off.ring_mask; + compat_params.cq_off.ring_entries = params->cq_off.ring_entries; + compat_params.cq_off.overflow = params->cq_off.overflow; + compat_params.cq_off.cqes = params->cq_off.cqes; + compat_params.cq_off.flags = params->cq_off.flags; + compat_params.cq_off.resv1 = params->cq_off.resv1; + compat_params.cq_off.user_addr = (__u64)params->cq_off.user_addr; + + if (copy_to_user(user_params, &compat_params, sizeof(compat_params))) + return -EFAULT; + + return 0; +} + +static int copy_io_uring_params_to_user(struct io_ring_ctx *ctx, + void __user *dst, + const struct io_uring_params *params) +{ + if (io_in_compat64(ctx)) + return set_compat64_io_uring_params(dst, params); + if (copy_to_user_with_ptr(dst, params, sizeof(*params))) + 
return -EFAULT; + return 0; +} + struct sock *io_uring_get_socket(struct file *file) { #if defined(CONFIG_UNIX) @@ -2745,8 +2846,9 @@ static void io_pages_free(struct page ***pages, int npages) }
static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, - unsigned long uaddr, size_t size) + void __user *uptr, size_t size) { + unsigned long uaddr = user_ptr_addr(uptr); struct page **page_array; unsigned int nr_pages; void *page_addr; @@ -2756,6 +2858,8 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
if (uaddr & (PAGE_SIZE - 1) || !size) return ERR_PTR(-EINVAL); + if (!check_user_ptr_rw(uptr, size)) + return ERR_PTR(-EFAULT);
nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; if (nr_pages > USHRT_MAX) @@ -2801,14 +2905,14 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, return page_to_virt(page_array[0]); }
-static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr, +static void *io_rings_map(struct io_ring_ctx *ctx, void __user *uaddr, size_t size) { return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr, size); }
-static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr, +static void *io_sqes_map(struct io_ring_ctx *ctx, void __user *uaddr, size_t size) { return __io_uaddr_map(&ctx->sqe_pages, &ctx->n_sqe_pages, uaddr, @@ -3895,7 +3999,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, if (!(ctx->flags & IORING_SETUP_NO_MMAP)) rings = io_mem_alloc(size); else - rings = io_rings_map(ctx, p->cq_off.user_addr, size); + rings = io_rings_map(ctx, (void __user *)p->cq_off.user_addr, size);
if (IS_ERR(rings)) return PTR_ERR(rings); @@ -3921,7 +4025,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, if (!(ctx->flags & IORING_SETUP_NO_MMAP)) ptr = io_mem_alloc(size); else - ptr = io_sqes_map(ctx, p->sq_off.user_addr, size); + ptr = io_sqes_map(ctx, (void __user *)p->sq_off.user_addr, size);
if (IS_ERR(ptr)) { io_rings_free(ctx); @@ -4139,7 +4243,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING;
- if (copy_to_user(params, p, sizeof(*p))) { + if (copy_io_uring_params_to_user(ctx, params, p)) { ret = -EFAULT; goto err; } @@ -4190,7 +4294,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) struct io_uring_params p; int i;
- if (copy_from_user(&p, params, sizeof(p))) + if (copy_io_uring_params_from_user(&p, params)) return -EFAULT; for (i = 0; i < ARRAY_SIZE(p.resv); i++) { if (p.resv[i])