Introduce compat versions of the structs exposed in the uAPI headers that may contain pointers as members. Also, implement functions that convert the compat versions of these structs to their native versions.
A subsequent patch will change the io_uring structs to support new architectures. On such architectures, the current struct layout must still be supported for compat tasks.
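As an illustration of the shape shared by all of the new copy-in
helpers, here is a minimal sketch on a hypothetical struct (the
"example" names are illustrative only and not part of this patch; the
real helpers below operate on the io_uring uAPI structs):

	struct example_update {			/* native layout */
		__u32 offset;
		__u32 resv;
		__u64 data;
	};

	struct compat_example_update {		/* compat layout */
		__u32 offset;
		__u32 resv;
		__aligned_u64 data;		/* pointer carried as u64 */
	};

	static int get_compat64_example_update(struct example_update *up,
					       const void __user *user_up)
	{
		struct compat_example_update compat_up;

		if (copy_from_user(&compat_up, user_up, sizeof(compat_up)))
			return -EFAULT;
		up->offset = compat_up.offset;
		up->resv = compat_up.resv;
		up->data = compat_up.data;
		return 0;
	}

	static int copy_example_update_from_user(struct io_ring_ctx *ctx,
						 struct example_update *up,
						 const void __user *arg)
	{
		/* compat64 tasks keep the current struct layout */
		if (io_in_compat64(ctx))
			return get_compat64_example_update(up, arg);
		if (copy_from_user(up, arg, sizeof(*up)))
			return -EFAULT;
		return 0;
	}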
Signed-off-by: Tudor Cretu <tudor.cretu@arm.com>
---
 include/linux/io_uring_compat.h | 130 ++++++++++++++++++
 include/linux/io_uring_types.h  |  11 +-
 io_uring/cancel.c               |  28 +++-
 io_uring/epoll.c                |   2 +-
 io_uring/fdinfo.c               |  77 +++++++----
 io_uring/io_uring.c             | 231 ++++++++++++++++++++++----------
 io_uring/io_uring.h             | 108 ++++++++++++---
 io_uring/kbuf.c                 |  98 ++++++++++++--
 io_uring/kbuf.h                 |   6 +-
 io_uring/net.c                  |   5 +-
 io_uring/rsrc.c                 | 110 +++++++++++++--
 io_uring/tctx.c                 |  56 +++++++-
 io_uring/uring_cmd.h            |   4 +
 13 files changed, 716 insertions(+), 150 deletions(-)
 create mode 100644 include/linux/io_uring_compat.h
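Reviewer note: every new path is selected per-ring rather than
per-syscall, via the io_in_compat64() helper that this patch adds to
io_uring.h (quoted here for convenience). IS_ENABLED(CONFIG_COMPAT64)
constant-folds to false on non-compat64 builds, so the native paths
are unaffected there:

	static inline bool io_in_compat64(struct io_ring_ctx *ctx)
	{
		return IS_ENABLED(CONFIG_COMPAT64) && ctx->compat;
	}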
diff --git a/include/linux/io_uring_compat.h b/include/linux/io_uring_compat.h new file mode 100644 index 0000000000000..07432144e2745 --- /dev/null +++ b/include/linux/io_uring_compat.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef IO_URING_COMPAT_H +#define IO_URING_COMPAT_H + +#include <linux/types.h> +#include <linux/time.h> +#include <linux/fs.h> + +struct compat_io_uring_sqe { + __u8 opcode; + __u8 flags; + __u16 ioprio; + __s32 fd; + union { + __u64 off; + __u64 addr2; + struct { + __u32 cmd_op; + __u32 __pad1; + }; + }; + union { + __u64 addr; + __u64 splice_off_in; + }; + __u32 len; + /* This member is actually a union in the native struct */ + __kernel_rwf_t rw_flags; + __u64 user_data; + union { + __u16 buf_index; + __u16 buf_group; + } __packed; + __u16 personality; + union { + __s32 splice_fd_in; + __u32 file_index; + struct { + __u16 addr_len; + __u16 __pad3[1]; + }; + }; + union { + struct { + __u64 addr3; + __u64 __pad2[1]; + }; + __u8 cmd[0]; + }; +}; + +struct compat_io_uring_cqe { + __u64 user_data; + __s32 res; + __u32 flags; + __u64 big_cqe[]; +}; + +struct compat_io_uring_files_update { + __u32 offset; + __u32 resv; + __aligned_u64 fds; +}; + +struct compat_io_uring_rsrc_register { + __u32 nr; + __u32 flags; + __u64 resv2; + __aligned_u64 data; + __aligned_u64 tags; +}; + +struct compat_io_uring_rsrc_update { + __u32 offset; + __u32 resv; + __aligned_u64 data; +}; + +struct compat_io_uring_rsrc_update2 { + __u32 offset; + __u32 resv; + __aligned_u64 data; + __aligned_u64 tags; + __u32 nr; + __u32 resv2; +}; + +struct compat_io_uring_buf { + __u64 addr; + __u32 len; + __u16 bid; + __u16 resv; +}; + +struct compat_io_uring_buf_ring { + union { + struct { + __u64 resv1; + __u32 resv2; + __u16 resv3; + __u16 tail; + }; + struct compat_io_uring_buf bufs[0]; + }; +}; + +struct compat_io_uring_buf_reg { + __u64 ring_addr; + __u32 ring_entries; + __u16 bgid; + __u16 pad; + __u64 resv[3]; +}; + +struct compat_io_uring_getevents_arg { + __u64 sigmask; + __u32 sigmask_sz; + __u32 pad; + __u64 ts; +}; + +struct compat_io_uring_sync_cancel_reg { + __u64 addr; + __s32 fd; + __u32 flags; + struct __kernel_timespec timeout; + __u64 pad[4]; +}; + +#endif diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3d14c6feb51b6..9506a8858f0ff 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -5,6 +5,7 @@ #include <linux/task_work.h> #include <linux/bitmap.h> #include <linux/llist.h> +#include <linux/io_uring_compat.h> #include <uapi/linux/io_uring.h>
struct io_wq_work_node { @@ -216,7 +217,10 @@ struct io_ring_ctx { * array. */ u32 *sq_array; - struct io_uring_sqe *sq_sqes; + union { + struct compat_io_uring_sqe *sq_sqes_compat; + struct io_uring_sqe *sq_sqes; + }; unsigned cached_sq_head; unsigned sq_entries;
@@ -271,7 +275,10 @@ struct io_ring_ctx { * produced, so the application is allowed to modify pending * entries. */ - struct io_uring_cqe *cqes; + union { + struct compat_io_uring_cqe *cqes_compat; + struct io_uring_cqe *cqes; + };
/* * We cache a range of free CQEs we can use, once exhausted it diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 2291a53cdabd1..0f942da7455b5 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -27,6 +27,32 @@ struct io_cancel { #define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \ IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED)
+static int get_compat64_io_uring_sync_cancel_reg(struct io_uring_sync_cancel_reg *sc, + const void __user *user_sc) +{ + struct compat_io_uring_sync_cancel_reg compat_sc; + + if (copy_from_user(&compat_sc, user_sc, sizeof(compat_sc))) + return -EFAULT; + sc->addr = compat_sc.addr; + sc->fd = compat_sc.fd; + sc->flags = compat_sc.flags; + sc->timeout = compat_sc.timeout; + memcpy(sc->pad, compat_sc.pad, sizeof(sc->pad)); + return 0; +} + +static int copy_io_uring_sync_cancel_reg_from_user(struct io_ring_ctx *ctx, + struct io_uring_sync_cancel_reg *sc, + const void __user *arg) +{ + if (io_in_compat64(ctx)) + return get_compat64_io_uring_sync_cancel_reg(sc, arg); + if (copy_from_user(sc, arg, sizeof(*sc))) + return -EFAULT; + return 0; +} + static bool io_cancel_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); @@ -243,7 +269,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) DEFINE_WAIT(wait); int ret;
- if (copy_from_user(&sc, arg, sizeof(sc))) + if (copy_io_uring_sync_cancel_reg_from_user(ctx, &sc, arg)) return -EFAULT; if (sc.flags & ~CANCEL_FLAGS) return -EINVAL; diff --git a/io_uring/epoll.c b/io_uring/epoll.c index 9aa74d2c80bc4..d5580ff465c3e 100644 --- a/io_uring/epoll.c +++ b/io_uring/epoll.c @@ -40,7 +40,7 @@ int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct epoll_event __user *ev;
ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); - if (copy_from_user(&epoll->event, ev, sizeof(*ev))) + if (copy_epoll_event_from_user(&epoll->event, ev, req->ctx->compat)) return -EFAULT; }
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index bc8c9d764bc13..1f0a0f47ce17b 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -48,6 +48,38 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, return 0; }
+#define print_sqe(m, sqe, sq_idx, sq_shift) \ + do { \ + seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " \ + "addr:0x%llx, rw_flags:0x%x, buf_index:%d " \ + "user_data:%llu", \ + (sq_idx), io_uring_get_opcode((sqe)->opcode), (sqe)->fd, \ + (sqe)->flags, (unsigned long long) (sqe)->off, \ + (unsigned long long) (sqe)->addr, (sqe)->rw_flags, \ + (sqe)->buf_index, (sqe)->user_data); \ + if (sq_shift) { \ + u64 *sqeb = (void *) ((sqe) + 1); \ + int size = sizeof(*(sqe)) / sizeof(u64); \ + int j; \ + \ + for (j = 0; j < size; j++) { \ + seq_printf(m, ", e%d:0x%llx", j, \ + (unsigned long long) *sqeb); \ + sqeb++; \ + } \ + } \ + } while (0) + +#define print_cqe(m, cqe, cq_idx, cq_shift) \ + do { \ + seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x", \ + (cq_idx), (cqe)->user_data, (cqe)->res, \ + (cqe)->flags); \ + if (cq_shift) \ + seq_printf(m, ", extra1:%llu, extra2:%llu", \ + (cqe)->big_cqe[0], (cqe)->big_cqe[1]); \ + } while (0) + static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { @@ -88,45 +120,32 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, sq_entries = min(sq_tail - sq_head, ctx->sq_entries); for (i = 0; i < sq_entries; i++) { unsigned int entry = i + sq_head; - struct io_uring_sqe *sqe; - unsigned int sq_idx; + unsigned int sq_idx, sq_off;
sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); if (sq_idx > sq_mask) continue; - sqe = &ctx->sq_sqes[sq_idx << sq_shift]; - seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " - "addr:0x%llx, rw_flags:0x%x, buf_index:%d " - "user_data:%llu", - sq_idx, io_uring_get_opcode(sqe->opcode), sqe->fd, - sqe->flags, (unsigned long long) sqe->off, - (unsigned long long) sqe->addr, sqe->rw_flags, - sqe->buf_index, sqe->user_data); - if (sq_shift) { - u64 *sqeb = (void *) (sqe + 1); - int size = sizeof(struct io_uring_sqe) / sizeof(u64); - int j; - - for (j = 0; j < size; j++) { - seq_printf(m, ", e%d:0x%llx", j, - (unsigned long long) *sqeb); - sqeb++; - } - } + sq_off = sq_idx << sq_shift; + + if (io_in_compat64(ctx)) + print_sqe(m, &ctx->sq_sqes_compat[sq_off], sq_idx, sq_shift); + else + print_sqe(m, &ctx->sq_sqes[sq_off], sq_idx, sq_shift); + seq_printf(m, "\n"); } seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head); cq_entries = min(cq_tail - cq_head, ctx->cq_entries); for (i = 0; i < cq_entries; i++) { unsigned int entry = i + cq_head; - struct io_uring_cqe *cqe = &ctx->cqes[(entry & cq_mask) << cq_shift]; - - seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x", - entry & cq_mask, cqe->user_data, cqe->res, - cqe->flags); - if (cq_shift) - seq_printf(m, ", extra1:%llu, extra2:%llu\n", - cqe->big_cqe[0], cqe->big_cqe[1]); + unsigned int cq_idx = entry & cq_mask; + unsigned int cq_off = cq_idx << cq_shift; + + if (io_in_compat64(ctx)) + print_cqe(m, &ctx->cqes_compat[cq_off], cq_idx, cq_shift); + else + print_cqe(m, &ctx->cqes[cq_off], cq_idx, cq_shift); + seq_printf(m, "\n"); }
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index fb6d07e1e7358..91828646dba29 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -152,6 +152,37 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
static struct kmem_cache *req_cachep;
+static int get_compat64_io_uring_getevents_arg(struct io_uring_getevents_arg *arg, + const void __user *user_arg) +{ + struct compat_io_uring_getevents_arg compat_arg; + + if (copy_from_user(&compat_arg, user_arg, sizeof(compat_arg))) + return -EFAULT; + arg->sigmask = compat_arg.sigmask; + arg->sigmask_sz = compat_arg.sigmask_sz; + arg->pad = compat_arg.pad; + arg->ts = compat_arg.ts; + return 0; +} + +static int copy_io_uring_getevents_arg_from_user(struct io_ring_ctx *ctx, + struct io_uring_getevents_arg *arg, + const void __user *argp, + size_t size) +{ + if (io_in_compat64(ctx)) { + if (size != sizeof(struct compat_io_uring_getevents_arg)) + return -EINVAL; + return get_compat64_io_uring_getevents_arg(arg, argp); + } + if (size != sizeof(*arg)) + return -EINVAL; + if (copy_from_user(arg, argp, sizeof(*arg))) + return -EFAULT; + return 0; +} + struct sock *io_uring_get_socket(struct file *file) { #if defined(CONFIG_UNIX) @@ -604,14 +635,10 @@ void io_cq_unlock_post(struct io_ring_ctx *ctx) static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) { bool all_flushed; - size_t cqe_size = sizeof(struct io_uring_cqe);
if (!force && __io_cqring_events(ctx) == ctx->cq_entries) return false;
- if (ctx->flags & IORING_SETUP_CQE32) - cqe_size <<= 1; - io_cq_lock(ctx); while (!list_empty(&ctx->cq_overflow_list)) { struct io_uring_cqe *cqe = io_get_cqe_overflow(ctx, true); @@ -621,10 +648,20 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) break; ocqe = list_first_entry(&ctx->cq_overflow_list, struct io_overflow_cqe, list); - if (cqe) - memcpy(cqe, &ocqe->cqe, cqe_size); - else + if (cqe) { + u64 extra1 = 0; + u64 extra2 = 0; + + if (ctx->flags & IORING_SETUP_CQE32) { + extra1 = ocqe->cqe.big_cqe[0]; + extra2 = ocqe->cqe.big_cqe[1]; + } + + __io_fill_cqe(ctx, cqe, ocqe->cqe.user_data, ocqe->cqe.res, + ocqe->cqe.flags, extra1, extra2); + } else { io_account_cq_overflow(ctx); + }
list_del(&ocqe->list); kfree(ocqe); @@ -774,7 +811,17 @@ struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow) ctx->cqe_cached++; if (ctx->flags & IORING_SETUP_CQE32) ctx->cqe_cached++; - return &ctx->cqes[off]; + return __io_get_ith_cqe(ctx, off); +} + +/* + * Retrieves a pointer to the ith CQE + */ +struct io_uring_cqe *__io_get_ith_cqe(struct io_ring_ctx *ctx, unsigned int i) +{ + return io_in_compat64(ctx) ? + (struct io_uring_cqe *)&ctx->cqes_compat[i] : + &ctx->cqes[i]; }
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags, @@ -793,14 +840,7 @@ bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags if (likely(cqe)) { trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
- WRITE_ONCE(cqe->user_data, user_data); - WRITE_ONCE(cqe->res, res); - WRITE_ONCE(cqe->flags, cflags); - - if (ctx->flags & IORING_SETUP_CQE32) { - WRITE_ONCE(cqe->big_cqe[0], 0); - WRITE_ONCE(cqe->big_cqe[1], 0); - } + __io_fill_cqe(ctx, cqe, user_data, res, cflags, 0, 0); return true; }
@@ -2240,7 +2280,9 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) /* double index for 128-byte SQEs, twice as long */ if (ctx->flags & IORING_SETUP_SQE128) head <<= 1; - return &ctx->sq_sqes[head]; + return io_in_compat64(ctx) ? + (struct io_uring_sqe *)&ctx->sq_sqes_compat[head] : + &ctx->sq_sqes[head]; }
/* drop invalid entries */ @@ -2267,6 +2309,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) do { const struct io_uring_sqe *sqe; struct io_kiocb *req; + struct io_uring_sqe native_sqe[2];
if (unlikely(!io_alloc_req_refill(ctx))) break; @@ -2276,6 +2319,11 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) io_req_add_to_cache(req, ctx); break; } + if (io_in_compat64(ctx)) { + convert_compat64_io_uring_sqe(ctx, native_sqe, + (struct compat_io_uring_sqe *)sqe); + sqe = native_sqe; + }
/* * Continue submitting even for sqe failure if the @@ -2480,6 +2528,9 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries { struct io_rings *rings; size_t off, cq_array_size, sq_array_size; + size_t cqe_size = io_in_compat64(ctx) ? + sizeof(struct compat_io_uring_cqe) : + sizeof(struct io_uring_cqe);
off = sizeof(*rings);
@@ -2492,7 +2543,7 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries if (cq_offset) *cq_offset = off;
- cq_array_size = array_size(sizeof(struct io_uring_cqe), cq_entries); + cq_array_size = array_size(cqe_size, cq_entries); if (cq_array_size == SIZE_MAX) return SIZE_MAX;
@@ -3120,20 +3171,19 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
#endif /* !CONFIG_MMU */
-static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz) +static int io_validate_ext_arg(struct io_ring_ctx *ctx, unsigned int flags, + const void __user *argp, size_t argsz) { if (flags & IORING_ENTER_EXT_ARG) { struct io_uring_getevents_arg arg;
- if (argsz != sizeof(arg)) - return -EINVAL; - if (copy_from_user(&arg, argp, sizeof(arg))) - return -EFAULT; + return copy_io_uring_getevents_arg_from_user(ctx, &arg, argp, argsz); } return 0; }
-static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, +static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned int flags, + const void __user *argp, size_t *argsz, #ifdef CONFIG_CHERI_PURECAP_UABI struct __kernel_timespec * __capability *ts, const sigset_t * __capability *sig) @@ -3143,6 +3193,7 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz #endif { struct io_uring_getevents_arg arg; + int ret;
/* * If EXT_ARG isn't set, then we have no timespec and the argp pointer @@ -3158,10 +3209,9 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz * EXT_ARG is set - ensure we agree on the size of it and copy in our * timespec and sigset_t pointers if good. */ - if (*argsz != sizeof(arg)) - return -EINVAL; - if (copy_from_user(&arg, argp, sizeof(arg))) - return -EFAULT; + ret = copy_io_uring_getevents_arg_from_user(ctx, &arg, argp, *argsz); + if (ret) + return ret; if (arg.pad) return -EINVAL; *sig = u64_to_user_ptr(arg.sigmask); @@ -3268,7 +3318,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ mutex_lock(&ctx->uring_lock); iopoll_locked: - ret2 = io_validate_ext_arg(flags, argp, argsz); + ret2 = io_validate_ext_arg(ctx, flags, argp, argsz); if (likely(!ret2)) { min_complete = min(min_complete, ctx->cq_entries); @@ -3279,7 +3329,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, const sigset_t __user *sig; struct __kernel_timespec __user *ts;
- ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); + ret2 = io_get_ext_arg(ctx, flags, argp, &argsz, &ts, &sig); if (likely(!ret2)) { min_complete = min(min_complete, ctx->cq_entries); @@ -3329,6 +3379,9 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, { struct io_rings *rings; size_t size, cqes_offset, sq_array_offset; + size_t sqe_size = io_in_compat64(ctx) ? + sizeof(struct compat_io_uring_sqe) : + sizeof(struct io_uring_sqe);
/* make sure these are sane, as we already accounted them */ ctx->sq_entries = p->sq_entries; @@ -3351,9 +3404,9 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, rings->cq_ring_entries = p->cq_entries;
if (p->flags & IORING_SETUP_SQE128) - size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries); + size = array_size(2 * sqe_size, p->sq_entries); else - size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); + size = array_size(sqe_size, p->sq_entries); if (size == SIZE_MAX) { io_mem_free(ctx->rings); ctx->rings = NULL; @@ -4107,48 +4160,48 @@ static int __init io_uring_init(void) #define BUILD_BUG_SQE_ELEM_SIZE(eoffset, esize, ename) \ __BUILD_BUG_VERIFY_OFFSET_SIZE(struct io_uring_sqe, eoffset, esize, ename) BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); - BUILD_BUG_SQE_ELEM(0, __u8, opcode); - BUILD_BUG_SQE_ELEM(1, __u8, flags); - BUILD_BUG_SQE_ELEM(2, __u16, ioprio); - BUILD_BUG_SQE_ELEM(4, __s32, fd); - BUILD_BUG_SQE_ELEM(8, __u64, off); - BUILD_BUG_SQE_ELEM(8, __u64, addr2); - BUILD_BUG_SQE_ELEM(8, __u32, cmd_op); + BUILD_BUG_SQE_ELEM(0, __u8, opcode); + BUILD_BUG_SQE_ELEM(1, __u8, flags); + BUILD_BUG_SQE_ELEM(2, __u16, ioprio); + BUILD_BUG_SQE_ELEM(4, __s32, fd); + BUILD_BUG_SQE_ELEM(8, __u64, off); + BUILD_BUG_SQE_ELEM(8, __u64, addr2); + BUILD_BUG_SQE_ELEM(8, __u32, cmd_op); BUILD_BUG_SQE_ELEM(12, __u32, __pad1); - BUILD_BUG_SQE_ELEM(16, __u64, addr); - BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); - BUILD_BUG_SQE_ELEM(24, __u32, len); + BUILD_BUG_SQE_ELEM(16, __u64, addr); + BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); + BUILD_BUG_SQE_ELEM(24, __u32, len); BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); - BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); - BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); - BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); - BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); - BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); - BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); - BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); - BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); - BUILD_BUG_SQE_ELEM(28, __u32, open_flags); - BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); - BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); - BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); - BUILD_BUG_SQE_ELEM(28, __u32, rename_flags); - BUILD_BUG_SQE_ELEM(28, __u32, unlink_flags); - BUILD_BUG_SQE_ELEM(28, __u32, hardlink_flags); - BUILD_BUG_SQE_ELEM(28, __u32, xattr_flags); - BUILD_BUG_SQE_ELEM(28, __u32, msg_ring_flags); - BUILD_BUG_SQE_ELEM(32, __u64, user_data); - BUILD_BUG_SQE_ELEM(40, __u16, buf_index); - BUILD_BUG_SQE_ELEM(40, __u16, buf_group); - BUILD_BUG_SQE_ELEM(42, __u16, personality); - BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); - BUILD_BUG_SQE_ELEM(44, __u32, file_index); - BUILD_BUG_SQE_ELEM(44, __u16, addr_len); - BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); - BUILD_BUG_SQE_ELEM(48, __u64, addr3); + BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); + BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); + BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); + BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); + BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); + BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); + BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); + BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); + BUILD_BUG_SQE_ELEM(28, __u32, open_flags); + BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); + BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); + BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); + BUILD_BUG_SQE_ELEM(28, __u32, rename_flags); + BUILD_BUG_SQE_ELEM(28, __u32, unlink_flags); + BUILD_BUG_SQE_ELEM(28, __u32, hardlink_flags); + BUILD_BUG_SQE_ELEM(28, __u32, xattr_flags); + 
BUILD_BUG_SQE_ELEM(28, __u32, msg_ring_flags); + BUILD_BUG_SQE_ELEM(32, __u64, user_data); + BUILD_BUG_SQE_ELEM(40, __u16, buf_index); + BUILD_BUG_SQE_ELEM(40, __u16, buf_group); + BUILD_BUG_SQE_ELEM(42, __u16, personality); + BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); + BUILD_BUG_SQE_ELEM(44, __u32, file_index); + BUILD_BUG_SQE_ELEM(44, __u16, addr_len); + BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); + BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); - BUILD_BUG_SQE_ELEM(56, __u64, __pad2); + BUILD_BUG_SQE_ELEM(56, __u64, __pad2);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) != sizeof(struct io_uring_rsrc_update)); @@ -4160,6 +4213,46 @@ static int __init io_uring_init(void) BUILD_BUG_ON(offsetof(struct io_uring_buf, resv) != offsetof(struct io_uring_buf_ring, tail));
+#ifdef CONFIG_COMPAT64 +#define BUILD_BUG_COMPAT_SQE_ELEM(eoffset, etype, ename) \ + __BUILD_BUG_VERIFY_OFFSET_SIZE(struct compat_io_uring_sqe, eoffset, sizeof(etype), ename) +#define BUILD_BUG_COMPAT_SQE_ELEM_SIZE(eoffset, esize, ename) \ + __BUILD_BUG_VERIFY_OFFSET_SIZE(struct compat_io_uring_sqe, eoffset, esize, ename) + BUILD_BUG_ON(sizeof(struct compat_io_uring_sqe) != 64); + BUILD_BUG_COMPAT_SQE_ELEM(0, __u8, opcode); + BUILD_BUG_COMPAT_SQE_ELEM(1, __u8, flags); + BUILD_BUG_COMPAT_SQE_ELEM(2, __u16, ioprio); + BUILD_BUG_COMPAT_SQE_ELEM(4, __s32, fd); + BUILD_BUG_COMPAT_SQE_ELEM(8, __u64, off); + BUILD_BUG_COMPAT_SQE_ELEM(8, __u64, addr2); + BUILD_BUG_COMPAT_SQE_ELEM(8, __u32, cmd_op); + BUILD_BUG_COMPAT_SQE_ELEM(12, __u32, __pad1); + BUILD_BUG_COMPAT_SQE_ELEM(16, __u64, addr); + BUILD_BUG_COMPAT_SQE_ELEM(16, __u64, splice_off_in); + BUILD_BUG_COMPAT_SQE_ELEM(24, __u32, len); + BUILD_BUG_COMPAT_SQE_ELEM(28, __kernel_rwf_t, rw_flags); + BUILD_BUG_COMPAT_SQE_ELEM(32, __u64, user_data); + BUILD_BUG_COMPAT_SQE_ELEM(40, __u16, buf_index); + BUILD_BUG_COMPAT_SQE_ELEM(40, __u16, buf_group); + BUILD_BUG_COMPAT_SQE_ELEM(42, __u16, personality); + BUILD_BUG_COMPAT_SQE_ELEM(44, __s32, splice_fd_in); + BUILD_BUG_COMPAT_SQE_ELEM(44, __u32, file_index); + BUILD_BUG_COMPAT_SQE_ELEM(44, __u16, addr_len); + BUILD_BUG_COMPAT_SQE_ELEM(46, __u16, __pad3[0]); + BUILD_BUG_COMPAT_SQE_ELEM(48, __u64, addr3); + BUILD_BUG_COMPAT_SQE_ELEM_SIZE(48, 0, cmd); + BUILD_BUG_COMPAT_SQE_ELEM(56, __u64, __pad2); + + BUILD_BUG_ON(sizeof(struct compat_io_uring_files_update) != + sizeof(struct compat_io_uring_rsrc_update)); + BUILD_BUG_ON(sizeof(struct compat_io_uring_rsrc_update) > + sizeof(struct compat_io_uring_rsrc_update2)); + + BUILD_BUG_ON(offsetof(struct compat_io_uring_buf_ring, bufs) != 0); + BUILD_BUG_ON(offsetof(struct compat_io_uring_buf, resv) != + offsetof(struct compat_io_uring_buf_ring, tail)); +#endif /* CONFIG_COMPAT64 */ + /* should fit into one byte */ BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); BUILD_BUG_ON(SQE_COMMON_FLAGS >= (1 << 8)); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 6d9720dd8f469..bafacea17fc19 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -5,6 +5,7 @@ #include <linux/lockdep.h> #include <linux/io_uring_types.h> #include "io-wq.h" +#include "uring_cmd.h" #include "slist.h" #include "filetable.h"
@@ -24,7 +25,9 @@ enum { IOU_STOP_MULTISHOT = -ECANCELED, };
+ struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow); +struct io_uring_cqe *__io_get_ith_cqe(struct io_ring_ctx *ctx, unsigned int i); bool io_req_cqe_overflow(struct io_kiocb *req); int io_run_task_work_sig(struct io_ring_ctx *ctx); int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked); @@ -93,11 +96,61 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx)
void io_cq_unlock_post(struct io_ring_ctx *ctx);
+static inline bool io_in_compat64(struct io_ring_ctx *ctx) +{ + return IS_ENABLED(CONFIG_COMPAT64) && ctx->compat; +} + +static inline void convert_compat64_io_uring_sqe(struct io_ring_ctx *ctx, + struct io_uring_sqe *sqe, + const struct compat_io_uring_sqe *compat_sqe) +{ +/* + * The struct io_uring_sqe contains anonymous unions and there is no field + * keeping track of which union's member is active. Because in all the cases, + * the unions are between integral types and the types are compatible, use the + * largest member of each union to perform the copy. Use this compile-time check + * to ensure that the union's members are not truncated during the conversion. + */ +#define BUILD_BUG_COMPAT_SQE_UNION_ELEM(elem1, elem2) \ + BUILD_BUG_ON(sizeof_field(struct compat_io_uring_sqe, elem1) != \ + (offsetof(struct compat_io_uring_sqe, elem2) - \ + offsetof(struct compat_io_uring_sqe, elem1))) + + sqe->opcode = READ_ONCE(compat_sqe->opcode); + sqe->flags = READ_ONCE(compat_sqe->flags); + sqe->ioprio = READ_ONCE(compat_sqe->ioprio); + sqe->fd = READ_ONCE(compat_sqe->fd); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr2, addr); + sqe->addr2 = READ_ONCE(compat_sqe->addr2); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr, len); + sqe->addr = READ_ONCE(compat_sqe->addr); + sqe->len = READ_ONCE(compat_sqe->len); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(rw_flags, user_data); + sqe->rw_flags = READ_ONCE(compat_sqe->rw_flags); + sqe->user_data = READ_ONCE(compat_sqe->user_data); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(buf_index, personality); + sqe->buf_index = READ_ONCE(compat_sqe->buf_index); + sqe->personality = READ_ONCE(compat_sqe->personality); + BUILD_BUG_COMPAT_SQE_UNION_ELEM(splice_fd_in, addr3); + sqe->splice_fd_in = READ_ONCE(compat_sqe->splice_fd_in); + if (sqe->opcode == IORING_OP_URING_CMD) { + size_t compat_cmd_size = compat_uring_cmd_pdu_size(ctx->flags & + IORING_SETUP_SQE128); + + memcpy(sqe->cmd, compat_sqe->cmd, compat_cmd_size); + } else { + sqe->addr3 = READ_ONCE(compat_sqe->addr3); + sqe->__pad2[0] = READ_ONCE(compat_sqe->__pad2[0]); + } +#undef BUILD_BUG_COMPAT_SQE_UNION_ELEM +} + static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, bool overflow) { if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) { - struct io_uring_cqe *cqe = &ctx->cqes[ctx->cqe_cached]; + struct io_uring_cqe *cqe = __io_get_ith_cqe(ctx, ctx->cqe_cached);
ctx->cached_cq_tail++; ctx->cqe_cached++; @@ -114,10 +167,40 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) return io_get_cqe_overflow(ctx, false); }
+static inline void __io_fill_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe *cqe, + u64 user_data, s32 res, u32 cflags, + u64 extra1, u64 extra2) +{ + if (io_in_compat64(ctx)) { + struct compat_io_uring_cqe *compat_cqe = (struct compat_io_uring_cqe *)cqe; + + WRITE_ONCE(compat_cqe->user_data, user_data); + WRITE_ONCE(compat_cqe->res, res); + WRITE_ONCE(compat_cqe->flags, cflags); + + if (ctx->flags & IORING_SETUP_CQE32) { + WRITE_ONCE(compat_cqe->big_cqe[0], extra1); + WRITE_ONCE(compat_cqe->big_cqe[1], extra2); + } + return; + } + + WRITE_ONCE(cqe->user_data, user_data); + WRITE_ONCE(cqe->res, res); + WRITE_ONCE(cqe->flags, cflags); + + if (ctx->flags & IORING_SETUP_CQE32) { + WRITE_ONCE(cqe->big_cqe[0], extra1); + WRITE_ONCE(cqe->big_cqe[1], extra2); + } +} + static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req) { struct io_uring_cqe *cqe; + u64 extra1 = 0; + u64 extra2 = 0;
/* * If we can't get a cq entry, userspace overflowed the @@ -128,24 +211,17 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, if (unlikely(!cqe)) return io_req_cqe_overflow(req);
+ if (ctx->flags & IORING_SETUP_CQE32 && req->flags & REQ_F_CQE32_INIT) { + extra1 = req->extra1; + extra2 = req->extra2; + } + trace_io_uring_complete(req->ctx, req, req->cqe.user_data, req->cqe.res, req->cqe.flags, - (req->flags & REQ_F_CQE32_INIT) ? req->extra1 : 0, - (req->flags & REQ_F_CQE32_INIT) ? req->extra2 : 0); + extra1, extra2);
- memcpy(cqe, &req->cqe, sizeof(*cqe)); - - if (ctx->flags & IORING_SETUP_CQE32) { - u64 extra1 = 0, extra2 = 0; - - if (req->flags & REQ_F_CQE32_INIT) { - extra1 = req->extra1; - extra2 = req->extra2; - } - - WRITE_ONCE(cqe->big_cqe[0], extra1); - WRITE_ONCE(cqe->big_cqe[1], extra2); - } + __io_fill_cqe(ctx, cqe, req->cqe.user_data, req->cqe.res, + req->cqe.flags, extra1, extra2); return true; }
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 182e594b56c6e..110edd1cb84e0 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -16,6 +16,7 @@ #include "kbuf.h"
#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) +#define IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct compat_io_uring_buf))
#define BGID_ARRAY 64
@@ -28,6 +29,32 @@ struct io_provide_buf { __u16 bid; };
+static int get_compat64_io_uring_buf_reg(struct io_uring_buf_reg *reg, + const void __user *user_reg) +{ + struct compat_io_uring_buf_reg compat_reg; + + if (copy_from_user(&compat_reg, user_reg, sizeof(compat_reg))) + return -EFAULT; + reg->ring_addr = compat_reg.ring_addr; + reg->ring_entries = compat_reg.ring_entries; + reg->bgid = compat_reg.bgid; + reg->pad = compat_reg.pad; + memcpy(reg->resv, compat_reg.resv, sizeof(reg->resv)); + return 0; +} + +static int copy_io_uring_buf_reg_from_user(struct io_ring_ctx *ctx, + struct io_uring_buf_reg *reg, + const void __user *arg) +{ + if (io_in_compat64(ctx)) + return get_compat64_io_uring_buf_reg(reg, arg); + if (copy_from_user(reg, arg, sizeof(*reg))) + return -EFAULT; + return 0; +} + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { @@ -125,6 +152,35 @@ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, return NULL; }
+static void __user *io_ring_buffer_select_compat64(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags) +{ + struct compat_io_uring_buf_ring *br = bl->buf_ring_compat; + struct compat_io_uring_buf *buf; + __u16 head = bl->head; + + if (unlikely(smp_load_acquire(&br->tail) == head)) + return NULL; + + head &= bl->mask; + if (head < IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE) { + buf = &br->bufs[head]; + } else { + int off = head & (IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE - 1); + int index = head / IO_BUFFER_LIST_COMPAT_BUF_PER_PAGE; + buf = page_address(bl->buf_pages[index]); + buf += off; + } + if (*len == 0 || *len > buf->len) + *len = buf->len; + req->flags |= REQ_F_BUFFER_RING; + req->buf_list = bl; + req->buf_index = buf->bid; + + return compat_ptr(buf->addr); +} + static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl, unsigned int issue_flags) @@ -151,6 +207,23 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->buf_list = bl; req->buf_index = buf->bid;
+ return u64_to_user_ptr(buf->addr); +} + +static void __user *io_ring_buffer_select_any(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags) +{ + void __user *ret; + + if (io_in_compat64(req->ctx)) + ret = io_ring_buffer_select_compat64(req, len, bl, issue_flags); + else + ret = io_ring_buffer_select(req, len, bl, issue_flags); + + if (!ret) + return ret; + if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) { /* * If we came in unlocked, we have no choice but to consume the @@ -165,7 +238,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->buf_list = NULL; bl->head++; } - return u64_to_user_ptr(buf->addr); + return ret; }
void __user *io_buffer_select(struct io_kiocb *req, size_t *len, @@ -180,7 +253,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, bl = io_buffer_get_list(ctx, req->buf_index); if (likely(bl)) { if (bl->buf_nr_pages) - ret = io_ring_buffer_select(req, len, bl, issue_flags); + ret = io_ring_buffer_select_any(req, len, bl, issue_flags); else ret = io_provided_buffer_select(req, len, bl); } @@ -215,9 +288,12 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return 0;
if (bl->buf_nr_pages) { + __u16 tail = io_in_compat64(ctx) ? + bl->buf_ring_compat->tail : + bl->buf_ring->tail; int j;
- i = bl->buf_ring->tail - bl->head; + i = tail - bl->head; for (j = 0; j < bl->buf_nr_pages; j++) unpin_user_page(bl->buf_pages[j]); kvfree(bl->buf_pages); @@ -469,13 +545,13 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) { - struct io_uring_buf_ring *br; struct io_uring_buf_reg reg; struct io_buffer_list *bl, *free_bl = NULL; struct page **pages; + size_t pages_size; int nr_pages;
- if (copy_from_user(®, arg, sizeof(reg))) + if (copy_io_uring_buf_reg_from_user(ctx, ®, arg)) return -EFAULT;
if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) @@ -508,19 +584,19 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -ENOMEM; }
- pages = io_pin_pages(reg.ring_addr, - size_mul(sizeof(struct io_uring_buf), reg.ring_entries), - &nr_pages); + pages_size = io_in_compat64(ctx) ? + size_mul(sizeof(struct compat_io_uring_buf), reg.ring_entries) : + size_mul(sizeof(struct io_uring_buf), reg.ring_entries); + pages = io_pin_pages(reg.ring_addr, pages_size, &nr_pages); if (IS_ERR(pages)) { kfree(free_bl); return PTR_ERR(pages); }
- br = page_address(pages[0]); bl->buf_pages = pages; bl->buf_nr_pages = nr_pages; bl->nr_entries = reg.ring_entries; - bl->buf_ring = br; + bl->buf_ring = page_address(pages[0]); bl->mask = reg.ring_entries - 1; io_buffer_add_list(ctx, bl, reg.bgid); return 0; @@ -531,7 +607,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_uring_buf_reg reg; struct io_buffer_list *bl;
- if (copy_from_user(®, arg, sizeof(reg))) + if (copy_io_uring_buf_reg_from_user(ctx, ®, arg)) return -EFAULT; if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) return -EINVAL; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index c23e15d7d3caf..1aa5bbbc5d628 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -2,6 +2,7 @@ #ifndef IOU_KBUF_H #define IOU_KBUF_H
+#include <linux/io_uring_types.h> #include <uapi/linux/io_uring.h>
struct io_buffer_list { @@ -13,7 +14,10 @@ struct io_buffer_list { struct list_head buf_list; struct { struct page **buf_pages; - struct io_uring_buf_ring *buf_ring; + union { + struct io_uring_buf_ring *buf_ring; + struct compat_io_uring_buf_ring *buf_ring_compat; + }; }; }; __u16 bgid; diff --git a/io_uring/net.c b/io_uring/net.c index c586278858e7e..4c133bc6f9d1d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -4,6 +4,7 @@ #include <linux/file.h> #include <linux/slab.h> #include <linux/net.h> +#include <linux/uio.h> #include <linux/compat.h> #include <net/compat.h> #include <linux/io_uring.h> @@ -435,7 +436,9 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req, } else if (msg.msg_iovlen > 1) { return -EINVAL; } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) + void *iov = iovec_from_user(msg.msg_iov, 1, 1, iomsg->fast_iov, + req->ctx->compat); + if (IS_ERR(iov)) return -EFAULT; sr->len = iomsg->fast_iov[0].iov_len; iomsg->free_iov = NULL; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 41e192de9e8a7..8a2b5891f1030 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -23,6 +23,95 @@ struct io_rsrc_update { u32 offset; };
+static int get_compat64_io_uring_rsrc_update(struct io_uring_rsrc_update2 *up2, + const void __user *user_up) +{ + struct compat_io_uring_rsrc_update compat_up; + + if (copy_from_user(&compat_up, user_up, sizeof(compat_up))) + return -EFAULT; + up2->offset = compat_up.offset; + up2->resv = compat_up.resv; + up2->data = compat_up.data; + return 0; +} + +static int get_compat64_io_uring_rsrc_update2(struct io_uring_rsrc_update2 *up2, + const void __user *user_up2) +{ + struct compat_io_uring_rsrc_update2 compat_up2; + + if (copy_from_user(&compat_up2, user_up2, sizeof(compat_up2))) + return -EFAULT; + up2->offset = compat_up2.offset; + up2->resv = compat_up2.resv; + up2->data = compat_up2.data; + up2->tags = compat_up2.tags; + up2->nr = compat_up2.nr; + up2->resv2 = compat_up2.resv2; + return 0; +} + +static int get_compat64_io_uring_rsrc_register(struct io_uring_rsrc_register *rr, + const void __user *user_rr) +{ + struct compat_io_uring_rsrc_register compat_rr; + + if (copy_from_user(&compat_rr, user_rr, sizeof(compat_rr))) + return -EFAULT; + rr->nr = compat_rr.nr; + rr->flags = compat_rr.flags; + rr->resv2 = compat_rr.resv2; + rr->data = compat_rr.data; + rr->tags = compat_rr.tags; + return 0; +} + +static int copy_io_uring_rsrc_update_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update2 *up2, + const void __user *arg) +{ + if (io_in_compat64(ctx)) + return get_compat64_io_uring_rsrc_update(up2, arg); + if (copy_from_user(up2, arg, sizeof(struct io_uring_rsrc_update))) + return -EFAULT; + return 0; +} + +static int copy_io_uring_rsrc_update2_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update2 *up2, + const void __user *arg, + size_t size) +{ + if (io_in_compat64(ctx)) { + if (size != sizeof(struct compat_io_uring_rsrc_update2)) + return -EINVAL; + return get_compat64_io_uring_rsrc_update2(up2, arg); + } + if (size != sizeof(*up2)) + return -EINVAL; + if (copy_from_user(up2, arg, sizeof(*up2))) + return -EFAULT; + return 0; +} + +static int copy_io_uring_rsrc_register_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_register *rr, + const void __user *arg, + size_t size) +{ + if (io_in_compat64(ctx)) { + if (size != sizeof(struct compat_io_uring_rsrc_register)) + return -EINVAL; + return get_compat64_io_uring_rsrc_register(rr, arg); + } + if (size != sizeof(*rr)) + return -EINVAL; + if (copy_from_user(rr, arg, size)) + return -EFAULT; + return 0; +} + static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, struct io_mapped_ubuf **pimu, struct page **last_hpage); @@ -601,7 +690,7 @@ int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, if (!nr_args) return -EINVAL; memset(&up, 0, sizeof(up)); - if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) + if (copy_io_uring_rsrc_update_from_user(ctx, &up, arg)) return -EFAULT; if (up.resv || up.resv2) return -EINVAL; @@ -612,11 +701,11 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, unsigned size, unsigned type) { struct io_uring_rsrc_update2 up; + int ret;
- if (size != sizeof(up)) - return -EINVAL; - if (copy_from_user(&up, arg, sizeof(up))) - return -EFAULT; + ret = copy_io_uring_rsrc_update2_from_user(ctx, &up, arg, size); + if (ret) + return ret; if (!up.nr || up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, type, &up, up.nr); @@ -626,14 +715,11 @@ __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, unsigned int size, unsigned int type) { struct io_uring_rsrc_register rr; + int ret;
- /* keep it extendible */ - if (size != sizeof(rr)) - return -EINVAL; - - memset(&rr, 0, sizeof(rr)); - if (copy_from_user(&rr, arg, size)) - return -EFAULT; + ret = copy_io_uring_rsrc_register_from_user(ctx, &rr, arg, size); + if (ret) + return ret; if (!rr.nr || rr.resv2) return -EINVAL; if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE) diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 96f77450cf4e2..20d045b0dd831 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -12,6 +12,30 @@ #include "io_uring.h" #include "tctx.h"
+static int get_compat64_io_uring_rsrc_update(struct io_uring_rsrc_update *up, + const void __user *user_up) +{ + struct compat_io_uring_rsrc_update compat_up; + + if (copy_from_user(&compat_up, user_up, sizeof(compat_up))) + return -EFAULT; + up->offset = compat_up.offset; + up->resv = compat_up.resv; + up->data = compat_up.data; + return 0; +} + +static int copy_io_uring_rsrc_update_ringfd_from_user(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update *up, + const void __user *arg) +{ + if (io_in_compat64(ctx)) + return get_compat64_io_uring_rsrc_update(up, arg); + if (copy_from_user(up, arg, sizeof(struct io_uring_rsrc_update))) + return -EFAULT; + return 0; +} + static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, struct task_struct *task) { @@ -233,6 +257,15 @@ static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd, return -EBUSY; }
+static void __user *get_ith_io_uring_rsrc_update(struct io_ring_ctx *ctx, + void __user *__arg, + int i) +{ + if (io_in_compat64(ctx)) + return &((struct compat_io_uring_rsrc_update __user *)__arg)[i]; + return &((struct io_uring_rsrc_update __user *)__arg)[i]; +} + /* * Register a ring fd to avoid fdget/fdput for each io_uring_enter() * invocation. User passes in an array of struct io_uring_rsrc_update @@ -244,8 +277,6 @@ static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd, int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, unsigned nr_args) { - struct io_uring_rsrc_update __user *arg = __arg; - struct io_uring_rsrc_update reg; struct io_uring_task *tctx; int ret, i;
@@ -260,9 +291,14 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
tctx = current->io_uring; for (i = 0; i < nr_args; i++) { + void __user *arg; + __u32 __user *arg_offset; + struct io_uring_rsrc_update reg; int start, end;
- if (copy_from_user(®, &arg[i], sizeof(reg))) { + arg = get_ith_io_uring_rsrc_update(ctx, __arg, i); + + if (copy_io_uring_rsrc_update_ringfd_from_user(ctx, ®, arg)) { ret = -EFAULT; break; } @@ -289,7 +325,10 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, break;
reg.offset = ret; - if (put_user(reg.offset, &arg[i].offset)) { + arg_offset = io_in_compat64(ctx) ? + &((struct compat_io_uring_rsrc_update __user *)arg)->offset : + &((struct io_uring_rsrc_update __user *)arg)->offset; + if (put_user(reg.offset, arg_offset)) { fput(tctx->registered_rings[reg.offset]); tctx->registered_rings[reg.offset] = NULL; ret = -EFAULT; @@ -303,9 +342,7 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, unsigned nr_args) { - struct io_uring_rsrc_update __user *arg = __arg; struct io_uring_task *tctx = current->io_uring; - struct io_uring_rsrc_update reg; int ret = 0, i;
if (!nr_args || nr_args > IO_RINGFD_REG_MAX) @@ -314,7 +351,12 @@ int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, return 0;
for (i = 0; i < nr_args; i++) { - if (copy_from_user(®, &arg[i], sizeof(reg))) { + void __user *arg; + struct io_uring_rsrc_update reg; + + arg = get_ith_io_uring_rsrc_update(ctx, __arg, i); + + if (copy_io_uring_rsrc_update_ringfd_from_user(ctx, ®, arg)) { ret = -EFAULT; break; } diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index 7c6697d13cb2e..96d8686db8342 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -11,3 +11,7 @@ int io_uring_cmd_prep_async(struct io_kiocb *req); #define uring_cmd_pdu_size(is_sqe128) \ ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \ offsetof(struct io_uring_sqe, cmd)) + +#define compat_uring_cmd_pdu_size(is_sqe128) \ + ((1 + !!(is_sqe128)) * sizeof(struct compat_io_uring_sqe) - \ + offsetof(struct compat_io_uring_sqe, cmd))