Introduce compat versions of the structs exposed in the uAPI headers that might contain pointers as members. Also, implement functions that convert the compat versions to the native versions of these structs.
Pull the cqes member out of the rings struct so that we can have a union between cqes and cqes_compat. This is done in a similar way to commit 75b28affdd6a ("io_uring: allocate the two rings together"), where sq_array was pulled out of the rings struct.
A subsequent patch is going to change the io_uring structs to enable them to support new architectures. On such architectures, the current struct layout still needs to be supported for compat tasks.
Signed-off-by: Tudor Cretu <tudor.cretu@arm.com>
---
 fs/io_uring.c | 502 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 434 insertions(+), 68 deletions(-)
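A note for reviewers: the snippet below is a minimal, stand-alone user-space sketch of the conversion pattern this patch applies, included only to make the intent easier to follow. None of the names in it (example_compat_entry, example_entry, convert_example_entry) exist in the kernel; it simply illustrates why entries coming from a task with a different struct layout are copied field by field rather than with a single memcpy().

#include <stdint.h>
#include <stdio.h>

/* Layout as a compat task would produce it -- illustrative only. */
struct example_compat_entry {
	uint32_t addr;		/* user pointer narrowed to 32 bits */
	uint32_t len;
	uint64_t user_data;
};

/* Hypothetical native layout with a full-width pointer member. */
struct example_entry {
	uint64_t addr;
	uint32_t len;
	uint32_t pad;
	uint64_t user_data;
};

/* Field-by-field conversion: a plain memcpy() would misread the layout. */
static void convert_example_entry(struct example_entry *dst,
				  const struct example_compat_entry *src)
{
	dst->addr = src->addr;		/* zero-extends the narrow value */
	dst->len = src->len;
	dst->pad = 0;
	dst->user_data = src->user_data;
}

int main(void)
{
	struct example_compat_entry c = { .addr = 0x1000, .len = 64, .user_data = 7 };
	struct example_entry n;

	convert_example_entry(&n, &c);
	printf("compat size %zu -> native size %zu, user_data %llu\n",
	       sizeof(c), sizeof(n), (unsigned long long)n.user_data);
	return 0;
}

The helpers added in the diff (convert_compat_io_uring_sqe(), convert_compat_io_uring_cqe() and the get_compat_io_uring_*() copy-in functions) follow the same shape, using READ_ONCE() or copy_from_user() on the compat source before widening each field.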
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d901be8da630..2db37c68e715 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -115,6 +115,93 @@
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
+struct compat_io_uring_sqe {
+	__u8	opcode;
+	__u8	flags;
+	__u16	ioprio;
+	__s32	fd;
+	union {
+		__u64	off;
+		__u64	addr2;
+	};
+	union {
+		__u64	addr;
+		__u64	splice_off_in;
+	};
+	__u32	len;
+	union {
+		__kernel_rwf_t	rw_flags;
+		__u32		fsync_flags;
+		__u16		poll_events;
+		__u32		poll32_events;
+		__u32		sync_range_flags;
+		__u32		msg_flags;
+		__u32		timeout_flags;
+		__u32		accept_flags;
+		__u32		cancel_flags;
+		__u32		open_flags;
+		__u32		statx_flags;
+		__u32		fadvise_advice;
+		__u32		splice_flags;
+		__u32		rename_flags;
+		__u32		unlink_flags;
+		__u32		hardlink_flags;
+	};
+	__u64	user_data;
+	union {
+		__u16	buf_index;
+		__u16	buf_group;
+	} __packed;
+	__u16	personality;
+	union {
+		__s32	splice_fd_in;
+		__u32	file_index;
+	};
+	__u64	__pad2[2];
+};
+
+struct compat_io_uring_cqe {
+	__u64	user_data;
+	__s32	res;
+	__u32	flags;
+};
+
+struct compat_io_uring_files_update {
+	__u32	offset;
+	__u32	resv;
+	__aligned_u64	fds;
+};
+
+struct compat_io_uring_rsrc_register {
+	__u32	nr;
+	__u32	resv;
+	__u64	resv2;
+	__aligned_u64	data;
+	__aligned_u64	tags;
+};
+
+struct compat_io_uring_rsrc_update {
+	__u32	offset;
+	__u32	resv;
+	__aligned_u64	data;
+};
+
+struct compat_io_uring_rsrc_update2 {
+	__u32	offset;
+	__u32	resv;
+	__aligned_u64	data;
+	__aligned_u64	tags;
+	__u32	nr;
+	__u32	resv2;
+};
+
+struct compat_io_uring_getevents_arg {
+	__u64	sigmask;
+	__u32	sigmask_sz;
+	__u32	pad;
+	__u64	ts;
+};
+
 struct io_uring {
 	u32 head ____cacheline_aligned_in_smp;
 	u32 tail ____cacheline_aligned_in_smp;
@@ -188,14 +275,6 @@ struct io_rings {
 	 * ordered with any other data.
 	 */
 	u32			cq_overflow;
-	/*
-	 * Ring buffer of completion events.
-	 *
-	 * The kernel writes completion events fresh every time they are
-	 * produced, so the application is allowed to modify pending
-	 * entries.
-	 */
-	struct io_uring_cqe	cqes[] ____cacheline_aligned_in_smp;
 };
 
 enum io_uring_cmd_flags {
@@ -216,7 +295,10 @@ struct io_mapped_ubuf {
 struct io_ring_ctx;
 
 struct io_overflow_cqe {
-	struct io_uring_cqe cqe;
+	union {
+		struct compat_io_uring_cqe compat_cqe;
+		struct io_uring_cqe cqe;
+	};
 	struct list_head list;
 };
 
@@ -345,6 +427,17 @@ struct io_ring_ctx {
 	struct percpu_ref	refs;
 
 	struct io_rings		*rings;
+	/*
+	 * Ring buffer of completion events.
+	 *
+	 * The kernel writes completion events fresh every time they are
+	 * produced, so the application is allowed to modify pending
+	 * entries.
+	 */
+	union {
+		struct compat_io_uring_cqe *cqes_compat;
+		struct io_uring_cqe *cqes;
+	};
 	unsigned int		flags;
 	unsigned int		compat: 1;
 	unsigned int		drain_next: 1;
@@ -371,7 +464,10 @@ struct io_ring_ctx {
 	 * array.
 	 */
 	u32			*sq_array;
-	struct io_uring_sqe	*sq_sqes;
+	union {
+		struct compat_io_uring_sqe *sq_sqes_compat;
+		struct io_uring_sqe *sq_sqes;
+	};
 	unsigned		cached_sq_head;
 	unsigned		sq_entries;
 	struct list_head	defer_list;
@@ -1804,10 +1900,9 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+static inline void *io_get_cqe(struct io_ring_ctx *ctx)
 {
-	struct io_rings *rings = ctx->rings;
-	unsigned tail, mask = ctx->cq_entries - 1;
+	unsigned int cq_idx, tail, mask = ctx->cq_entries - 1;
 
 	/*
 	 * writes to the cq entry need to come after reading head; the
@@ -1818,8 +1913,19 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
 		return NULL;
 
 	tail = ctx->cached_cq_tail++;
-	return &rings->cqes[tail & mask];
+	cq_idx = tail & mask;
+	return ctx->compat ? (void *)&ctx->cqes_compat[cq_idx] : (void *)&ctx->cqes[cq_idx];
+}
+
+#ifdef CONFIG_COMPAT
+static inline void convert_compat_io_uring_cqe(struct io_uring_cqe *cqe,
+					       const struct compat_io_uring_cqe *compat_cqe)
+{
+	cqe->user_data = READ_ONCE(compat_cqe->user_data);
+	cqe->res = READ_ONCE(compat_cqe->res);
+	cqe->flags = READ_ONCE(compat_cqe->flags);
 }
+#endif
 
 static void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
@@ -1896,7 +2002,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		posted = false;
 	spin_lock(&ctx->completion_lock);
 	while (!list_empty(&ctx->cq_overflow_list)) {
-		struct io_uring_cqe *cqe = io_get_cqe(ctx);
+		void *cqe = io_get_cqe(ctx);
 		struct io_overflow_cqe *ocqe;
 
 		if (!cqe && !force)
@@ -1904,7 +2010,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		ocqe = list_first_entry(&ctx->cq_overflow_list,
 					struct io_overflow_cqe, list);
 		if (cqe)
-			memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
+			memcpy(cqe, ctx->compat ? (void *)&ocqe->compat_cqe : (void *)&ocqe->cqe,
+			       ctx->compat ? sizeof(ocqe->compat_cqe) : sizeof(ocqe->cqe));
 		else
 			io_account_cq_overflow(ctx);
 
@@ -2010,9 +2117,18 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 			   ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
 
 	}
-	ocqe->cqe.user_data = user_data;
-	ocqe->cqe.res = res;
-	ocqe->cqe.flags = cflags;
+#ifdef CONFIG_COMPAT
+	if (ctx->compat) {
+		ocqe->compat_cqe.user_data = user_data;
+		ocqe->compat_cqe.res = res;
+		ocqe->compat_cqe.flags = cflags;
+	} else
+#endif
+	{
+		ocqe->cqe.user_data = user_data;
+		ocqe->cqe.res = res;
+		ocqe->cqe.flags = cflags;
+	}
 	list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
 	return true;
 }
@@ -2020,7 +2136,7 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 				 s32 res, u32 cflags)
 {
-	struct io_uring_cqe *cqe;
+	void *cqe;
 
 	/*
 	 * If we can't get a cq entry, userspace overflowed the
@@ -2029,9 +2145,22 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 	 */
 	cqe = io_get_cqe(ctx);
 	if (likely(cqe)) {
-		WRITE_ONCE(cqe->user_data, user_data);
-		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, cflags);
+#ifdef CONFIG_COMPAT
+		if (ctx->compat) {
+			struct compat_io_uring_cqe *compat_cqe = (struct compat_io_uring_cqe *)cqe;
+
+			WRITE_ONCE(compat_cqe->user_data, user_data);
+			WRITE_ONCE(compat_cqe->res, res);
+			WRITE_ONCE(compat_cqe->flags, cflags);
+		} else
+#endif
+		{
+			struct io_uring_cqe *native_cqe = (struct io_uring_cqe *)cqe;
+
+			WRITE_ONCE(native_cqe->user_data, user_data);
+			WRITE_ONCE(native_cqe->res, res);
+			WRITE_ONCE(native_cqe->flags, cflags);
+		}
 		return true;
 	}
 	return io_cqring_event_overflow(ctx, user_data, res, cflags);
@@ -7811,7 +7940,7 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
  * used, it's important that those reads are done through READ_ONCE() to
  * prevent a re-load down the line.
  */
-static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
+static const void *io_get_sqe(struct io_ring_ctx *ctx)
 {
 	unsigned head, mask = ctx->sq_entries - 1;
 	unsigned sq_idx = ctx->cached_sq_head++ & mask;
@@ -7826,7 +7955,8 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 	 */
 	head = READ_ONCE(ctx->sq_array[sq_idx]);
 	if (likely(head < ctx->sq_entries))
-		return &ctx->sq_sqes[head];
+		return ctx->compat ? (void *)&ctx->sq_sqes_compat[head]
+				   : (void *)&ctx->sq_sqes[head];
 
 	/* drop invalid entries */
 	ctx->cq_extra--;
@@ -7835,6 +7965,40 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
+#ifdef CONFIG_COMPAT
+static void convert_compat_io_uring_sqe(struct io_uring_sqe *sqe,
+					const struct compat_io_uring_sqe *compat_sqe)
+{
+/*
+ * The struct io_uring_sqe contains anonymous unions and there is no field keeping track of which
+ * union's member is active. Because in all the cases, the unions are between integral types and
+ * the types are compatible, use the largest member of each union to perform the copy. Use this
+ * compile-time check to ensure that the union's members are not truncated during the conversion.
+ */
+#define BUILD_BUG_COMPAT_SQE_UNION_ELEM(elem1, elem2) \
+	BUILD_BUG_ON(sizeof_field(struct compat_io_uring_sqe, elem1) != \
+		     (offsetof(struct compat_io_uring_sqe, elem2) - \
+		      offsetof(struct compat_io_uring_sqe, elem1)))
+	sqe->opcode = READ_ONCE(compat_sqe->opcode);
+	sqe->flags = READ_ONCE(compat_sqe->flags);
+	sqe->ioprio = READ_ONCE(compat_sqe->ioprio);
+	sqe->fd = READ_ONCE(compat_sqe->fd);
+	BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr2, addr);
+	sqe->addr2 = READ_ONCE(compat_sqe->addr2);
+	BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr, len);
+	sqe->addr = READ_ONCE(compat_sqe->addr);
+	sqe->len = READ_ONCE(compat_sqe->len);
+	BUILD_BUG_COMPAT_SQE_UNION_ELEM(rw_flags, user_data);
+	sqe->rw_flags = READ_ONCE(compat_sqe->rw_flags);
+	sqe->user_data = READ_ONCE(compat_sqe->user_data);
+	BUILD_BUG_COMPAT_SQE_UNION_ELEM(buf_index, personality);
+	sqe->buf_index = READ_ONCE(compat_sqe->buf_index);
+	sqe->personality = READ_ONCE(compat_sqe->personality);
+	BUILD_BUG_COMPAT_SQE_UNION_ELEM(splice_fd_in, __pad2);
+	sqe->splice_fd_in = READ_ONCE(compat_sqe->splice_fd_in);
+}
+#endif
+
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 	__must_hold(&ctx->uring_lock)
 {
@@ -7849,7 +8013,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 
 	io_submit_state_start(&ctx->submit_state, nr);
 	do {
-		const struct io_uring_sqe *sqe;
+		const void *sqe;
 		struct io_kiocb *req;
 
 		if (unlikely(!io_alloc_req_refill(ctx))) {
@@ -7863,6 +8027,14 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 			wq_stack_add_head(&req->comp_list,
 					  &ctx->submit_state.free_list);
 			break;
 		}
+#ifdef CONFIG_COMPAT
+		if (ctx->compat) {
+			struct io_uring_sqe native_sqe;
+
+			convert_compat_io_uring_sqe(&native_sqe, sqe);
+			sqe = &native_sqe;
+		}
+#endif
 		/* will complete beyond this point, count as submitted */
 		submitted++;
 		if (io_submit_sqe(ctx, req, sqe)) {
@@ -9389,13 +9561,29 @@ static void *io_mem_alloc(size_t size)
 	return (void *) __get_free_pages(gfp, get_order(size));
 }
 
-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
-				size_t *sq_offset)
+static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
+				unsigned int cq_entries, size_t *cq_offset, size_t *sq_offset)
 {
 	struct io_rings *rings;
-	size_t off, sq_array_size;
+	size_t off, cq_array_size, sq_array_size;
+
+	off = sizeof(*rings);
+
+#ifdef CONFIG_SMP
+	off = ALIGN(off, SMP_CACHE_BYTES);
+	if (off == 0)
+		return SIZE_MAX;
+#endif
+
+	if (cq_offset)
+		*cq_offset = off;
+
+	cq_array_size = array_size(ctx->compat ? sizeof(struct compat_io_uring_cqe)
+					       : sizeof(struct io_uring_cqe), cq_entries);
+	if (cq_array_size == SIZE_MAX)
+		return SIZE_MAX;
 
-	off = struct_size(rings, cqes, cq_entries);
+	off = size_add(off, cq_array_size);
 	if (off == SIZE_MAX)
 		return SIZE_MAX;
 
@@ -9412,7 +9600,8 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
 	if (sq_array_size == SIZE_MAX)
 		return SIZE_MAX;
 
-	if (check_add_overflow(off, sq_array_size, &off))
+	off = size_add(off, sq_array_size);
+	if (off == SIZE_MAX)
 		return SIZE_MAX;
 
 	return off;
@@ -9972,7 +10161,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 
 	io_mem_free(ctx->rings);
-	io_mem_free(ctx->sq_sqes);
+	io_mem_free(ctx->compat ? (void *)ctx->sq_sqes_compat : (void *)ctx->sq_sqes);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
@@ -10518,6 +10707,21 @@ static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
 	return -EBUSY;
 }
 
+#ifdef CONFIG_COMPAT
+static int get_compat_io_uring_rsrc_update(struct io_uring_rsrc_update *up,
+					   const void __user *user_up)
+{
+	struct compat_io_uring_rsrc_update compat_up;
+
+	if (unlikely(copy_from_user(&compat_up, user_up, sizeof(compat_up))))
+		return -EFAULT;
+	up->offset = compat_up.offset;
+	up->resv = compat_up.resv;
+	up->data = compat_up.data;
+	return 0;
+}
+#endif
+
 /*
  * Register a ring fd to avoid fdget/fdput for each io_uring_enter()
  * invocation. User passes in an array of struct io_uring_rsrc_update
@@ -10547,10 +10751,18 @@ static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
 	for (i = 0; i < nr_args; i++) {
 		int start, end;
 
-		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
-			ret = -EFAULT;
-			break;
-		}
+#ifdef CONFIG_COMPAT
+		if (ctx->compat) {
+			if (get_compat_io_uring_rsrc_update(&reg, &arg[i])) {
+				ret = -EFAULT;
+				break;
+			}
+		} else
+#endif
+		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
+			ret = -EFAULT;
+			break;
+		}
 
 		if (reg.resv) {
 			ret = -EINVAL;
@@ -10599,10 +10811,19 @@ static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
 		return 0;
 
 	for (i = 0; i < nr_args; i++) {
-		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
-			ret = -EFAULT;
-			break;
-		}
+#ifdef CONFIG_COMPAT
+		if (ctx->compat) {
+			if (get_compat_io_uring_rsrc_update(&reg, &arg[i])) {
+				ret = -EFAULT;
+				break;
+			}
+		} else
+#endif
+		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
+			ret = -EFAULT;
+			break;
+		}
+
 		if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) {
 			ret = -EINVAL;
 			break;
@@ -10632,7 +10853,7 @@ static void *io_uring_validate_mmap_request(struct file *file,
 		ptr = ctx->rings;
 		break;
 	case IORING_OFF_SQES:
-		ptr = ctx->sq_sqes;
+		ptr = ctx->compat ? (void *)ctx->sq_sqes_compat : (void *)ctx->sq_sqes;
 		break;
 	default:
 		return ERR_PTR(-EINVAL);
@@ -10706,7 +10927,24 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 	return 0;
 }
 
-static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
+#ifdef CONFIG_COMPAT
+static int get_compat_io_uring_getevents_arg(struct io_uring_getevents_arg *arg,
+					     const void __user *user_arg)
+{
+	struct compat_io_uring_getevents_arg compat_arg;
+
+	if (unlikely(copy_from_user(&compat_arg, user_arg, sizeof(compat_arg))))
+		return -EFAULT;
+	arg->sigmask = compat_arg.sigmask;
+	arg->sigmask_sz = compat_arg.sigmask_sz;
+	arg->pad = compat_arg.pad;
+	arg->ts = compat_arg.ts;
+	return 0;
+}
+#endif
+
+static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned int flags, const void __user *argp,
+			  size_t *argsz,
 #ifdef CONFIG_CHERI_PURECAP_UABI
 			  struct __kernel_timespec * __capability *ts,
 			  const sigset_t * __capability *sig)
@@ -10731,10 +10969,21 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz
 	 * EXT_ARG is set - ensure we agree on the size of it and copy in our
 	 * timespec and sigset_t pointers if good.
 	 */
-	if (*argsz != sizeof(arg))
-		return -EINVAL;
-	if (copy_from_user(&arg, argp, sizeof(arg)))
-		return -EFAULT;
+#ifdef CONFIG_COMPAT
+	if (ctx->compat) {
+		if (*argsz != sizeof(struct compat_io_uring_getevents_arg))
+			return -EINVAL;
+		if (get_compat_io_uring_getevents_arg(&arg, argp))
+			return -EFAULT;
+	} else
+#endif
+	{
+		if (*argsz != sizeof(arg))
+			return -EINVAL;
+		if (copy_from_user(&arg, argp, sizeof(arg)))
+			return -EFAULT;
	}
+
 	if (arg.pad)
 		return -EINVAL;
 	*sig = u64_to_user_ptr(arg.sigmask);
@@ -10827,7 +11076,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		const sigset_t __user *sig;
 		struct __kernel_timespec __user *ts;
 
-		ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+		ret = io_get_ext_arg(ctx, flags, argp, &argsz, &ts, &sig);
 		if (unlikely(ret))
 			goto out;
@@ -10926,7 +11175,16 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
 
 		if (sq_idx > sq_mask)
 			continue;
-		sqe = &ctx->sq_sqes[sq_idx];
+#ifdef CONFIG_COMPAT
+		if (ctx->compat) {
+			struct io_uring_sqe native_sqe;
+			struct compat_io_uring_sqe *compat_sqe = &ctx->sq_sqes_compat[sq_idx];
+
+			convert_compat_io_uring_sqe(&native_sqe, compat_sqe);
+			sqe = &native_sqe;
+		} else
+#endif
+		sqe = &ctx->sq_sqes[sq_idx];
 		seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n",
 			   sq_idx, sqe->opcode, sqe->fd, sqe->flags,
 			   sqe->user_data);
@@ -10935,7 +11193,18 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
 	cq_entries = min(cq_tail - cq_head, ctx->cq_entries);
 	for (i = 0; i < cq_entries; i++) {
 		unsigned int entry = i + cq_head;
-		struct io_uring_cqe *cqe = &r->cqes[entry & cq_mask];
+		unsigned int cq_idx = entry & cq_mask;
+		struct io_uring_cqe *cqe;
+#ifdef CONFIG_COMPAT
+		if (ctx->compat) {
+			struct io_uring_cqe native_cqe;
+			struct compat_io_uring_cqe *compat_cqe = &ctx->cqes_compat[cq_idx];
+
+			convert_compat_io_uring_cqe(&native_cqe, compat_cqe);
+			cqe = &native_cqe;
+		} else
+#endif
+		cqe = &ctx->cqes[cq_idx];
 
 		seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
 			   entry & cq_mask, cqe->user_data, cqe->res,
@@ -10998,7 +11267,17 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
 
 	seq_puts(m, "CqOverflowList:\n");
 	list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
-		struct io_uring_cqe *cqe = &ocqe->cqe;
+		struct io_uring_cqe *cqe;
+#ifdef CONFIG_COMPAT
+		if (ctx->compat) {
+			struct io_uring_cqe native_cqe;
+			struct compat_io_uring_cqe *compat_cqe = &ocqe->compat_cqe;
+
+			convert_compat_io_uring_cqe(&native_cqe, compat_cqe);
+			cqe = &native_cqe;
+		} else
+#endif
+		cqe = &ocqe->cqe;
 
 		seq_printf(m, "  user_data=%llu, res=%d, flags=%x\n",
 			   cqe->user_data, cqe->res, cqe->flags);
@@ -11036,13 +11315,14 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 					 struct io_uring_params *p)
 {
 	struct io_rings *rings;
-	size_t size, sq_array_offset;
+	size_t size, cqes_offset, sq_array_offset;
+	void *cqes, *sqes;
 
 	/* make sure these are sane, as we already accounted them */
 	ctx->sq_entries = p->sq_entries;
 	ctx->cq_entries = p->cq_entries;
 
-	size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+	size = rings_size(ctx, p->sq_entries, p->cq_entries, &cqes_offset, &sq_array_offset);
 	if (size == SIZE_MAX)
 		return -EOVERFLOW;
 
@@ -11051,25 +11331,40 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 		return -ENOMEM;
 
 	ctx->rings = rings;
+	cqes = (char *)rings + cqes_offset;
+#ifdef CONFIG_COMPAT
+	if (ctx->compat)
+		ctx->cqes_compat = (struct compat_io_uring_cqe *)cqes;
+	else
+#endif
+		ctx->cqes = (struct io_uring_cqe *)cqes;
+
 	ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
 	rings->sq_ring_mask = p->sq_entries - 1;
 	rings->cq_ring_mask = p->cq_entries - 1;
 	rings->sq_ring_entries = p->sq_entries;
 	rings->cq_ring_entries = p->cq_entries;
 
-	size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+	size = array_size(ctx->compat ? sizeof(struct compat_io_uring_sqe)
+				      : sizeof(struct io_uring_sqe), p->sq_entries);
 	if (size == SIZE_MAX) {
 		io_mem_free(ctx->rings);
 		ctx->rings = NULL;
 		return -EOVERFLOW;
 	}
 
-	ctx->sq_sqes = io_mem_alloc(size);
-	if (!ctx->sq_sqes) {
+	sqes = io_mem_alloc(size);
+	if (!sqes) {
 		io_mem_free(ctx->rings);
 		ctx->rings = NULL;
 		return -ENOMEM;
 	}
+#ifdef CONFIG_COMPAT
+	if (ctx->compat)
+		ctx->sq_sqes_compat = (struct compat_io_uring_sqe *)sqes;
+	else
+#endif
+		ctx->sq_sqes = (struct io_uring_sqe *)sqes;
 
 	return 0;
 }
@@ -11210,7 +11505,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
 	p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
 	p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
-	p->cq_off.cqes = offsetof(struct io_rings, cqes);
+	p->cq_off.cqes = (char *)ctx->cqes - (char *)ctx->rings;
 	p->cq_off.flags = offsetof(struct io_rings, cq_flags);
 
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
@@ -11453,39 +11748,103 @@ static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
 	if (!nr_args)
 		return -EINVAL;
 	memset(&up, 0, sizeof(up));
-	if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
-		return -EFAULT;
+
+#ifdef CONFIG_COMPAT
+	if (ctx->compat) {
+		if (get_compat_io_uring_rsrc_update((struct io_uring_rsrc_update *)&up, arg))
+			return -EFAULT;
+	} else
+#endif
+	if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
+		return -EFAULT;
+
 	if (up.resv || up.resv2)
 		return -EINVAL;
 	return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args);
 }
 
+#ifdef CONFIG_COMPAT
+static int get_compat_io_uring_rsrc_update2(struct io_uring_rsrc_update2 *up2,
+					    const void __user *user_up2)
+{
+	struct compat_io_uring_rsrc_update2 compat_up2;
+
+	if (unlikely(copy_from_user(&compat_up2, user_up2, sizeof(compat_up2))))
+		return -EFAULT;
+	up2->offset = compat_up2.offset;
+	up2->resv = compat_up2.resv;
+	up2->data = compat_up2.data;
+	up2->tags = compat_up2.tags;
+	up2->nr = compat_up2.nr;
+	up2->resv2 = compat_up2.resv2;
+	return 0;
+}
+#endif
+
 static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
 				   unsigned size, unsigned type)
 {
 	struct io_uring_rsrc_update2 up;
 
-	if (size != sizeof(up))
-		return -EINVAL;
-	if (copy_from_user(&up, arg, sizeof(up)))
-		return -EFAULT;
+#ifdef CONFIG_COMPAT
+	if (ctx->compat) {
+		if (size != sizeof(struct compat_io_uring_rsrc_update2))
+			return -EINVAL;
+		if (get_compat_io_uring_rsrc_update2(&up, arg))
+			return -EFAULT;
+	} else
+#endif
+	{
+		if (size != sizeof(up))
+			return -EINVAL;
+		if (copy_from_user(&up, arg, sizeof(up)))
+			return -EFAULT;
+	}
+
 	if (!up.nr || up.resv || up.resv2)
 		return -EINVAL;
 	return __io_register_rsrc_update(ctx, type, &up, up.nr);
 }
+
+#ifdef CONFIG_COMPAT
+static int get_compat_io_uring_rsrc_register(struct io_uring_rsrc_register *rr,
+					     const void __user *user_rr)
+{
+	struct compat_io_uring_rsrc_register compat_rr;
+
+	if (unlikely(copy_from_user(&compat_rr, user_rr, sizeof(compat_rr))))
+		return -EFAULT;
+	rr->nr = compat_rr.nr;
+	rr->resv = compat_rr.resv;
+	rr->resv2 = compat_rr.resv2;
+	rr->data = compat_rr.data;
+	rr->tags = compat_rr.tags;
+	return 0;
+}
+#endif
+
 static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 				   unsigned int size, unsigned int type)
 {
 	struct io_uring_rsrc_register rr;
 
-	/* keep it extendible */
-	if (size != sizeof(rr))
-		return -EINVAL;
+#ifdef CONFIG_COMPAT
+	if (ctx->compat) {
+		if (size != sizeof(struct compat_io_uring_rsrc_register))
+			return -EINVAL;
+		if (get_compat_io_uring_rsrc_register(&rr, arg))
+			return -EFAULT;
+	} else
+#endif
+	{
+		/* keep it extendible */
+		if (size != sizeof(rr))
+			return -EINVAL;
+		if (copy_from_user(&rr, arg, size))
+			return -EFAULT;
+	}
 
-	memset(&rr, 0, sizeof(rr));
-	if (copy_from_user(&rr, arg, size))
-		return -EFAULT;
 	if (!rr.nr || rr.resv || rr.resv2)
 		return -EINVAL;
 
@@ -11799,6 +12158,13 @@ static int __init io_uring_init(void)
 	BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
 } while (0)
 
+	BUILD_BUG_ON(__builtin_types_compatible_p(struct io_uring_sqe, struct compat_io_uring_sqe));
+
+	BUILD_BUG_ON(sizeof(struct compat_io_uring_files_update) !=
+		     sizeof(struct compat_io_uring_rsrc_update));
+	BUILD_BUG_ON(sizeof(struct compat_io_uring_rsrc_update) >
+		     sizeof(struct compat_io_uring_rsrc_update2));
+
 #define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
 	__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
 	BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);