Some members of the io_uring uAPI structs may contain user pointers. In the PCuABI, a user pointer is a 129-bit capability, so the __u64 type is not big enough to hold it. Use the __kernel_uintptr_t type instead, which is big enough on the affected architectures while remaining 64-bit on others.
The user_data field must be passed unchanged from the submission queue to the completion queue. As it is standard practice to store a pointer in user_data, expand the field to __kernel_uintptr_t. However, the kernel never dereferences user_data, so in the compat case it is copied as a plain 64-bit value rather than being converted to a user pointer with compat_ptr().
In addition, for the io_uring structs containing user pointers, use the pointer-preserving copy routines (such as copy_from_user_with_ptr()) when copying those structs from/to userspace.
For the IORING_OP_POLL_REMOVE operation, if IORING_POLL_UPDATE_USER_DATA is set in the SQE len field, the request updates the user_data of an existing poll request using the value passed in the addr2 field instead of the off field. This is required because the off field is not large enough to hold a user_data value. For the same reason, IORING_OP_MSG_RING now reads the user_data to be posted from the addr2 field instead of the off field.
Note that the structs io_uring_sqe and io_uring_cqe double in size in PCuABI, to 128 bytes and 32 bytes respectively. Outside PCuABI, the setup flags IORING_SETUP_SQE128 and IORING_SETUP_CQE32 double the sizes of the two structs to 128 bytes and 32 bytes respectively. In PCuABI, the two flags still double the sizes of the two structs but, as the base sizes are larger, the resulting sizes are 256 bytes and 64 bytes.
Signed-off-by: Tudor Cretu <tudor.cretu@arm.com> --- include/linux/io_uring_types.h | 4 +- include/trace/events/io_uring.h | 46 ++++++++++---------- include/uapi/linux/io_uring.h | 76 ++++++++++++++++++--------------- io_uring/advise.c | 7 +-- io_uring/cancel.c | 6 +-- io_uring/cancel.h | 2 +- io_uring/epoll.c | 2 +- io_uring/fdinfo.c | 8 ++-- io_uring/fs.c | 16 +++---- io_uring/io_uring.c | 62 +++++++++++++++++++++++---- io_uring/io_uring.h | 25 ++++++----- io_uring/kbuf.c | 19 +++++---- io_uring/kbuf.h | 2 +- io_uring/msg_ring.c | 4 +- io_uring/net.c | 20 ++++----- io_uring/openclose.c | 4 +- io_uring/poll.c | 6 +-- io_uring/rsrc.c | 44 +++++++++---------- io_uring/rw.c | 18 ++++---- io_uring/statx.c | 4 +- io_uring/tctx.c | 4 +- io_uring/timeout.c | 10 ++--- io_uring/uring_cmd.c | 5 +++ io_uring/xattr.c | 12 +++--- 24 files changed, 235 insertions(+), 171 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f0eb34ad8b709..186504cfb2f9a 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -600,8 +600,8 @@ struct io_task_work { };
struct io_cqe { - __u64 user_data; - __s32 res; + __kernel_uintptr_t user_data; + __s32 res; /* fd initially, then cflags for completion */ union { __u32 flags; diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 936fd41bf147e..846e762d8a0ea 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -112,10 +112,10 @@ TRACE_EVENT(io_uring_file_get, TP_ARGS(req, fd),
TP_STRUCT__entry ( - __field( void *, ctx ) - __field( void *, req ) - __field( u64, user_data ) - __field( int, fd ) + __field( void *, ctx ) + __field( void *, req ) + __field( __kernel_uintptr_t, user_data ) + __field( int, fd ) ),
TP_fast_assign( @@ -146,7 +146,7 @@ TRACE_EVENT(io_uring_queue_async_work, TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( u64, user_data ) + __field( __kernel_uintptr_t, user_data ) __field( u8, opcode ) __field( unsigned int, flags ) __field( struct io_wq_work *, work ) @@ -190,7 +190,7 @@ TRACE_EVENT(io_uring_defer, TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( unsigned long long, data ) + __field( __kernel_uintptr_t, data ) __field( u8, opcode )
__string( op_str, io_uring_get_opcode(req->opcode) ) @@ -289,7 +289,7 @@ TRACE_EVENT(io_uring_fail_link, TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( unsigned long long, user_data ) + __field( __kernel_uintptr_t, user_data ) __field( u8, opcode ) __field( void *, link )
@@ -325,19 +325,19 @@ TRACE_EVENT(io_uring_fail_link, */ TRACE_EVENT(io_uring_complete,
- TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags, + TP_PROTO(void *ctx, void *req, __kernel_uintptr_t user_data, int res, unsigned cflags, u64 extra1, u64 extra2),
TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2),
TP_STRUCT__entry ( - __field( void *, ctx ) - __field( void *, req ) - __field( u64, user_data ) - __field( int, res ) - __field( unsigned, cflags ) - __field( u64, extra1 ) - __field( u64, extra2 ) + __field( void *, ctx ) + __field( void *, req ) + __field( __kernel_uintptr_t, user_data ) + __field( int, res ) + __field( unsigned, cflags ) + __field( u64, extra1 ) + __field( u64, extra2 ) ),
TP_fast_assign( @@ -377,7 +377,7 @@ TRACE_EVENT(io_uring_submit_sqe, TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( unsigned long long, user_data ) + __field( __kernel_uintptr_t, user_data ) __field( u8, opcode ) __field( u32, flags ) __field( bool, force_nonblock ) @@ -423,7 +423,7 @@ TRACE_EVENT(io_uring_poll_arm, TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( unsigned long long, user_data ) + __field( __kernel_uintptr_t, user_data ) __field( u8, opcode ) __field( int, mask ) __field( int, events ) @@ -464,7 +464,7 @@ TRACE_EVENT(io_uring_task_add, TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( unsigned long long, user_data ) + __field( __kernel_uintptr_t, user_data ) __field( u8, opcode ) __field( int, mask )
@@ -505,19 +505,19 @@ TRACE_EVENT(io_uring_req_failed, TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( unsigned long long, user_data ) + __field( __kernel_uintptr_t, user_data ) __field( u8, opcode ) __field( u8, flags ) __field( u8, ioprio ) __field( u64, off ) - __field( u64, addr ) + __field( __kernel_uintptr_t, addr ) __field( u32, len ) __field( u32, op_flags ) __field( u16, buf_index ) __field( u16, personality ) __field( u32, file_index ) __field( u64, pad1 ) - __field( u64, addr3 ) + __field( __kernel_uintptr_t, addr3 ) __field( int, error )
__string( op_str, io_uring_get_opcode(sqe->opcode) ) @@ -573,14 +573,14 @@ TRACE_EVENT(io_uring_req_failed, */ TRACE_EVENT(io_uring_cqe_overflow,
- TP_PROTO(void *ctx, unsigned long long user_data, s32 res, u32 cflags, + TP_PROTO(void *ctx, __kernel_uintptr_t user_data, s32 res, u32 cflags, void *ocqe),
TP_ARGS(ctx, user_data, res, cflags, ocqe),
TP_STRUCT__entry ( __field( void *, ctx ) - __field( unsigned long long, user_data ) + __field( __kernel_uintptr_t, user_data ) __field( s32, res ) __field( u32, cflags ) __field( void *, ocqe ) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2df3225b562fa..121c9aef5ad00 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -11,6 +11,11 @@ #include <linux/fs.h> #include <linux/types.h> #include <linux/time_types.h> +#ifdef __KERNEL__ +#include <linux/stddef.h> /* for offsetof */ +#else +#include <stddef.h> /* for offsetof */ +#endif
#ifdef __cplusplus extern "C" { @@ -25,16 +30,16 @@ struct io_uring_sqe { __u16 ioprio; /* ioprio for the request */ __s32 fd; /* file descriptor to do IO on */ union { - __u64 off; /* offset into file */ - __u64 addr2; + __u64 off; /* offset into file */ + __kernel_uintptr_t addr2; struct { __u32 cmd_op; __u32 __pad1; }; }; union { - __u64 addr; /* pointer to buffer or iovecs */ - __u64 splice_off_in; + __kernel_uintptr_t addr; /* pointer to buffer or iovecs */ + __u64 splice_off_in; }; __u32 len; /* buffer size or number of iovecs */ union { @@ -58,7 +63,7 @@ struct io_uring_sqe { __u32 msg_ring_flags; __u32 uring_cmd_flags; }; - __u64 user_data; /* data to be passed back at completion time */ + __kernel_uintptr_t user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ union { /* index into fixed buffers, if used */ @@ -78,12 +83,14 @@ struct io_uring_sqe { }; union { struct { - __u64 addr3; - __u64 __pad2[1]; + __kernel_uintptr_t addr3; + __kernel_uintptr_t __pad2[1]; }; /* * If the ring is initialized with IORING_SETUP_SQE128, then - * this field is used for 80 bytes of arbitrary command data + * this field is used to double the size of the + * struct io_uring_sqe to store bytes of arbitrary + * command data, i.e. 80 bytes or 160 bytes in PCuABI */ __u8 cmd[0]; }; @@ -326,13 +333,14 @@ enum { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ - __s32 res; /* result code for this event */ - __u32 flags; + __kernel_uintptr_t user_data; /* sqe->data submission passed back */ + __s32 res; /* result code for this event */ + __u32 flags;
/* * If the ring is initialized with IORING_SETUP_CQE32, then this field - * contains 16-bytes of padding, doubling the size of the CQE. + * doubles the size of the CQE, i.e. contains 16 bytes, or in PCuABI, + * 32 bytes of padding. */ __u64 big_cqe[]; }; @@ -504,7 +512,7 @@ enum { struct io_uring_files_update { __u32 offset; __u32 resv; - __aligned_u64 /* __s32 * */ fds; + __kernel_aligned_uintptr_t /* __s32 * */ fds; };
/* @@ -517,21 +525,21 @@ struct io_uring_rsrc_register { __u32 nr; __u32 flags; __u64 resv2; - __aligned_u64 data; - __aligned_u64 tags; + __kernel_aligned_uintptr_t data; + __kernel_aligned_uintptr_t tags; };
struct io_uring_rsrc_update { __u32 offset; __u32 resv; - __aligned_u64 data; + __kernel_aligned_uintptr_t data; };
struct io_uring_rsrc_update2 { __u32 offset; __u32 resv; - __aligned_u64 data; - __aligned_u64 tags; + __kernel_aligned_uintptr_t data; + __kernel_aligned_uintptr_t tags; __u32 nr; __u32 resv2; }; @@ -581,10 +589,10 @@ struct io_uring_restriction { };
struct io_uring_buf { - __u64 addr; - __u32 len; - __u16 bid; - __u16 resv; + __kernel_uintptr_t addr; + __u32 len; + __u16 bid; + __u16 resv; };
struct io_uring_buf_ring { @@ -594,9 +602,7 @@ struct io_uring_buf_ring { * ring tail is overlaid with the io_uring_buf->resv field. */ struct { - __u64 resv1; - __u32 resv2; - __u16 resv3; + __u8 resv[offsetof(struct io_uring_buf, resv)]; __u16 tail; }; struct io_uring_buf bufs[0]; @@ -605,11 +611,11 @@ struct io_uring_buf_ring {
/* argument for IORING_(UN)REGISTER_PBUF_RING */ struct io_uring_buf_reg { - __u64 ring_addr; - __u32 ring_entries; - __u16 bgid; - __u16 pad; - __u64 resv[3]; + __kernel_uintptr_t ring_addr; + __u32 ring_entries; + __u16 bgid; + __u16 pad; + __u64 resv[3]; };
/* @@ -632,17 +638,17 @@ enum { };
struct io_uring_getevents_arg { - __u64 sigmask; - __u32 sigmask_sz; - __u32 pad; - __u64 ts; + __kernel_uintptr_t sigmask; + __u32 sigmask_sz; + __u32 pad; + __kernel_uintptr_t ts; };
/* * Argument for IORING_REGISTER_SYNC_CANCEL */ struct io_uring_sync_cancel_reg { - __u64 addr; + __kernel_uintptr_t addr; __s32 fd; __u32 flags; struct __kernel_timespec timeout; diff --git a/io_uring/advise.c b/io_uring/advise.c index 449c6f14649f7..05fd3bbaf8090 100644 --- a/io_uring/advise.c +++ b/io_uring/advise.c @@ -23,7 +23,7 @@ struct io_fadvise {
struct io_madvise { struct file *file; - u64 addr; + void __user *addr; u32 len; u32 advice; }; @@ -36,7 +36,7 @@ int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->buf_index || sqe->off || sqe->splice_fd_in) return -EINVAL;
- ma->addr = READ_ONCE(sqe->addr); + ma->addr = (void __user *)READ_ONCE(sqe->addr); ma->len = READ_ONCE(sqe->len); ma->advice = READ_ONCE(sqe->fadvise_advice); return 0; @@ -54,7 +54,8 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags) if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN;
- ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); + /* TODO [PCuABI] - capability checks for uaccess */ + ret = do_madvise(current->mm, user_ptr_addr(ma->addr), ma->len, ma->advice); io_req_set_res(req, ret, 0); return IOU_OK; #else diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 8382ea03fe899..dd642da52233f 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -19,7 +19,7 @@
struct io_cancel { struct file *file; - u64 addr; + __kernel_uintptr_t addr; u32 flags; s32 fd; }; @@ -34,7 +34,7 @@ static int get_compat64_io_uring_sync_cancel_reg(struct io_uring_sync_cancel_reg
if (copy_from_user(&compat_sc, user_sc, sizeof(compat_sc))) return -EFAULT; - sc->addr = compat_sc.addr; + sc->addr = (__kernel_uintptr_t)compat_sc.addr; sc->fd = compat_sc.fd; sc->flags = compat_sc.flags; sc->timeout = compat_sc.timeout; @@ -48,7 +48,7 @@ static int copy_io_uring_sync_cancel_reg_from_user(struct io_ring_ctx *ctx, { if (is_compat64_io_ring_ctx(ctx)) return get_compat64_io_uring_sync_cancel_reg(sc, arg); - return copy_from_user(sc, arg, sizeof(*sc)); + return copy_from_user_with_ptr(sc, arg, sizeof(*sc)); }
static bool io_cancel_cb(struct io_wq_work *work, void *data) diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 6a59ee484d0cc..7c1249d61bf25 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -5,7 +5,7 @@ struct io_cancel_data { struct io_ring_ctx *ctx; union { - u64 data; + __kernel_uintptr_t data; struct file *file; }; u32 flags; diff --git a/io_uring/epoll.c b/io_uring/epoll.c index d5580ff465c3e..d9d5983f823c2 100644 --- a/io_uring/epoll.c +++ b/io_uring/epoll.c @@ -39,7 +39,7 @@ int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ep_op_has_event(epoll->op)) { struct epoll_event __user *ev;
- ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); + ev = (struct epoll_event __user *)READ_ONCE(sqe->addr); if (copy_epoll_event_from_user(&epoll->event, ev, req->ctx->compat)) return -EFAULT; } diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index c724e6c544809..e5442e0ddbc8b 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -102,7 +102,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, sq_idx, io_uring_get_opcode((sqe)->opcode), (sqe)->fd, \ (sqe)->flags, (unsigned long long) (sqe)->off, \ (unsigned long long) (sqe)->addr, (sqe)->rw_flags, \ - (sqe)->buf_index, (sqe)->user_data); \ + (sqe)->buf_index, (unsigned long long)(sqe)->user_data); \ if (sq_shift) { \ u64 *sqeb = (void *) ((sqe) + 1); \ int size = sizeof(*(sqe)) / sizeof(u64); \ @@ -133,7 +133,8 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, #define print_cqe(cqe) \ do { \ seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x", \ - entry & cq_mask, (cqe)->user_data, (cqe)->res, \ + entry & cq_mask, \ + (unsigned long long) (cqe)->user_data, (cqe)->res, \ (cqe)->flags); \ if (cq_shift) \ seq_printf(m, ", extra1:%llu, extra2:%llu\n", \ @@ -210,7 +211,8 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct io_uring_cqe *cqe = &ocqe->cqe;
seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", - (cqe)->user_data, (cqe)->res, (cqe)->flags); + (unsigned long long) cqe->user_data, cqe->res, + cqe->flags); }
spin_unlock(&ctx->completion_lock); diff --git a/io_uring/fs.c b/io_uring/fs.c index 7100c293c13a8..2e01e7da1d4ba 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -58,8 +58,8 @@ int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EBADF;
ren->old_dfd = READ_ONCE(sqe->fd); - oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); - newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + oldf = (char __user *)READ_ONCE(sqe->addr); + newf = (char __user *)READ_ONCE(sqe->addr2); ren->new_dfd = READ_ONCE(sqe->len); ren->flags = READ_ONCE(sqe->rename_flags);
@@ -117,7 +117,7 @@ int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (un->flags & ~AT_REMOVEDIR) return -EINVAL;
- fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + fname = (char __user *)READ_ONCE(sqe->addr); un->filename = getname(fname); if (IS_ERR(un->filename)) return PTR_ERR(un->filename); @@ -164,7 +164,7 @@ int io_mkdirat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) mkd->dfd = READ_ONCE(sqe->fd); mkd->mode = READ_ONCE(sqe->len);
- fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + fname = (char __user *)READ_ONCE(sqe->addr); mkd->filename = getname(fname); if (IS_ERR(mkd->filename)) return PTR_ERR(mkd->filename); @@ -206,8 +206,8 @@ int io_symlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EBADF;
sl->new_dfd = READ_ONCE(sqe->fd); - oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); - newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + oldpath = (char __user *)READ_ONCE(sqe->addr); + newpath = (char __user *)READ_ONCE(sqe->addr2);
sl->oldpath = getname(oldpath); if (IS_ERR(sl->oldpath)) @@ -250,8 +250,8 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
lnk->old_dfd = READ_ONCE(sqe->fd); lnk->new_dfd = READ_ONCE(sqe->len); - oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); - newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + oldf = (char __user *)READ_ONCE(sqe->addr); + newf = (char __user *)READ_ONCE(sqe->addr2); lnk->flags = READ_ONCE(sqe->hardlink_flags);
lnk->oldpath = getname(oldf); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d4710672b4fc7..98179f01cd12b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -159,10 +159,10 @@ static int get_compat64_io_uring_getevents_arg(struct io_uring_getevents_arg *ar
if (copy_from_user(&compat_arg, user_arg, sizeof(compat_arg))) return -EFAULT; - arg->sigmask = compat_arg.sigmask; + arg->sigmask = (__kernel_uintptr_t)compat_ptr(compat_arg.sigmask); arg->sigmask_sz = compat_arg.sigmask_sz; arg->pad = compat_arg.pad; - arg->ts = compat_arg.ts; + arg->ts = (__kernel_uintptr_t)compat_ptr(compat_arg.ts); return 0; }
@@ -178,7 +178,7 @@ static int copy_io_uring_getevents_arg_from_user(struct io_ring_ctx *ctx, } if (size != sizeof(*arg)) return -EINVAL; - return copy_from_user(arg, argp, sizeof(*arg)); + return copy_from_user_with_ptr(arg, argp, sizeof(*arg)); }
struct sock *io_uring_get_socket(struct file *file) @@ -721,7 +721,7 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task) } }
-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, +static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, __kernel_uintptr_t user_data, s32 res, u32 cflags, u64 extra1, u64 extra2) { struct io_overflow_cqe *ocqe; @@ -816,8 +816,8 @@ struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow) return cqe; }
-bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags, - bool allow_overflow) +bool io_fill_cqe_aux(struct io_ring_ctx *ctx, __kernel_uintptr_t user_data, + s32 res, u32 cflags, bool allow_overflow) { struct io_uring_cqe *cqe;
@@ -843,7 +843,7 @@ bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags }
bool io_post_aux_cqe(struct io_ring_ctx *ctx, - u64 user_data, s32 res, u32 cflags, + __kernel_uintptr_t user_data, s32 res, u32 cflags, bool allow_overflow) { bool filled; @@ -3214,9 +3214,9 @@ static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned int flags, return ret; if (arg.pad) return -EINVAL; - *sig = u64_to_user_ptr(arg.sigmask); + *sig = (sigset_t __user *)arg.sigmask; *argsz = arg.sigmask_sz; - *ts = u64_to_user_ptr(arg.ts); + *ts = (struct __kernel_timespec __user *)arg.ts; return 0; }
@@ -4159,6 +4159,49 @@ static int __init io_uring_init(void) __BUILD_BUG_VERIFY_OFFSET_SIZE(struct io_uring_sqe, eoffset, sizeof(etype), ename) #define BUILD_BUG_SQE_ELEM_SIZE(eoffset, esize, ename) \ __BUILD_BUG_VERIFY_OFFSET_SIZE(struct io_uring_sqe, eoffset, esize, ename) +#ifdef CONFIG_CHERI_PURECAP_UABI + BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 128); + BUILD_BUG_SQE_ELEM(0, __u8, opcode); + BUILD_BUG_SQE_ELEM(1, __u8, flags); + BUILD_BUG_SQE_ELEM(2, __u16, ioprio); + BUILD_BUG_SQE_ELEM(4, __s32, fd); + BUILD_BUG_SQE_ELEM(16, __u64, off); + BUILD_BUG_SQE_ELEM(16, __uintcap_t, addr2); + BUILD_BUG_SQE_ELEM(16, __u32, cmd_op); + BUILD_BUG_SQE_ELEM(20, __u32, __pad1); + BUILD_BUG_SQE_ELEM(32, __uintcap_t, addr); + BUILD_BUG_SQE_ELEM(32, __u64, splice_off_in); + BUILD_BUG_SQE_ELEM(48, __u32, len); + BUILD_BUG_SQE_ELEM(52, __kernel_rwf_t, rw_flags); + BUILD_BUG_SQE_ELEM(52, __u32, fsync_flags); + BUILD_BUG_SQE_ELEM(52, __u16, poll_events); + BUILD_BUG_SQE_ELEM(52, __u32, poll32_events); + BUILD_BUG_SQE_ELEM(52, __u32, sync_range_flags); + BUILD_BUG_SQE_ELEM(52, __u32, msg_flags); + BUILD_BUG_SQE_ELEM(52, __u32, timeout_flags); + BUILD_BUG_SQE_ELEM(52, __u32, accept_flags); + BUILD_BUG_SQE_ELEM(52, __u32, cancel_flags); + BUILD_BUG_SQE_ELEM(52, __u32, open_flags); + BUILD_BUG_SQE_ELEM(52, __u32, statx_flags); + BUILD_BUG_SQE_ELEM(52, __u32, fadvise_advice); + BUILD_BUG_SQE_ELEM(52, __u32, splice_flags); + BUILD_BUG_SQE_ELEM(52, __u32, rename_flags); + BUILD_BUG_SQE_ELEM(52, __u32, unlink_flags); + BUILD_BUG_SQE_ELEM(52, __u32, hardlink_flags); + BUILD_BUG_SQE_ELEM(52, __u32, xattr_flags); + BUILD_BUG_SQE_ELEM(52, __u32, msg_ring_flags); + BUILD_BUG_SQE_ELEM(64, __uintcap_t, user_data); + BUILD_BUG_SQE_ELEM(80, __u16, buf_index); + BUILD_BUG_SQE_ELEM(80, __u16, buf_group); + BUILD_BUG_SQE_ELEM(82, __u16, personality); + BUILD_BUG_SQE_ELEM(84, __s32, splice_fd_in); + BUILD_BUG_SQE_ELEM(84, __u32, file_index); + BUILD_BUG_SQE_ELEM(84, __u16, addr_len); + 
BUILD_BUG_SQE_ELEM(86, __u16, __pad3[0]); + BUILD_BUG_SQE_ELEM(96, __uintcap_t, addr3); + BUILD_BUG_SQE_ELEM_SIZE(96, 0, cmd); + BUILD_BUG_SQE_ELEM(112, __uintcap_t, __pad2); +#else /* !CONFIG_CHERI_PURECAP_UABI */ BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); BUILD_BUG_SQE_ELEM(0, __u8, opcode); BUILD_BUG_SQE_ELEM(1, __u8, flags); @@ -4202,6 +4245,7 @@ static int __init io_uring_init(void) BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); BUILD_BUG_SQE_ELEM(56, __u64, __pad2); +#endif /* !CONFIG_CHERI_PURECAP_UABI */
BUILD_BUG_ON(sizeof(struct io_uring_files_update) != sizeof(struct io_uring_rsrc_update)); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index b44ad558137be..ad6b8d79e98de 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -34,10 +34,10 @@ void io_req_complete_failed(struct io_kiocb *req, s32 res); void __io_req_complete(struct io_kiocb *req, unsigned issue_flags); void io_req_complete_post(struct io_kiocb *req); void __io_req_complete_post(struct io_kiocb *req); -bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags, - bool allow_overflow); -bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags, - bool allow_overflow); +bool io_post_aux_cqe(struct io_ring_ctx *ctx, __kernel_uintptr_t user_data, + s32 res, u32 cflags, bool allow_overflow); +bool io_fill_cqe_aux(struct io_ring_ctx *ctx, __kernel_uintptr_t user_data, + s32 res, u32 cflags, bool allow_overflow); void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages); @@ -120,13 +120,13 @@ static inline void convert_compat64_io_uring_sqe(struct io_ring_ctx *ctx, sqe->ioprio = READ_ONCE(compat_sqe->ioprio); sqe->fd = READ_ONCE(compat_sqe->fd); BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr2, addr); - sqe->addr2 = READ_ONCE(compat_sqe->addr2); + sqe->addr2 = (__kernel_uintptr_t)compat_ptr(READ_ONCE(compat_sqe->addr2)); BUILD_BUG_COMPAT_SQE_UNION_ELEM(addr, len); - sqe->addr = READ_ONCE(compat_sqe->addr); + sqe->addr = (__kernel_uintptr_t)compat_ptr(READ_ONCE(compat_sqe->addr)); sqe->len = READ_ONCE(compat_sqe->len); BUILD_BUG_COMPAT_SQE_UNION_ELEM(rw_flags, user_data); sqe->rw_flags = READ_ONCE(compat_sqe->rw_flags); - sqe->user_data = READ_ONCE(compat_sqe->user_data); + sqe->user_data = (__kernel_uintptr_t)READ_ONCE(compat_sqe->user_data); BUILD_BUG_COMPAT_SQE_UNION_ELEM(buf_index, personality); sqe->buf_index = READ_ONCE(compat_sqe->buf_index); sqe->personality = READ_ONCE(compat_sqe->personality); @@ -136,9 +136,14 @@ static inline void convert_compat64_io_uring_sqe(struct io_ring_ctx *ctx, size_t compat_cmd_size = compat_uring_cmd_pdu_size(ctx->flags & IORING_SETUP_SQE128);
+ /* + * Note that sqe->cmd is bigger than compat_sqe->cmd, but + * uring_cmd handlers are not using that extra data in the + * compat mode, so the end of sqe->cmd is left uninitialised. + */ memcpy(sqe->cmd, compat_sqe->cmd, compat_cmd_size); } else { - sqe->addr3 = READ_ONCE(compat_sqe->addr3); + sqe->addr3 = (__kernel_uintptr_t)compat_ptr(READ_ONCE(compat_sqe->addr3)); sqe->__pad2[0] = READ_ONCE(compat_sqe->__pad2[0]); } #undef BUILD_BUG_COMPAT_SQE_UNION_ELEM @@ -169,13 +174,13 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) }
static inline void __io_fill_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe *cqe, - u64 user_data, s32 res, u32 cflags, + __kernel_uintptr_t user_data, s32 res, u32 cflags, u64 extra1, u64 extra2) { if (is_compat64_io_ring_ctx(ctx)) { struct compat_io_uring_cqe *compat_cqe = (struct compat_io_uring_cqe *)cqe;
- WRITE_ONCE(compat_cqe->user_data, user_data); + WRITE_ONCE(compat_cqe->user_data, (__u64)user_data); WRITE_ONCE(compat_cqe->res, res); WRITE_ONCE(compat_cqe->flags, cflags);
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index b388592e67df9..4614ab633c4bd 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -22,7 +22,7 @@
struct io_provide_buf { struct file *file; - __u64 addr; + void __user *addr; __u32 len; __u32 bgid; __u16 nbufs; @@ -36,7 +36,7 @@ static int get_compat64_io_uring_buf_reg(struct io_uring_buf_reg *reg,
if (copy_from_user(&compat_reg, user_reg, sizeof(compat_reg))) return -EFAULT; - reg->ring_addr = compat_reg.ring_addr; + reg->ring_addr = (__kernel_uintptr_t)compat_ptr(compat_reg.ring_addr); reg->ring_entries = compat_reg.ring_entries; reg->bgid = compat_reg.bgid; reg->pad = compat_reg.pad; @@ -50,7 +50,7 @@ static int copy_io_uring_buf_reg_from_user(struct io_ring_ctx *ctx, { if (is_compat64_io_ring_ctx(ctx)) return get_compat64_io_uring_buf_reg(reg, arg); - return copy_from_user(reg, arg, sizeof(*reg)); + return copy_from_user_with_ptr(reg, arg, sizeof(*reg)); }
static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, @@ -145,7 +145,7 @@ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, req->flags |= REQ_F_BUFFER_SELECTED; req->kbuf = kbuf; req->buf_index = kbuf->bid; - return u64_to_user_ptr(kbuf->addr); + return (void __user *)kbuf->addr; } return NULL; } @@ -205,7 +205,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->buf_list = bl; req->buf_index = buf->bid;
- return u64_to_user_ptr(buf->addr); + return (void __user *)buf->addr; }
static void __user *io_ring_buffer_select_any(struct io_kiocb *req, size_t *len, @@ -403,17 +403,17 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe if (!tmp || tmp > USHRT_MAX) return -E2BIG; p->nbufs = tmp; - p->addr = READ_ONCE(sqe->addr); + p->addr = (void __user *)READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len);
if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, &size)) return -EOVERFLOW; - if (check_add_overflow((unsigned long)p->addr, size, &tmp_check)) + if (check_add_overflow(user_ptr_addr(p->addr), size, &tmp_check)) return -EOVERFLOW;
size = (unsigned long)p->len * p->nbufs; - if (!access_ok(u64_to_user_ptr(p->addr), size)) + if (!access_ok(p->addr, size)) return -EFAULT;
p->bgid = READ_ONCE(sqe->buf_group); @@ -473,7 +473,7 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, struct io_buffer_list *bl) { struct io_buffer *buf; - u64 addr = pbuf->addr; + void __user *addr = pbuf->addr; int i, bid = pbuf->bid;
for (i = 0; i < pbuf->nbufs; i++) { @@ -585,6 +585,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) pages_size = ctx->compat ? size_mul(sizeof(struct compat_io_uring_buf), reg.ring_entries) : size_mul(sizeof(struct io_uring_buf), reg.ring_entries); + /* TODO [PCuABI] - capability checks for uaccess */ pages = io_pin_pages(reg.ring_addr, pages_size, &nr_pages); if (IS_ERR(pages)) { kfree(free_bl); diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 1aa5bbbc5d628..1977c13ccf3ff 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -31,7 +31,7 @@ struct io_buffer_list {
struct io_buffer { struct list_head list; - __u64 addr; + void __user *addr; __u32 len; __u16 bid; __u16 bgid; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 90d2fc6fd80e4..654f5ad0b11c0 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -15,7 +15,7 @@
struct io_msg { struct file *file; - u64 user_data; + __kernel_uintptr_t user_data; u32 len; u32 cmd; u32 src_fd; @@ -130,7 +130,7 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(sqe->buf_index || sqe->personality)) return -EINVAL;
- msg->user_data = READ_ONCE(sqe->off); + msg->user_data = READ_ONCE(sqe->addr2); msg->len = READ_ONCE(sqe->len); msg->cmd = READ_ONCE(sqe->addr); msg->src_fd = READ_ONCE(sqe->addr3); diff --git a/io_uring/net.c b/io_uring/net.c index 4c133bc6f9d1d..6fd28a49b6715 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -243,13 +243,13 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->opcode == IORING_OP_SEND) { if (READ_ONCE(sqe->__pad3[0])) return -EINVAL; - sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + sr->addr = (void __user *)READ_ONCE(sqe->addr2); sr->addr_len = READ_ONCE(sqe->addr_len); } else if (sqe->addr2 || sqe->file_index) { return -EINVAL; }
- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + sr->umsg = (struct user_msghdr __user *)READ_ONCE(sqe->addr); sr->len = READ_ONCE(sqe->len); sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) @@ -421,7 +421,7 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req, struct user_msghdr msg; int ret;
- if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) + if (copy_from_user_with_ptr(&msg, sr->umsg, sizeof(*sr->umsg))) return -EFAULT;
ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); @@ -549,7 +549,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL;
- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + sr->umsg = (struct user_msghdr __user *)READ_ONCE(sqe->addr); sr->len = READ_ONCE(sqe->len); sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~(RECVMSG_FLAGS)) @@ -966,7 +966,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->opcode == IORING_OP_SEND_ZC) { if (READ_ONCE(sqe->__pad3[0])) return -EINVAL; - zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + zc->addr = (void __user *)READ_ONCE(sqe->addr2); zc->addr_len = READ_ONCE(sqe->addr_len); } else { if (unlikely(sqe->addr2 || sqe->file_index)) @@ -975,7 +975,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; }
- zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); + zc->buf = (void __user *)READ_ONCE(sqe->addr); zc->len = READ_ONCE(sqe->len); zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; if (zc->msg_flags & MSG_DONTWAIT) @@ -1242,8 +1242,8 @@ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->len || sqe->buf_index) return -EINVAL;
- accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); - accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + accept->addr = (void __user *)READ_ONCE(sqe->addr); + accept->addr_len = (int __user *)READ_ONCE(sqe->addr2); accept->flags = READ_ONCE(sqe->accept_flags); accept->nofile = rlimit(RLIMIT_NOFILE); flags = READ_ONCE(sqe->ioprio); @@ -1392,8 +1392,8 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL;
- conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); - conn->addr_len = READ_ONCE(sqe->addr2); + conn->addr = (void __user *)READ_ONCE(sqe->addr); + conn->addr_len = READ_ONCE(sqe->off); conn->in_progress = false; return 0; } diff --git a/io_uring/openclose.c b/io_uring/openclose.c index 67178e4bb282d..0a5c838885306 100644 --- a/io_uring/openclose.c +++ b/io_uring/openclose.c @@ -47,7 +47,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe open->how.flags |= O_LARGEFILE;
open->dfd = READ_ONCE(sqe->fd); - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + fname = (char __user *)READ_ONCE(sqe->addr); open->filename = getname(fname); if (IS_ERR(open->filename)) { ret = PTR_ERR(open->filename); @@ -81,7 +81,7 @@ int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) size_t len; int ret;
- how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + how = (struct open_how __user *)READ_ONCE(sqe->addr2); len = READ_ONCE(sqe->len); if (len < OPEN_HOW_SIZE_VER0) return -EINVAL; diff --git a/io_uring/poll.c b/io_uring/poll.c index d9bf1767867e6..0b7936c817e50 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -22,8 +22,8 @@
struct io_poll_update { struct file *file; - u64 old_user_data; - u64 new_user_data; + __kernel_uintptr_t old_user_data; + __kernel_uintptr_t new_user_data; __poll_t events; bool update_events; bool update_user_data; @@ -890,7 +890,7 @@ int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
- upd->new_user_data = READ_ONCE(sqe->off); + upd->new_user_data = READ_ONCE(sqe->addr2); if (!upd->update_user_data && upd->new_user_data) return -EINVAL; if (upd->update_events) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index c65b99fb9264f..7c308e00e1c2c 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -18,7 +18,7 @@
struct io_rsrc_update { struct file *file; - u64 arg; + __s32 __user *arg; u32 nr_args; u32 offset; }; @@ -32,7 +32,7 @@ static int get_compat64_io_uring_rsrc_update(struct io_uring_rsrc_update2 *up2, return -EFAULT; up2->offset = compat_up.offset; up2->resv = compat_up.resv; - up2->data = compat_up.data; + up2->data = (__kernel_uintptr_t)compat_ptr(compat_up.data); return 0; }
@@ -45,8 +45,8 @@ static int get_compat64_io_uring_rsrc_update2(struct io_uring_rsrc_update2 *up2, return -EFAULT; up2->offset = compat_up2.offset; up2->resv = compat_up2.resv; - up2->data = compat_up2.data; - up2->tags = compat_up2.tags; + up2->data = (__kernel_uintptr_t)compat_ptr(compat_up2.data); + up2->tags = (__kernel_uintptr_t)compat_ptr(compat_up2.tags); up2->nr = compat_up2.nr; up2->resv2 = compat_up2.resv2; return 0; @@ -62,8 +62,8 @@ static int get_compat64_io_uring_rsrc_register(struct io_uring_rsrc_register *rr rr->nr = compat_rr.nr; rr->flags = compat_rr.flags; rr->resv2 = compat_rr.resv2; - rr->data = compat_rr.data; - rr->tags = compat_rr.tags; + rr->data = (__kernel_uintptr_t)compat_ptr(compat_rr.data); + rr->tags = (__kernel_uintptr_t)compat_ptr(compat_rr.tags); return 0; }
@@ -73,7 +73,7 @@ static int copy_io_uring_rsrc_update_from_user(struct io_ring_ctx *ctx, { if (is_compat64_io_ring_ctx(ctx)) return get_compat64_io_uring_rsrc_update(up2, arg); - return copy_from_user(up2, arg, sizeof(struct io_uring_rsrc_update)); + return copy_from_user_with_ptr(up2, arg, sizeof(struct io_uring_rsrc_update)); }
static int copy_io_uring_rsrc_update2_from_user(struct io_ring_ctx *ctx, @@ -88,7 +88,7 @@ static int copy_io_uring_rsrc_update2_from_user(struct io_ring_ctx *ctx, } if (size != sizeof(*up2)) return -EINVAL; - return copy_from_user(up2, arg, sizeof(*up2)); + return copy_from_user_with_ptr(up2, arg, sizeof(*up2)); }
static int copy_io_uring_rsrc_register_from_user(struct io_ring_ctx *ctx, @@ -103,7 +103,7 @@ static int copy_io_uring_rsrc_register_from_user(struct io_ring_ctx *ctx, } if (size != sizeof(*rr)) return -EINVAL; - return copy_from_user(rr, arg, size); + return copy_from_user_with_ptr(rr, arg, size); }
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, @@ -184,13 +184,13 @@ static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov))) return -EFAULT;
- dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); + dst->iov_base = compat_ptr(ciov.iov_base); dst->iov_len = ciov.iov_len; return 0; } #endif src = (struct iovec __user *) arg; - if (copy_from_user(dst, &src[index], sizeof(*dst))) + if (copy_from_user_with_ptr(dst, &src[index], sizeof(*dst))) return -EFAULT; return 0; } @@ -517,8 +517,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned nr_args) { - u64 __user *tags = u64_to_user_ptr(up->tags); - __s32 __user *fds = u64_to_user_ptr(up->data); + u64 __user *tags = (u64 __user *)up->tags; + __s32 __user *fds = (__s32 __user *)up->data; struct io_rsrc_data *data = ctx->file_data; struct io_fixed_file *file_slot; struct file *file; @@ -597,9 +597,9 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned int nr_args) { - u64 __user *tags = u64_to_user_ptr(up->tags); + u64 __user *tags = (u64 __user *)up->tags; struct iovec iov; - struct iovec __user *iovs = u64_to_user_ptr(up->data); + struct iovec __user *iovs = (struct iovec __user *)up->data; struct page *last_hpage = NULL; bool needs_switch = false; __u32 done; @@ -725,13 +725,13 @@ __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, case IORING_RSRC_FILE: if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) break; - return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), - rr.nr, u64_to_user_ptr(rr.tags)); + return io_sqe_files_register(ctx, (void __user *)rr.data, + rr.nr, (u64 __user *)rr.tags); case IORING_RSRC_BUFFER: if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) break; - return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), - rr.nr, u64_to_user_ptr(rr.tags)); + return io_sqe_buffers_register(ctx, (void __user *)rr.data, + rr.nr, (u64 __user *)rr.tags); } return -EINVAL; } @@ -749,7 +749,7 @@ int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) up->nr_args = READ_ONCE(sqe->len); if 
(!up->nr_args) return -EINVAL; - up->arg = READ_ONCE(sqe->addr); + up->arg = (__s32 __user *)READ_ONCE(sqe->addr); return 0; }
@@ -757,7 +757,7 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); - __s32 __user *fds = u64_to_user_ptr(up->arg); + __s32 __user *fds = up->arg; unsigned int done; struct file *file; int ret, fd; @@ -800,7 +800,7 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags) int ret;
up2.offset = up->offset; - up2.data = up->arg; + up2.data = (__kernel_uintptr_t)up->arg; up2.nr = 0; up2.tags = 0; up2.resv = 0; diff --git a/io_uring/rw.c b/io_uring/rw.c index 2edca190450ee..229c0d778c9d6 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -23,7 +23,7 @@ struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; - u64 addr; + void __user *addr; u32 len; rwf_t flags; }; @@ -39,7 +39,7 @@ static int io_iov_compat_buffer_select_prep(struct io_rw *rw) struct compat_iovec __user *uiov; compat_ssize_t clen;
- uiov = u64_to_user_ptr(rw->addr); + uiov = rw->addr; if (!access_ok(uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(clen, &uiov->iov_len)) @@ -65,7 +65,7 @@ static int io_iov_buffer_select_prep(struct io_kiocb *req) return io_iov_compat_buffer_select_prep(rw); #endif
- uiov = u64_to_user_ptr(rw->addr); + uiov = rw->addr; if (get_user(rw->len, &uiov->iov_len)) return -EFAULT; return 0; @@ -104,7 +104,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) rw->kiocb.ki_ioprio = get_current_ioprio(); }
- rw->addr = READ_ONCE(sqe->addr); + rw->addr = (void __user *)READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags);
@@ -364,13 +364,14 @@ static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req, ssize_t ret;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - ret = io_import_fixed(ddir, iter, req->imu, rw->addr, rw->len); + ret = io_import_fixed(ddir, iter, req->imu, + user_ptr_addr(rw->addr), rw->len); if (ret) return ERR_PTR(ret); return NULL; }
- buf = u64_to_user_ptr(rw->addr); + buf = rw->addr; sqe_len = rw->len;
if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE || @@ -379,8 +380,7 @@ static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req, buf = io_buffer_select(req, &sqe_len, issue_flags); if (!buf) return ERR_PTR(-ENOBUFS); - /* TODO [PCuABI] - capability checks for uaccess */ - rw->addr = user_ptr_addr(buf); + rw->addr = buf; rw->len = sqe_len; }
@@ -446,7 +446,7 @@ static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter) if (!iov_iter_is_bvec(iter)) { iovec = iov_iter_iovec(iter); } else { - iovec.iov_base = u64_to_user_ptr(rw->addr); + iovec.iov_base = rw->addr; iovec.iov_len = rw->len; }
diff --git a/io_uring/statx.c b/io_uring/statx.c index d8fc933d3f593..d2604fdbcbe33 100644 --- a/io_uring/statx.c +++ b/io_uring/statx.c @@ -32,8 +32,8 @@ int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sx->dfd = READ_ONCE(sqe->fd); sx->mask = READ_ONCE(sqe->len); - path = u64_to_user_ptr(READ_ONCE(sqe->addr)); - sx->buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + path = (char __user *)READ_ONCE(sqe->addr); + sx->buffer = (struct statx __user *)READ_ONCE(sqe->addr2); sx->flags = READ_ONCE(sqe->statx_flags);
sx->filename = getname_flags(path, diff --git a/io_uring/tctx.c b/io_uring/tctx.c index e69e8d7ba36c0..d36993fb577c9 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -21,7 +21,7 @@ static int get_compat64_io_uring_rsrc_update(struct io_uring_rsrc_update *up, return -EFAULT; up->offset = compat_up.offset; up->resv = compat_up.resv; - up->data = compat_up.data; + up->data = (__kernel_uintptr_t)compat_ptr(compat_up.data); return 0; }
@@ -31,7 +31,7 @@ static int copy_io_uring_rsrc_update_from_user(struct io_ring_ctx *ctx, { if (is_compat64_io_ring_ctx(ctx)) return get_compat64_io_uring_rsrc_update(up, arg); - return copy_from_user(up, arg, sizeof(struct io_uring_rsrc_update)); + return copy_from_user_with_ptr(up, arg, sizeof(struct io_uring_rsrc_update)); }
static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, diff --git a/io_uring/timeout.c b/io_uring/timeout.c index e8a8c20994805..5a0fe53c13329 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -26,7 +26,7 @@ struct io_timeout {
struct io_timeout_rem { struct file *file; - u64 addr; + __kernel_uintptr_t addr;
/* timeout update */ struct timespec64 ts; @@ -337,7 +337,7 @@ static clockid_t io_timeout_get_clock(struct io_timeout_data *data) } }
-static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, +static int io_linked_timeout_update(struct io_ring_ctx *ctx, __kernel_uintptr_t user_data, struct timespec64 *ts, enum hrtimer_mode mode) __must_hold(&ctx->timeout_lock) { @@ -365,7 +365,7 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, return 0; }
-static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, +static int io_timeout_update(struct io_ring_ctx *ctx, __kernel_uintptr_t user_data, struct timespec64 *ts, enum hrtimer_mode mode) __must_hold(&ctx->timeout_lock) { @@ -405,7 +405,7 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) tr->ltimeout = true; if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) return -EINVAL; - if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) + if (get_timespec64(&tr->ts, (struct __kernel_timespec __user *)sqe->addr2)) return -EFAULT; if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) return -EINVAL; @@ -490,7 +490,7 @@ static int __io_timeout_prep(struct io_kiocb *req, data->req = req; data->flags = flags;
- if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) + if (get_timespec64(&data->ts, (struct __kernel_timespec __user *)sqe->addr)) return -EFAULT;
if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index e50de0b6b9f84..4d2d2e3f885ee 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -65,8 +65,13 @@ int io_uring_cmd_prep_async(struct io_kiocb *req) struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); size_t cmd_size;
+#ifdef CONFIG_CHERI_PURECAP_UABI + BUILD_BUG_ON(uring_cmd_pdu_size(0) != 32); + BUILD_BUG_ON(uring_cmd_pdu_size(1) != 160); +#else BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16); BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80); +#endif
cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
diff --git a/io_uring/xattr.c b/io_uring/xattr.c index 99df641594d74..1f13032e59536 100644 --- a/io_uring/xattr.c +++ b/io_uring/xattr.c @@ -53,8 +53,8 @@ static int __io_getxattr_prep(struct io_kiocb *req,
ix->filename = NULL; ix->ctx.kvalue = NULL; - name = u64_to_user_ptr(READ_ONCE(sqe->addr)); - ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + name = (char __user *)READ_ONCE(sqe->addr); + ix->ctx.cvalue = (void __user *)READ_ONCE(sqe->addr2); ix->ctx.size = READ_ONCE(sqe->len); ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
@@ -93,7 +93,7 @@ int io_getxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ret) return ret;
- path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); + path = (char __user *)READ_ONCE(sqe->addr3);
ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); if (IS_ERR(ix->filename)) { @@ -159,8 +159,8 @@ static int __io_setxattr_prep(struct io_kiocb *req, return -EBADF;
ix->filename = NULL; - name = u64_to_user_ptr(READ_ONCE(sqe->addr)); - ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + name = (char __user *)READ_ONCE(sqe->addr); + ix->ctx.cvalue = (void __user *)READ_ONCE(sqe->addr2); ix->ctx.kvalue = NULL; ix->ctx.size = READ_ONCE(sqe->len); ix->ctx.flags = READ_ONCE(sqe->xattr_flags); @@ -189,7 +189,7 @@ int io_setxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ret) return ret;
- path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); + path = (char __user *)READ_ONCE(sqe->addr3);
ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); if (IS_ERR(ix->filename)) {