On 16/03/2023 14:40, Tudor Cretu wrote:
Some members of the io_uring uAPI structs may contain user pointers. In the PCuABI, a user pointer is a 129-bit capability, so the __u64 type is not big enough to hold it. Use the __kernel_uintptr_t type instead, which is big enough on the affected architectures while remaining 64-bit on others.
The user_data field must be passed unchanged from the submission queue to the completion queue. As it is standard practice to store a pointer in user_data, expand the field to __kernel_uintptr_t. However, the kernel doesn't dereference the user_data, so don't convert it in the compat case.
In addition, for the io_uring structs containing user pointers, use the special copy routines when copying user pointers from/to userspace.
In the case of operation IORING_OP_POLL_REMOVE, if IORING_POLL_UPDATE_USER_DATA is set in the SQE len field, then the request will update the user_data of an existing poll request based on the value passed in the addr2 field, instead of the off field. This is required because the off field is not large enough to fit a user_data value.
Note that the structs io_uring_sqe and io_uring_cqe are doubled in size in PCuABI. The setup flags IORING_SETUP_SQE128 and IORING_SETUP_CQE32 used to double the sizes of the two structs up to 128 bytes and 32 bytes respectively. In PCuABI, the two flags are still used to double the sizes of the two structs, but, as they increased in size, they increase up to 256 bytes and 64 bytes.
Signed-off-by: Tudor Cretu tudor.cretu@arm.com
include/linux/io_uring_types.h | 4 +- include/trace/events/io_uring.h | 46 ++++++++++----------
As per my reply in v3, I think we're better off not changing the signature of trace functions unless we also make them print the capability metadata, which doesn't sound essential.
include/uapi/linux/io_uring.h | 76 ++++++++++++++++++--------------- io_uring/advise.c | 7 +-- io_uring/cancel.c | 6 +-- io_uring/cancel.h | 2 +- io_uring/epoll.c | 2 +- io_uring/fdinfo.c | 8 ++-- io_uring/fs.c | 16 +++---- io_uring/io_uring.c | 62 +++++++++++++++++++++++---- io_uring/io_uring.h | 25 ++++++----- io_uring/kbuf.c | 19 +++++---- io_uring/kbuf.h | 2 +- io_uring/msg_ring.c | 4 +- io_uring/net.c | 20 ++++----- io_uring/openclose.c | 4 +- io_uring/poll.c | 6 +-- io_uring/rsrc.c | 44 +++++++++---------- io_uring/rw.c | 18 ++++---- io_uring/statx.c | 4 +- io_uring/tctx.c | 4 +- io_uring/timeout.c | 10 ++--- io_uring/uring_cmd.c | 5 +++ io_uring/xattr.c | 12 +++--- 24 files changed, 235 insertions(+), 171 deletions(-)
[...]
static bool io_cancel_cb(struct io_wq_work *work, void *data) diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 6a59ee484d0cc..7c1249d61bf25 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -5,7 +5,7 @@ struct io_cancel_data { struct io_ring_ctx *ctx; union {
u64 data;
__kernel_uintptr_t data;
This still needs some more work if we really want to treat the user data as a capability in PCuABI. That means that functions such as io_cancel_cb() need to do a full capability comparison (standard arithmetic only compares the address). Worth mentioning in the commit message too as this is not necessarily an obvious choice.
At first I thought you could use user_ptr_is_same() for that purpose, but in fact this isn't entirely appropriate, as we always want a 64-bit comparison in !PCuABI and user pointers are 32-bit on a 32-bit arch. It would probably be better to introduce an io_uring helper, with an implementation similar to user_ptr_is_same(), but taking __kernel_uintptr_t instead of void __user * (that would also avoid unnecessary casts).
struct file *file;
}; u32 flags;
[...]
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index b388592e67df9..4614ab633c4bd 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -22,7 +22,7 @@ struct io_provide_buf { struct file *file;
- __u64 addr;
- void __user *addr; __u32 len; __u32 bgid; __u16 nbufs;
@@ -36,7 +36,7 @@ static int get_compat64_io_uring_buf_reg(struct io_uring_buf_reg *reg, if (copy_from_user(&compat_reg, user_reg, sizeof(compat_reg))) return -EFAULT;
- reg->ring_addr = compat_reg.ring_addr;
- reg->ring_addr = (__kernel_uintptr_t)compat_ptr(compat_reg.ring_addr); reg->ring_entries = compat_reg.ring_entries; reg->bgid = compat_reg.bgid; reg->pad = compat_reg.pad;
@@ -50,7 +50,7 @@ static int copy_io_uring_buf_reg_from_user(struct io_ring_ctx *ctx, { if (is_compat64_io_ring_ctx(ctx)) return get_compat64_io_uring_buf_reg(reg, arg);
- return copy_from_user(reg, arg, sizeof(*reg));
- return copy_from_user_with_ptr(reg, arg, sizeof(*reg));
} static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, @@ -145,7 +145,7 @@ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, req->flags |= REQ_F_BUFFER_SELECTED; req->kbuf = kbuf; req->buf_index = kbuf->bid;
return u64_to_user_ptr(kbuf->addr);
return (void __user *)kbuf->addr;
I think you don't need a cast any more.
[...]
diff --git a/io_uring/tctx.c b/io_uring/tctx.c index e69e8d7ba36c0..d36993fb577c9 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -21,7 +21,7 @@ static int get_compat64_io_uring_rsrc_update(struct io_uring_rsrc_update *up, return -EFAULT; up->offset = compat_up.offset; up->resv = compat_up.resv;
- up->data = compat_up.data;
- up->data = (__kernel_uintptr_t)compat_ptr(compat_up.data); return 0;
} @@ -31,7 +31,7 @@ static int copy_io_uring_rsrc_update_from_user(struct io_ring_ctx *ctx, { if (is_compat64_io_ring_ctx(ctx)) return get_compat64_io_uring_rsrc_update(up, arg);
- return copy_from_user(up, arg, sizeof(struct io_uring_rsrc_update));
- return copy_from_user_with_ptr(up, arg, sizeof(struct io_uring_rsrc_update));
You might have missed my comment in v3, there is no pointer here and we may want to rename that function to disambiguate.
Kevin