On 04/04/2023 15:53, Tudor Cretu wrote:
The io_event struct may contain user pointers. In the PCuABI, a user pointer is a 129-bit capability, so the __u64 type is not big enough to hold it. Use the __kernel_uintptr_t type instead, which is big enough on the affected architectures while remaining 64-bit on others.
We should probably elaborate on what libaio does here too, as it's not obvious that struct io_event::data and even ::obj should be pointers. libaio needs both to be pointers for its callback mechanism, so we don't have much choice.
The aio_context_t represents the pointer to the userspace mapping of aio_ring buffer. In some userspace software, including libaio, it's used to directly access the aio_ring. As __kernel_ulong_t type is not large enough to fit a user pointer in the PCuABI, change the aio_context_t to __kernel_uintptr_t.
Signed-off-by: Tudor Cretu tudor.cretu@arm.com
fs/aio.c | 29 +++++++++++++++-------------- include/uapi/linux/aio_abi.h | 12 ++++++------ 2 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c index a83bf7f656ca4..3405e056a8ac5 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -323,8 +323,8 @@ static void copy_io_event_to_ring(struct kioctx *ctx, if (is_compat64_aio_ctx(ctx)) { struct compat_io_event *compat_ring_event = (struct compat_io_event *)ring_event;
compat_ring_event->data = native_event->data;
compat_ring_event->obj = native_event->obj;
compat_ring_event->data = (__u64)native_event->data;
compat_ring_event->res = native_event->res; compat_ring_event->res2 = native_event->res2; return;compat_ring_event->obj = (__u64)native_event->obj;
@@ -1168,7 +1168,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) return req; } -static struct kioctx *lookup_ioctx(unsigned long ctx_id) +static struct kioctx *lookup_ioctx(aio_context_t ctx_id)
From an ABI perspective, we need to decide what to do with the way we match aio_context_t. Currently struct kioctx::user_id remains an unsigned long so we only match the address, which is not very consistent with the way we're handling this sort of situation in general. That said, doing a full capability comparison may prove challenging. It should not be too hard in the normal case, as do_mmap() will eventually return a capability. However, if the mapping is remapped, aio_ring_mremap() will adjust user_id accordingly, but if we represent it as a capability we would need to ensure that we derive exactly the same capability as returned by the mremap syscall. Not sure if this is worth the hassle, as the security benefit seems dubious, but the inconsistency is not great either.
{ struct aio_ring __user *ring = (void __user *)ctx_id; struct mm_struct *mm = current->mm; @@ -1244,8 +1244,9 @@ static void aio_complete(struct aio_kiocb *iocb) flush_dcache_page(ctx->ring_pages[page_idx]); pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
(void __user *)(unsigned long)iocb->ki_res.obj,
iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
(void __user *)iocb->ki_res.obj,
I'm suspecting this line doesn't get built at all, as otherwise we'd get a warning in PCuABI here (at least with GCC), as %p is not appropriate to print capabilities. Unfortunately I still haven't got round to looking into [1] to add a new specifier to print user pointers generically. Printing the address should be good enough here though.
[1] https://git.morello-project.org/morello/kernel/linux/-/issues/53
(unsigned long long)iocb->ki_res.data,
iocb->ki_res.res, iocb->ki_res.res2);
/* after flagging the request as done, we * must never even look at it again @@ -1447,24 +1448,25 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) { struct kioctx *ioctx = NULL;
- unsigned long ctx;
- aio_context_t ctx; long ret;
- ret = get_user(ctx, ctxp);
- ret = get_user_ptr(ctx, ctxp); if (unlikely(ret)) goto out;
ret = -EINVAL; if (unlikely(ctx || nr_events == 0)) { pr_debug("EINVAL: ctx %lu nr_events %u\n",
ctx, nr_events);
(unsigned long)ctx, nr_events);
Note that it will also warn if aio_context_t is changed to be a user pointer (see below). Using user_ptr_addr() would be appropriate in that case.
goto out;
} ioctx = ioctx_alloc(nr_events); ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
/* TODO [PCuABI] - derive proper capability */
if (ret) kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users);ret = put_user_ptr(uaddr_to_user_ptr_safe(ioctx->user_id), ctxp);
@@ -2072,7 +2074,7 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, return -EFAULT; }
- req->ki_res.obj = user_ptr_addr(user_iocb);
- req->ki_res.obj = (__kernel_uintptr_t)user_iocb; req->ki_res.data = iocb->aio_data; req->ki_res.res = 0; req->ki_res.res2 = 0;
@@ -2103,7 +2105,7 @@ static int get_compat_iocb(struct iocb *iocb, const struct iocb __user *user_ioc struct compat_iocb compat_iocb; if (unlikely(copy_from_user(&compat_iocb, user_iocb, sizeof(struct compat_iocb)))) return -EFAULT;
- iocb->aio_data = compat_iocb.aio_data;
- iocb->aio_data = (__kernel_uintptr_t)compat_iocb.aio_data; iocb->aio_key = compat_iocb.aio_key; iocb->aio_rw_flags = compat_iocb.aio_rw_flags; iocb->aio_lio_opcode = compat_iocb.aio_lio_opcode;
@@ -2287,7 +2289,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct aio_kiocb *kiocb; int ret = -EINVAL; u32 key;
- u64 obj = user_ptr_addr(iocb);
ctx = lookup_ioctx(ctx_id); if (unlikely(!ctx)) @@ -2306,7 +2307,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, spin_lock_irq(&ctx->ctx_lock); /* TODO: use a hash or array, this sucks. */ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
if (kiocb->ki_res.obj == obj) {
if (user_ptr_is_same((struct iocb __user *)kiocb->ki_res.obj, iocb)) { ret = kiocb->ki_cancel(&kiocb->rw); list_del_init(&kiocb->ki_list); break;
@@ -2403,7 +2404,7 @@ SYSCALL_DEFINE6(io_pgetevents, if (timeout && unlikely(get_timespec64(&ts, timeout))) return -EFAULT;
- if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
- if (usig && copy_from_user_with_ptr(&ksig, usig, sizeof(ksig))) return -EFAULT;
ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index 8050909c28876..b8804ef20b8f5 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -31,7 +31,7 @@ #include <linux/fs.h> #include <asm/byteorder.h> -typedef __kernel_ulong_t aio_context_t; +typedef __kernel_uintptr_t aio_context_t;
__kernel_uintptr_t is only backwards-compatible when replacing __u64. In this case we would effectively expand the type to 64 bits in 32-bit, which is not intended.
I wonder if we could get away with just defining it as void __user *. It looks like a few fixups in the kernel would be needed (Clang and GCC don't seem to understand T __capability * if T is itself a capability), and more importantly it is a uapi change, but I am quite hopeful it wouldn't break much if anything. libaio doesn't use aio_context_t and instead defines its own io_context_t as a pointer. A Debian Code Search does show quite a few users of aio_context_t, but AFAICT it is only ever passed to the AIO syscalls or initialised to 0 (works if defined as a pointer too) - pretty much an opaque type.
Kevin
enum { IOCB_CMD_PREAD = 0, @@ -58,10 +58,10 @@ enum { /* read() from /dev/aio returns these structures. */ struct io_event {
- __u64 data; /* the data field from the iocb */
- __u64 obj; /* what iocb this event came from */
- __s64 res; /* result code for this event */
- __s64 res2; /* secondary result */
- __kernel_uintptr_t data; /* the data field from the iocb */
- __kernel_uintptr_t obj; /* what iocb this event came from */
- __s64 res; /* result code for this event */
- __s64 res2; /* secondary result */
}; /* @@ -72,7 +72,7 @@ struct io_event { struct iocb { /* these are internal to the kernel/libc. */
- __u64 aio_data; /* data to be returned in event's data */
- __kernel_uintptr_t aio_data; /* data to be returned in event's data */
#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) __u32 aio_key; /* the kernel sets aio_key to the req # */