The aio_context_t represents the pointer to the userspace mapping of aio_ring buffer. In some userspace software, including libaio, it's used to directly access the aio_ring. As __kernel_ulong_t type is not large enough to fit a user pointer in the PCuABI, change the aio_context_t to void __user *. Moreover, make a full capability comparison in lookup_ioctx().
Also, make sure to always create a valid user pointer in the compat syscall handlers or if the syscalls are invoked from a compat context by using compat_ptr().
Signed-off-by: Tudor Cretu tudor.cretu@arm.com --- fs/aio.c | 71 +++++++++++++++++++++++++----------- include/uapi/linux/aio_abi.h | 2 +- 2 files changed, 50 insertions(+), 23 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c index a0bd049b0c45..dff01598760e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -97,7 +97,7 @@ struct kioctx {
struct percpu_ref reqs;
- unsigned long user_id; + aio_context_t user_id;
struct __percpu kioctx_cpu *cpu;
@@ -314,6 +314,14 @@ static u32 __user *aio_key_uptr(struct kioctx *ctx, &user_iocb->aio_key; }
+static inline bool aio_ctx_id_is_same(struct kioctx *ctx, + aio_context_t ctx_id) +{ + if (IS_ENABLED(CONFIG_CHERI_PURECAP_UABI) && !aio_in_compat64(ctx)) + return __builtin_cheri_equal_exact(ctx->user_id, ctx_id); + return ctx->user_id == ctx_id; +} + static int copy_io_events_to_user(struct kioctx *ctx, struct io_event __user *event_array, long offset, @@ -457,7 +465,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma) ctx = rcu_dereference(table->table[i]); if (ctx && ctx->aio_ring_file == file) { if (!atomic_read(&ctx->dead)) { - ctx->user_id = ctx->mmap_base = vma->vm_start; + ctx->mmap_base = vma->vm_start; + /* TODO [PCuABI] - derive proper capability */ + ctx->user_id = uaddr_to_user_ptr_safe(ctx->mmap_base); res = 0; } break; @@ -658,7 +668,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
- ctx->user_id = ctx->mmap_base; + /* TODO [PCuABI] - derive proper capability */ + ctx->user_id = uaddr_to_user_ptr_safe(ctx->mmap_base); ctx->nr_events = nr_events; /* trusted copy */
ctx->ring = vmap(ctx->ring_pages, nr_pages, VM_MAP, PAGE_KERNEL); @@ -910,7 +921,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) mutex_unlock(&ctx->ring_lock);
pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", - ctx, ctx->user_id, mm, ctx->nr_events); + ctx, user_ptr_addr(ctx->user_id), mm, ctx->nr_events); return ctx;
err_cleanup: @@ -1160,9 +1171,9 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) return req; }
-static struct kioctx *lookup_ioctx(unsigned long ctx_id) +static struct kioctx *lookup_ioctx(aio_context_t ctx_id) { - struct aio_ring __user *ring = (void __user *)ctx_id; + struct aio_ring __user *ring = ctx_id; struct mm_struct *mm = current->mm; struct kioctx *ctx, *ret = NULL; struct kioctx_table *table; @@ -1179,7 +1190,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
id = array_index_nospec(id, table->nr); ctx = rcu_dereference(table->table[id]); - if (ctx && ctx->user_id == ctx_id) { + if (ctx && aio_ctx_id_is_same(ctx, ctx_id)) { if (percpu_ref_tryget_live(&ctx->users)) ret = ctx; } @@ -1399,27 +1410,32 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, * pointer is passed for ctxp. Will fail with -ENOSYS if not * implemented. */ -SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) +SYSCALL_DEFINE2(io_setup, unsigned, nr_events, +#ifdef CONFIG_CHERI_PURECAP_UABI +void * __capability * __capability, ctxp) +#else +aio_context_t __user *, ctxp) +#endif { struct kioctx *ioctx = NULL; - unsigned long ctx; + aio_context_t ctx; long ret;
- ret = get_user(ctx, ctxp); + ret = get_user_ptr(ctx, ctxp); if (unlikely(ret)) goto out;
ret = -EINVAL; if (unlikely(ctx || nr_events == 0)) { pr_debug("EINVAL: ctx %lu nr_events %u\n", - ctx, nr_events); + user_ptr_addr(ctx), nr_events); goto out; }
ioctx = ioctx_alloc(nr_events); ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) { - ret = put_user(ioctx->user_id, ctxp); + ret = put_user_ptr(ioctx->user_id, ctxp); if (ret) kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users); @@ -1433,7 +1449,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, compat_aio_context_t __user *, ctxp) { struct kioctx *ioctx = NULL; - unsigned long ctx; + compat_aio_context_t ctx; long ret;
ret = get_user(ctx, ctxp); @@ -1451,7 +1467,7 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, compat_aio_context_t __use ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) { /* truncating is ok because it's a user address */ - ret = put_user((compat_aio_context_t)ioctx->user_id, ctxp); + ret = put_user(ptr_to_compat(ioctx->user_id), ctxp); if (ret) kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users); @@ -1470,7 +1486,12 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, compat_aio_context_t __use */ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { - struct kioctx *ioctx = lookup_ioctx(ctx); + struct kioctx *ioctx; + + if (in_compat_syscall()) + ctx = compat_ptr(user_ptr_addr(ctx)); + + ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { struct ctx_rq_wait wait; int ret; @@ -2194,7 +2215,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, if (unlikely(nr < 0)) return -EINVAL;
- ctx = lookup_ioctx(ctx_id); + ctx = lookup_ioctx(compat_ptr(ctx_id)); if (unlikely(!ctx)) { pr_debug("EINVAL: invalid context id\n"); return -EINVAL; @@ -2244,6 +2265,9 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, u32 key; u64 obj = user_ptr_addr(iocb);
+ if (in_compat_syscall()) + ctx_id = compat_ptr(user_ptr_addr(ctx_id)); + ctx = lookup_ioctx(ctx_id); if (unlikely(!ctx)) return -EINVAL; @@ -2329,6 +2353,9 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, if (timeout && unlikely(get_timespec64(&ts, timeout))) return -EFAULT;
+ if (in_compat_syscall()) + ctx_id = compat_ptr(user_ptr_addr(ctx_id)); + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); if (!ret && signal_pending(current)) ret = -EINTR; @@ -2378,7 +2405,7 @@ SYSCALL_DEFINE6(io_pgetevents, #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
SYSCALL_DEFINE6(io_pgetevents_time32, - aio_context_t, ctx_id, + compat_aio_context_t, ctx_id, long, min_nr, long, nr, struct io_event __user *, events, @@ -2401,7 +2428,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, if (ret) return ret;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); + ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &ts : NULL);
interrupted = signal_pending(current); restore_saved_sigmask_unless(interrupted); @@ -2415,7 +2442,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
#if defined(CONFIG_COMPAT_32BIT_TIME)
-SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, +SYSCALL_DEFINE5(io_getevents_time32, compat_aio_context_t, ctx_id, __s32, min_nr, __s32, nr, struct io_event __user *, events, @@ -2427,7 +2454,7 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, if (timeout && get_old_timespec32(&t, timeout)) return -EFAULT;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); + ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &t : NULL); if (!ret && signal_pending(current)) ret = -EINTR; return ret; @@ -2467,7 +2494,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, if (ret) return ret;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); + ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &t : NULL);
interrupted = signal_pending(current); restore_saved_sigmask_unless(interrupted); @@ -2502,7 +2529,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, if (ret) return ret;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); + ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &t : NULL);
interrupted = signal_pending(current); restore_saved_sigmask_unless(interrupted); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index 8050909c2887..87c270021449 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -31,7 +31,7 @@ #include <linux/fs.h> #include <asm/byteorder.h>
-typedef __kernel_ulong_t aio_context_t; +typedef void __user *aio_context_t;
enum { IOCB_CMD_PREAD = 0,