On 22-06-2023 08:59, Kevin Brodsky wrote:
On 15/06/2023 17:54, Tudor Cretu wrote:
The aio_context_t represents the pointer to the userspace mapping of aio_ring buffer.
I would slightly rephrase this. It seems that originally aio_context_t was meant to represent a token with no particular meaning. In a way it still represents that, but it can also be used in the way you describe afterwards.
Indeed!
In some userspace software, including libaio, it's used to directly access the aio_ring. As __kernel_ulong_t type is not large enough to fit a user pointer in the PCuABI, change the aio_context_t to void __user *. Moreover, make a full capability comparison in lookup_ioctx().
Would be good to explain why (mainly we still consider it a token).
Done
Also, make sure to always create a valid user pointer in the compat syscall handlers or if the syscalls are invoked from a compat context by using compat_ptr().
Signed-off-by: Tudor Cretu tudor.cretu@arm.com
fs/aio.c | 71 +++++++++++++++++++++++++----------- include/uapi/linux/aio_abi.h | 2 +- 2 files changed, 50 insertions(+), 23 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c index a0bd049b0c45..dff01598760e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -97,7 +97,7 @@ struct kioctx { struct percpu_ref reqs;
- unsigned long user_id;
- aio_context_t user_id;
struct __percpu kioctx_cpu *cpu; @@ -314,6 +314,14 @@ static u32 __user *aio_key_uptr(struct kioctx *ctx, &user_iocb->aio_key; } +static inline bool aio_ctx_id_is_same(struct kioctx *ctx,
aio_context_t ctx_id)
+{
- if (IS_ENABLED(CONFIG_CHERI_PURECAP_UABI) && !aio_in_compat64(ctx))
return __builtin_cheri_equal_exact(ctx->user_id, ctx_id);
I've pondered on this some more and I think this is the right thing to do, starting from the assumption that aio_context_t represents some token and should therefore not be modified in any way. In any case we do want to check the capability metadata, as it needs to enable access to the ring (which we access via a kernel mapping, so some kind of explicit checking is required). It might be a bit tricky to ensure that the derivation in aio_ring_mremap() matches exactly what mremap() returns once we start narrowing bounds/permissions there, but we'll see how it goes. I'm not sure that functionality is actually used at all anyway.
Small issue with the current implementation, it won't build on !CHERI targets. That can be fixed by using user_ptr_is_same(). It is generic, so no need to special-case PCuABI at all. It still makes sense to special-case compat64 I think, as a full capability comparison would be artificial in that case (and slower).
Done!
- return ctx->user_id == ctx_id;
+}
- static int copy_io_events_to_user(struct kioctx *ctx, struct io_event __user *event_array, long offset,
@@ -457,7 +465,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma) ctx = rcu_dereference(table->table[i]); if (ctx && ctx->aio_ring_file == file) { if (!atomic_read(&ctx->dead)) {
ctx->user_id = ctx->mmap_base = vma->vm_start;
ctx->mmap_base = vma->vm_start;
/* TODO [PCuABI] - derive proper capability */
ctx->user_id = uaddr_to_user_ptr_safe(ctx->mmap_base); res = 0; } break;
@@ -658,7 +668,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
- ctx->user_id = ctx->mmap_base;
- /* TODO [PCuABI] - derive proper capability */
- ctx->user_id = uaddr_to_user_ptr_safe(ctx->mmap_base); ctx->nr_events = nr_events; /* trusted copy */
ctx->ring = vmap(ctx->ring_pages, nr_pages, VM_MAP, PAGE_KERNEL); @@ -910,7 +921,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) mutex_unlock(&ctx->ring_lock); pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
ctx, ctx->user_id, mm, ctx->nr_events);
return ctx;ctx, user_ptr_addr(ctx->user_id), mm, ctx->nr_events);
err_cleanup: @@ -1160,9 +1171,9 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) return req; } -static struct kioctx *lookup_ioctx(unsigned long ctx_id) +static struct kioctx *lookup_ioctx(aio_context_t ctx_id) {
- struct aio_ring __user *ring = (void __user *)ctx_id;
- struct aio_ring __user *ring = ctx_id; struct mm_struct *mm = current->mm; struct kioctx *ctx, *ret = NULL; struct kioctx_table *table;
@@ -1179,7 +1190,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) id = array_index_nospec(id, table->nr); ctx = rcu_dereference(table->table[id]);
- if (ctx && ctx->user_id == ctx_id) {
- if (ctx && aio_ctx_id_is_same(ctx, ctx_id)) { if (percpu_ref_tryget_live(&ctx->users)) ret = ctx; }
@@ -1399,27 +1410,32 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
- pointer is passed for ctxp. Will fail with -ENOSYS if not
- implemented.
*/ -SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) +SYSCALL_DEFINE2(io_setup, unsigned, nr_events, +#ifdef CONFIG_CHERI_PURECAP_UABI +void * __capability * __capability, ctxp)
I think we can still use aio_context_t, can't we? We just need to move the second __capability after *.
Indeed!
Also these argument lines should be indented as usual.
Oups!
+#else +aio_context_t __user *, ctxp) +#endif { struct kioctx *ioctx = NULL;
- unsigned long ctx;
- aio_context_t ctx; long ret;
- ret = get_user(ctx, ctxp);
- ret = get_user_ptr(ctx, ctxp); if (unlikely(ret)) goto out;
ret = -EINVAL; if (unlikely(ctx || nr_events == 0)) { pr_debug("EINVAL: ctx %lu nr_events %u\n",
ctx, nr_events);
goto out; }user_ptr_addr(ctx), nr_events);
ioctx = ioctx_alloc(nr_events); ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
if (ret) kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users);ret = put_user_ptr(ioctx->user_id, ctxp);
@@ -1433,7 +1449,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, compat_aio_context_t __user *, ctxp) { struct kioctx *ioctx = NULL;
- unsigned long ctx;
- compat_aio_context_t ctx; long ret;
ret = get_user(ctx, ctxp); @@ -1451,7 +1467,7 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, compat_aio_context_t __use ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) { /* truncating is ok because it's a user address */
I'd remove that comment now we're converting the pointer the proper way.
Done
ret = put_user((compat_aio_context_t)ioctx->user_id, ctxp);
if (ret) kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users);ret = put_user(ptr_to_compat(ioctx->user_id), ctxp);
@@ -1470,7 +1486,12 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, compat_aio_context_t __use */ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) {
- struct kioctx *ioctx = lookup_ioctx(ctx);
- struct kioctx *ioctx;
- if (in_compat_syscall())
ctx = compat_ptr(user_ptr_addr(ctx));
No need to bother with that: in compat, the syscall wrappers automatically create the user pointers a sycall expects using compat_ptr() :) Otherwise ctxp wouldn't be usable in io_setup, for instance.
It slipped my mind when I did this change. Thank you!
- ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { struct ctx_rq_wait wait; int ret;
@@ -2194,7 +2215,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, if (unlikely(nr < 0)) return -EINVAL;
- ctx = lookup_ioctx(ctx_id);
- ctx = lookup_ioctx(compat_ptr(ctx_id)); if (unlikely(!ctx)) { pr_debug("EINVAL: invalid context id\n"); return -EINVAL;
@@ -2244,6 +2265,9 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, u32 key; u64 obj = user_ptr_addr(iocb);
- if (in_compat_syscall())
ctx_id = compat_ptr(user_ptr_addr(ctx_id));
- ctx = lookup_ioctx(ctx_id); if (unlikely(!ctx)) return -EINVAL;
@@ -2329,6 +2353,9 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, if (timeout && unlikely(get_timespec64(&ts, timeout))) return -EFAULT;
- if (in_compat_syscall())
ctx_id = compat_ptr(user_ptr_addr(ctx_id));
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); if (!ret && signal_pending(current)) ret = -EINTR;
@@ -2378,7 +2405,7 @@ SYSCALL_DEFINE6(io_pgetevents, #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) SYSCALL_DEFINE6(io_pgetevents_time32,
aio_context_t, ctx_id,
compat_aio_context_t, ctx_id,
I'm confused, this is not a compat handler?
Done!
Many thanks, Tudor
Kevin
long, min_nr, long, nr, struct io_event __user *, events,
@@ -2401,7 +2428,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, if (ret) return ret;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &ts : NULL);
interrupted = signal_pending(current); restore_saved_sigmask_unless(interrupted); @@ -2415,7 +2442,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, #if defined(CONFIG_COMPAT_32BIT_TIME) -SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, +SYSCALL_DEFINE5(io_getevents_time32, compat_aio_context_t, ctx_id, __s32, min_nr, __s32, nr, struct io_event __user *, events, @@ -2427,7 +2454,7 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, if (timeout && get_old_timespec32(&t, timeout)) return -EFAULT;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &t : NULL); if (!ret && signal_pending(current)) ret = -EINTR; return ret;
@@ -2467,7 +2494,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, if (ret) return ret;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &t : NULL);
interrupted = signal_pending(current); restore_saved_sigmask_unless(interrupted); @@ -2502,7 +2529,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, if (ret) return ret;
- ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- ret = do_io_getevents(compat_ptr(ctx_id), min_nr, nr, events, timeout ? &t : NULL);
interrupted = signal_pending(current); restore_saved_sigmask_unless(interrupted); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index 8050909c2887..87c270021449 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -31,7 +31,7 @@ #include <linux/fs.h> #include <asm/byteorder.h> -typedef __kernel_ulong_t aio_context_t; +typedef void __user *aio_context_t; enum { IOCB_CMD_PREAD = 0,