Some userspace software, including libaio, relies on the layouts of struct aio_ring and struct io_event, even though the uAPI headers don't expose them. This is typically done to avoid the overhead of the io_getevents() syscall by reading completed events directly from the io_event array in the aio_ring, which is shared with userspace.
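As a rough sketch of that fast path, assuming a ring layout mirrored by hand from fs/aio.c (AIO_RING_MAGIC and struct aio_ring are copied into userspace precisely because the uAPI headers don't provide them; ring_getevents() is a hypothetical helper, and the memory barriers a real implementation such as libaio's needs are omitted):

  #include <linux/aio_abi.h>	/* struct io_event */

  #define AIO_RING_MAGIC	0xa10a10a1	/* from fs/aio.c, not in uAPI */

  /* Hand-copied mirror of the kernel's struct aio_ring layout. */
  struct aio_ring {
  	unsigned	id;	/* kernel internal index number */
  	unsigned	nr;	/* number of io_events */
  	unsigned	head;	/* consumed by userspace */
  	unsigned	tail;	/* written by the kernel on completion */
  	unsigned	magic;
  	unsigned	compat_features;
  	unsigned	incompat_features;
  	unsigned	header_length;	/* size of aio_ring */
  	struct io_event	io_events[];
  };

  /*
   * Consume completed events directly from the shared ring instead of
   * calling io_getevents(). Returns -1 when the ring can't be used and
   * the caller must fall back to the syscall.
   */
  static long ring_getevents(struct aio_ring *ring, long nr,
  			   struct io_event *out)
  {
  	unsigned head = ring->head;
  	long copied = 0;

  	if (ring->magic != AIO_RING_MAGIC || ring->incompat_features != 0)
  		return -1;

  	while (copied < nr && head != ring->tail) {
  		out[copied++] = ring->io_events[head];
  		head = (head + 1) % ring->nr;
  	}
  	ring->head = head;	/* hand the consumed slots back to the kernel */
  	return copied;
  }

Userspace reaches the ring because the aio_context_t returned by io_setup() is the ring's mmap address (see ctx->user_id = ctx->mmap_base below), so it can be cast directly to a struct aio_ring pointer.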
As struct io_event might contain pointers as members, introduce a compat version of the struct, and implement helpers that convert between the native and compat layouts of the struct.
A subsequent patch will change struct io_event to support new architectures. On such architectures, the current struct layout must still be supported for compat tasks.
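To illustrate the divergence being prepared for (purely an assumption for illustration; the actual native layout lands in the later patch, and __kernel_uintptr_t as the member type is hypothetical here): on an architecture whose user pointers are wider than 64 bits, the native and compat layouts would no longer match, so sizeof(struct io_event) could differ between the two ABIs:

  #include <linux/types.h>	/* __u64, __s64 */

  /*
   * Hypothetical native layout on a new architecture; assumed for
   * illustration only, the real definition comes with the later patch.
   * __kernel_uintptr_t stands in for a pointer-sized integer wider than
   * 64 bits, provided by that architecture's uAPI headers.
   */
  struct io_event {
  	__kernel_uintptr_t	data;	/* may be wider than 64 bits */
  	__kernel_uintptr_t	obj;	/* refers to the submitted iocb */
  	__s64			res;
  	__s64			res2;
  };

  /* Fixed layout that compat64 tasks keep seeing in the shared ring. */
  struct compat_io_event {
  	__u64	data;
  	__u64	obj;
  	__s64	res;
  	__s64	res2;
  };

This is why every ring-sizing and ring-indexing calculation in the patch goes through io_event_size() rather than a bare sizeof(struct io_event).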
Signed-off-by: Tudor Cretu <tudor.cretu@arm.com>
---
 fs/aio.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 78 insertions(+), 16 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index c835deda5cdcc..e6e45498ccd27 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -53,6 +53,13 @@
 #define AIO_RING_MAGIC			0xa10a10a1
 #define AIO_RING_COMPAT_FEATURES	1
 #define AIO_RING_INCOMPAT_FEATURES	0
+struct compat_io_event {
+	__u64		data;
+	__u64		obj;
+	__s64		res;
+	__s64		res2;
+};
+
 struct aio_ring {
 	unsigned	id;	/* kernel internal index number */
 	unsigned	nr;	/* number of io_events */
@@ -164,6 +171,7 @@ struct kioctx {
 	struct file		*aio_ring_file;
 
 	unsigned		id;
+	bool			compat;
 };
 
 /*
@@ -280,6 +288,50 @@ static struct vfsmount *aio_mnt;
 static const struct file_operations aio_ring_fops;
 static const struct address_space_operations aio_ctx_aops;
 
+static inline bool is_compat64_aio_ctx(struct kioctx *ctx)
+{
+	return IS_ENABLED(CONFIG_COMPAT64) && ctx->compat;
+}
+
+static inline size_t io_event_size(struct kioctx *ctx)
+{
+	return is_compat64_aio_ctx(ctx) ? sizeof(struct compat_io_event)
+					: sizeof(struct io_event);
+}
+
+static struct io_event __user *get_ith_user_io_event(struct kioctx *ctx,
+						     struct io_event __user *event_array,
+						     int i)
+{
+	if (is_compat64_aio_ctx(ctx))
+		return (struct io_event __user *)&((struct compat_io_event __user *)event_array)[i];
+	return &event_array[i];
+}
+
+static u32 __user *get_user_key_ptr(struct kioctx *ctx,
+				    struct iocb __user *user_iocb)
+{
+	return is_compat64_aio_ctx(ctx) ?
+			&((struct compat_iocb __user *)user_iocb)->aio_key :
+			&user_iocb->aio_key;
+}
+
+static void copy_io_event_to_ring(struct kioctx *ctx,
+				  struct io_event *native_event,
+				  struct io_event *ring_event)
+{
+	if (is_compat64_aio_ctx(ctx)) {
+		struct compat_io_event *compat_ring_event = (struct compat_io_event *)ring_event;
+
+		compat_ring_event->data = native_event->data;
+		compat_ring_event->obj = native_event->obj;
+		compat_ring_event->res = native_event->res;
+		compat_ring_event->res2 = native_event->res2;
+		return;
+	}
+	*ring_event = *native_event;
+}
+
 static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 {
 	struct file *file;
@@ -519,7 +571,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 	nr_events += 2;	/* 1 is required, 2 for good luck */
 
 	size = sizeof(struct aio_ring);
-	size += sizeof(struct io_event) * nr_events;
+	size += io_event_size(ctx) * nr_events;
 
 	nr_pages = PFN_UP(size);
 	if (nr_pages < 0)
@@ -533,7 +585,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 
 	ctx->aio_ring_file = file;
 	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
-			/ sizeof(struct io_event);
+			/ io_event_size(ctx);
 
 	ctx->ring_pages = ctx->internal_pages;
 	if (nr_pages > AIO_RING_PAGES) {
@@ -588,6 +640,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 
 	ctx->user_id = ctx->mmap_base;
 	ctx->nr_events = nr_events; /* trusted copy */
+	ctx->compat = in_compat_syscall();
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
@@ -603,7 +656,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 	return 0;
 }
 
-static unsigned int get_event_pages_idx(unsigned int event_idx,
+static unsigned int get_event_pages_idx(struct kioctx *ctx,
+					unsigned int event_idx,
 					unsigned int nr_events,
 					size_t *event_offset,
 					unsigned int *nr_pages)
@@ -612,12 +666,12 @@ static unsigned int get_event_pages_idx(unsigned int event_idx,
 	size_t off;
 
 	off = sizeof(struct aio_ring);
-	off += sizeof(struct io_event) * event_idx;
+	off += io_event_size(ctx) * event_idx;
 
 	page_idx = off / PAGE_SIZE;
 	*event_offset = offset_in_page(off);
 
-	off += sizeof(struct io_event) * nr_events - 1;
+	off += io_event_size(ctx) * nr_events - 1;
 	*nr_pages = off / PAGE_SIZE + 1 - page_idx;
 	return page_idx;
 }
@@ -1164,7 +1218,7 @@ static void aio_complete(struct aio_kiocb *iocb)
 
 	tail = ctx->tail;
 
-	ctx_page_idx = get_event_pages_idx(tail, 1, &event_offset, &nr_pages);
+	ctx_page_idx = get_event_pages_idx(ctx, tail, 1, &event_offset, &nr_pages);
 	ring_pages = vmap(ctx->ring_pages + ctx_page_idx, nr_pages, VM_MAP, PAGE_KERNEL);
 	if (!unlikely(ring_pages)) {
 		pr_warn("Couldn't map aio ring event pages\n");
@@ -1173,7 +1227,7 @@ static void aio_complete(struct aio_kiocb *iocb)
 	}
 	event = ring_pages + event_offset;
 
-	*event = iocb->ki_res;
+	copy_io_event_to_ring(ctx, &iocb->ki_res, event);
 
 	vunmap(ring_pages);
 	for (unsigned int page_idx = ctx_page_idx; page_idx < ctx_page_idx + nr_pages; page_idx++)
@@ -1284,7 +1338,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 			break;
 		avail = (head <= tail ? tail : ctx->nr_events) - head;
 		avail = min(avail, nr - ret);
-		ctx_page_idx = get_event_pages_idx(head, avail, &event_offset, &nr_pages);
+		ctx_page_idx = get_event_pages_idx(ctx, head, avail, &event_offset, &nr_pages);
 		ring_pages = vmap(ctx->ring_pages + ctx_page_idx, nr_pages, VM_MAP, PAGE_KERNEL);
 		if (!ring_pages) {
 			ret = -ENOMEM;
@@ -1293,7 +1347,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 
 		copy_ret = copy_to_user(event, ring_pages + event_offset,
-					sizeof(struct io_event) * avail);
+					io_event_size(ctx) * avail);
 
 		vunmap(ring_pages);
 
@@ -1322,7 +1376,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
 static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
 			    struct io_event __user *event, long *i)
 {
-	long ret = aio_read_events_ring(ctx, event + *i, nr - *i);
+	long ret = aio_read_events_ring(ctx,
+					get_ith_user_io_event(ctx, event, *i),
+					nr - *i);
 
 	if (ret > 0)
 		*i += ret;
@@ -2001,7 +2057,7 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
 		req->ki_eventfd = eventfd;
 	}
 
-	if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) {
+	if (unlikely(put_user(KIOCB_KEY, get_user_key_ptr(ctx, user_iocb)))) {
 		pr_debug("EFAULT: aio_key\n");
 		return -EFAULT;
 	}
@@ -2223,15 +2279,20 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	u32 key;
 	u64 obj = user_ptr_addr(iocb);
 
-	if (unlikely(get_user(key, &iocb->aio_key)))
-		return -EFAULT;
-	if (unlikely(key != KIOCB_KEY))
-		return -EINVAL;
-
 	ctx = lookup_ioctx(ctx_id);
 	if (unlikely(!ctx))
 		return -EINVAL;
 
+	if (unlikely(get_user(key, get_user_key_ptr(ctx, iocb)))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (unlikely(key != KIOCB_KEY)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	spin_lock_irq(&ctx->ctx_lock);
 	/* TODO: use a hash or array, this sucks. */
 	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
@@ -2252,6 +2313,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		ret = -EINPROGRESS;
 	}
 
+out:
 	percpu_ref_put(&ctx->users);
 
 	return ret;