The aio ring mechanism relied on two underlying assumptions:

1. The size of the aio_ring header is equal to the size of an io_event,
   i.e. sizeof(aio_ring) == sizeof(io_event)
2. There is no leftover space at the end of a page when populating it
   with io_events, i.e. PAGE_SIZE is a multiple of sizeof(io_event)
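For illustration, a minimal sketch of the byte-offset arithmetic that holds even when neither assumption is true (not part of the patch; idx, page_idx and event_offset are illustrative names):

	/*
	 * Event 'idx' is located purely by byte offset from the start of the
	 * ring, so nothing is assumed about the header size or about leftover
	 * space at the end of a page.
	 */
	size_t off = sizeof(struct aio_ring) + idx * sizeof(struct io_event);
	unsigned int page_idx = off / PAGE_SIZE;  /* ring page holding the event's first byte */
	size_t event_offset = off % PAGE_SIZE;    /* offset of the event within that page */

This is the same arithmetic performed by get_event_pages_idx() introduced below.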
This change makes the aio_ring mechanism more robust so that it no longer relies on the above assumptions. It also improves the copying of io_events from the shared ring to userspace by batching them together into at most two copy_to_user calls.
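Why two calls are enough (a sketch of the reasoning, not patch code; first and second are illustrative names): the ring is circular, so the pending events between head and tail form at most two contiguous runs.

	long first, second;

	/* events from head up to tail, or up to the end of the ring */
	first = (head <= tail ? tail : ctx->nr_events) - head;
	/* a second run exists only when the pending events wrap around */
	second = (head <= tail) ? 0 : tail;

Each contiguous run can then be handed to copy_to_user in a single call.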
Mapping pages independently using kmap doesn't create a virtually contiguous address space. As some io_event structs could now span page boundaries, all the pages needed to access an event's data must be mapped into one virtually contiguous range. For this reason, kmap is replaced with vmap.
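As a rough sketch of the resulting access pattern (not the patch itself; locking and surrounding context are omitted, and va is an illustrative name):

	struct io_event ev;
	void *va;

	/* one contiguous kernel mapping over the pages backing the event */
	va = vmap(ctx->ring_pages + page_idx, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!va)
		return -ENOMEM;
	/* the event can be read even if it straddles a page boundary */
	memcpy(&ev, va + event_offset, sizeof(ev));
	vunmap(va);

vmap() builds a new kernel virtual mapping over the given pages and must be paired with vunmap() to release it.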
Signed-off-by: Tudor Cretu <tudor.cretu@arm.com>
---
 fs/aio.c | 80 ++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 52 insertions(+), 28 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 079074f47c2e7..c835deda5cdcc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -603,9 +603,24 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 	return 0;
 }
 
-#define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
-#define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
-#define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
+static unsigned int get_event_pages_idx(unsigned int event_idx,
+					unsigned int nr_events,
+					size_t *event_offset,
+					unsigned int *nr_pages)
+{
+	unsigned int page_idx;
+	size_t off;
+
+	off = sizeof(struct aio_ring);
+	off += sizeof(struct io_event) * event_idx;
+
+	page_idx = off / PAGE_SIZE;
+	*event_offset = offset_in_page(off);
+
+	off += sizeof(struct io_event) * nr_events - 1;
+	*nr_pages = off / PAGE_SIZE + 1 - page_idx;
+	return page_idx;
+}
 
 void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
@@ -1134,8 +1149,10 @@ static void aio_complete(struct aio_kiocb *iocb)
 {
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
-	struct io_event	*ev_page, *event;
-	unsigned tail, pos, head;
+	struct io_event	*event;
+	void *ring_pages;
+	size_t event_offset;
+	unsigned int tail, head, ctx_page_idx, nr_pages;
 	unsigned long	flags;
 
 	/*
@@ -1146,18 +1163,21 @@ static void aio_complete(struct aio_kiocb *iocb)
 	spin_lock_irqsave(&ctx->completion_lock, flags);
 
 	tail = ctx->tail;
-	pos = tail + AIO_EVENTS_OFFSET;
 
-	if (++tail >= ctx->nr_events)
-		tail = 0;
-
-	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
-	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
+	ctx_page_idx = get_event_pages_idx(tail, 1, &event_offset, &nr_pages);
+	ring_pages = vmap(ctx->ring_pages + ctx_page_idx, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (unlikely(!ring_pages)) {
+		pr_warn("Couldn't map aio ring event pages\n");
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+		return;
+	}
+	event = ring_pages + event_offset;
 
 	*event = iocb->ki_res;
 
-	kunmap_atomic(ev_page);
-	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+	vunmap(ring_pages);
+	for (unsigned int page_idx = ctx_page_idx; page_idx < ctx_page_idx + nr_pages; page_idx++)
+		flush_dcache_page(ctx->ring_pages[page_idx]);
 
 	pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
 		 (void __user *)(unsigned long)iocb->ki_res.obj,
@@ -1168,6 +1188,8 @@ static void aio_complete(struct aio_kiocb *iocb)
 	 */
 	smp_wmb();	/* make event visible before updating tail */
 
+	if (++tail >= ctx->nr_events)
+		tail = 0;
 	ctx->tail = tail;
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
@@ -1219,9 +1241,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
 				 struct io_event __user *event, long nr)
 {
 	struct aio_ring *ring;
-	unsigned head, tail, pos;
+	unsigned int head, tail;
 	long ret = 0;
-	int copy_ret;
 
 	/*
 	 * The mutex can block and wake us up and that will cause
@@ -1253,25 +1274,28 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	tail %= ctx->nr_events;
 
 	while (ret < nr) {
+		unsigned int ctx_page_idx, nr_pages;
+		void *ring_pages;
+		size_t event_offset;
 		long avail;
-		struct io_event *ev;
-		struct page *page;
+		int copy_ret;
 
-		avail = (head <= tail ? tail : ctx->nr_events) - head;
 		if (head == tail)
 			break;
-
-		pos = head + AIO_EVENTS_OFFSET;
-		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
-		pos %= AIO_EVENTS_PER_PAGE;
-
+		avail = (head <= tail ? tail : ctx->nr_events) - head;
 		avail = min(avail, nr - ret);
-		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
+		ctx_page_idx = get_event_pages_idx(head, avail, &event_offset, &nr_pages);
+		ring_pages = vmap(ctx->ring_pages + ctx_page_idx, nr_pages, VM_MAP, PAGE_KERNEL);
+		if (!ring_pages) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		copy_ret = copy_to_user(event + ret,
+					ring_pages + event_offset,
+					sizeof(struct io_event) * avail);
 
-		ev = kmap(page);
-		copy_ret = copy_to_user(event + ret, ev + pos,
-					sizeof(*ev) * avail);
-		kunmap(page);
+		vunmap(ring_pages);
 
 		if (unlikely(copy_ret)) {
 			ret = -EFAULT;