Merge tag 'for-6.2/io_uring-2022-12-08' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:

 - Always ensure proper ordering in case of CQ ring overflow, which then
   means we can remove some work-arounds for that (Dylan)

 - Support completion batching for multishot, greatly increasing the
   efficiency for those (Dylan)

 - Flag epoll/eventfd wakeups done from io_uring, so that we can easily
   tell if we're recursing into io_uring again.

   Previously, this would have resulted in repeated multishot
   notifications if we had a dependency there. That could happen if an
   eventfd was registered as the ring eventfd, and we multishot polled
   for events on it. Or if an io_uring fd was added to epoll, and
   io_uring had a multishot request for the epoll fd.

   Test cases here:
	https://git.kernel.dk/cgit/liburing/commit/?id=919755a7d0096fda08fb6d65ac54ad8d0fe027cd

   Previously these got terminated when the CQ ring eventually
   overflowed, now it's handled gracefully (me).

 - Tightening of the IOPOLL based completions (Pavel)

 - Optimizations of the networking zero-copy paths (Pavel)

 - Various tweaks and fixes (Dylan, Pavel)

* tag 'for-6.2/io_uring-2022-12-08' of git://git.kernel.dk/linux: (41 commits)
  io_uring: keep unlock_post inlined in hot path
  io_uring: don't use complete_post in kbuf
  io_uring: spelling fix
  io_uring: remove io_req_complete_post_tw
  io_uring: allow multishot polled reqs to defer completion
  io_uring: remove overflow param from io_post_aux_cqe
  io_uring: add lockdep assertion in io_fill_cqe_aux
  io_uring: make io_fill_cqe_aux static
  io_uring: add io_aux_cqe which allows deferred completion
  io_uring: allow defer completion for aux posted cqes
  io_uring: defer all io_req_complete_failed
  io_uring: always lock in io_apoll_task_func
  io_uring: remove iopoll spinlock
  io_uring: iopoll protect complete_post
  io_uring: inline __io_req_complete_put()
  io_uring: remove io_req_tw_post_queue
  io_uring: use io_req_task_complete() in timeout
  io_uring: hold locks for io_req_complete_failed
  io_uring: add completion locking for iopoll
  io_uring: kill io_cqring_ev_posted() and __io_cq_unlock_post()
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 21:33:08 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 21:33:08 +0300
commit: 54e60e505d6144a22c787b5be1fdce996a27be1b (patch)
tree: a0b582fa8d9de216fef4fb6320199bb055125c65 /io_uring/net.c
parent: d523ec4c6af4314575d6ab8b52629ae3e2039a50 (diff)
parent: 5d772916855f593672de55c437925daccc8ecd73 (diff)
download: linux-54e60e505d6144a22c787b5be1fdce996a27be1b.tar.xz
1 files changed, 34 insertions, 22 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 2818aeefea42..cb831326ea5b 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -125,13 +125,15 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
 	struct io_cache_entry *entry;
 	struct io_async_msghdr *hdr;
 
-	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
-	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
-		hdr = container_of(entry, struct io_async_msghdr, cache);
-		hdr->free_iov = NULL;
-		req->flags |= REQ_F_ASYNC_DATA;
-		req->async_data = hdr;
-		return hdr;
+	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+		entry = io_alloc_cache_get(&ctx->netmsg_cache);
+		if (entry) {
+			hdr = container_of(entry, struct io_async_msghdr, cache);
+			hdr->free_iov = NULL;
+			req->flags |= REQ_F_ASYNC_DATA;
+			req->async_data = hdr;
+			return hdr;
+		}
 	}
 
 	if (!io_alloc_async_data(req)) {
@@ -599,16 +601,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 	}
 
 	if (!mshot_finished) {
-		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
-				    cflags | IORING_CQE_F_MORE, false)) {
+		if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+			       req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
 			io_recv_prep_retry(req);
 			return false;
 		}
-		/*
-		 * Otherwise stop multishot but use the current result.
-		 * Probably will end up going into overflow, but this means
-		 * we cannot trust the ordering anymore
-		 */
+		/* Otherwise stop multishot but use the current result. */
 	}
 
 	io_req_set_res(req, *ret, cflags);
@@ -923,6 +921,9 @@ void io_send_zc_cleanup(struct io_kiocb *req)
 	}
 }
 
+#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
+#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
+
 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -935,10 +936,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (req->flags & REQ_F_CQE_SKIP)
 		return -EINVAL;
 
-	zc->flags = READ_ONCE(sqe->ioprio);
-	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
-			  IORING_RECVSEND_FIXED_BUF))
-		return -EINVAL;
 	notif = zc->notif = io_alloc_notif(ctx);
 	if (!notif)
 		return -ENOMEM;
@@ -946,6 +943,17 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	notif->cqe.res = 0;
 	notif->cqe.flags = IORING_CQE_F_NOTIF;
 	req->flags |= REQ_F_NEED_CLEANUP;
+
+	zc->flags = READ_ONCE(sqe->ioprio);
+	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
+		if (zc->flags & ~IO_ZC_FLAGS_VALID)
+			return -EINVAL;
+		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
+			io_notif_set_extended(notif);
+			io_notif_to_data(notif)->zc_report = true;
+		}
+	}
+
 	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
 		unsigned idx = READ_ONCE(sqe->buf_index);
 
@@ -1087,6 +1095,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
 			return ret;
 		msg.sg_from_iter = io_sg_from_iter;
 	} else {
+		io_notif_set_extended(zc->notif);
 		ret = import_single_range(ITER_SOURCE, zc->buf, zc->len, &iov,
 					  &msg.msg_iter);
 		if (unlikely(ret))
@@ -1148,6 +1157,8 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 	unsigned flags;
 	int ret, min_ret = 0;
 
+	io_notif_set_extended(sr->notif);
+
 	sock = sock_from_file(req->file);
 	if (unlikely(!sock))
 		return -ENOTSOCK;
@@ -1307,12 +1318,13 @@ retry:
 		return IOU_OK;
 	}
 
-	if (ret >= 0 &&
-	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
+	if (ret < 0)
+		return ret;
+	if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+		       req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
 		goto retry;
 
-	io_req_set_res(req, ret, 0);
-	return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK;
+	return -ECANCELED;
 }
 
 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
author	Linus Torvalds <torvalds@linux-foundation.org>	2022-12-13 21:33:08 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-12-13 21:33:08 +0300
commit	54e60e505d6144a22c787b5be1fdce996a27be1b (patch)
tree	a0b582fa8d9de216fef4fb6320199bb055125c65 /io_uring/net.c
parent	d523ec4c6af4314575d6ab8b52629ae3e2039a50 (diff)
parent	5d772916855f593672de55c437925daccc8ecd73 (diff)
download	linux-54e60e505d6144a22c787b5be1fdce996a27be1b.tar.xz