summaryrefslogtreecommitdiff
path: root/io_uring/io_uring.h
diff options
context:
space:
mode:
authorDylan Yudaken <dylany@fb.com>2022-08-30 15:50:10 +0300
committerJens Axboe <axboe@kernel.dk>2022-09-21 19:30:42 +0300
commitc0e0d6ba25f180ab76d3c18f8b360a119dffa634 (patch)
tree0725da00ad5c3ee401da9f7514d73f013ec8104a /io_uring/io_uring.h
parent2327337b881d3f24949da4a4d34a6e657a71a79d (diff)
downloadlinux-c0e0d6ba25f180ab76d3c18f8b360a119dffa634.tar.xz
io_uring: add IORING_SETUP_DEFER_TASKRUN
Allow deferring async tasks until the user calls io_uring_enter(2) with the IORING_ENTER_GETEVENTS flag. Enable this mode with a flag at io_uring_setup time. This functionality requires that the later io_uring_enter be called from the same submission task, and therefore restrict this flag to work only when IORING_SETUP_SINGLE_ISSUER is also set. Being able to hand pick when tasks are run prevents the problem where there is current work to be done, but task work runs anyway. For example, a common workload would obtain a batch of CQEs, and process each one. Interrupting this to run additional task work would add latency but not gain anything. If instead task work is deferred to just before more CQEs are obtained, then no additional latency is added. The way this is implemented is by trying to keep task work local to an io_ring_ctx, rather than to the submission task. This is required, as the application will want to wake up only a single io_ring_ctx at a time to process work, and so the lists of work have to be kept separate. This has some other benefits like not having to check the task continually in handle_tw_list (and potentially unlocking/locking those), and reducing locks in the submit & process completions path. There are networking cases where using this option can reduce request latency by 50%. For instance, a contrived example using [1], where the client sends 2k of data and receives the same data back while doing some system calls (to trigger task work), shows this reduction. The reason ends up being that if sending responses is delayed by processing task work, then the client side sits idle. Reordering the sends first instead means that the client runs its workload in parallel with the local task work. 
[1]: Using https://github.com/DylanZA/netbench/tree/defer_run Client: ./netbench --client_only 1 --control_port 10000 --host <host> --tx "epoll --threads 16 --per_thread 1 --size 2048 --resp 2048 --workload 1000" Server: ./netbench --server_only 1 --control_port 10000 --rx "io_uring --defer_taskrun 0 --workload 100" --rx "io_uring --defer_taskrun 1 --workload 100" Signed-off-by: Dylan Yudaken <dylany@fb.com> Link: https://lore.kernel.org/r/20220830125013.570060-5-dylany@fb.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/io_uring.h')
-rw-r--r--io_uring/io_uring.h29
1 files changed, 25 insertions, 4 deletions
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 2f73f83af960..f417d75d7bc1 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -26,7 +26,8 @@ enum {
struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx);
bool io_req_cqe_overflow(struct io_kiocb *req);
-int io_run_task_work_sig(void);
+int io_run_task_work_sig(struct io_ring_ctx *ctx);
+int io_run_local_work(struct io_ring_ctx *ctx);
void io_req_complete_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
void io_req_complete_post(struct io_kiocb *req);
@@ -221,17 +222,37 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
}
-static inline bool io_run_task_work(void)
+static inline int io_run_task_work(void)
{
if (test_thread_flag(TIF_NOTIFY_SIGNAL)) {
__set_current_state(TASK_RUNNING);
clear_notify_signal();
if (task_work_pending(current))
task_work_run();
- return true;
+ return 1;
}
- return false;
+ return 0;
+}
+
+static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
+{
+ int ret = 0;
+ int ret2;
+
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+ ret = io_run_local_work(ctx);
+
+ /* want to run this after in case more is added */
+ ret2 = io_run_task_work();
+
+ /* Try propagate error in favour of if tasks were run,
+ * but still make sure to run them if requested
+ */
+ if (ret >= 0)
+ ret += ret2;
+
+ return ret;
}
static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)