From bc24d6bd32df0be19df3d30e74be4ba56493c0e2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:42 +0100 Subject: io_uring: add notification slot registration Let userspace register and unregister notification slots. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a0aa8161fe3ebb2a4cc6e5dbd0cffb96e6881cf5.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4c9b11e2e991..dcfc7a0bda0c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -457,6 +457,10 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, + /* zerocopy notification API */ + IORING_REGISTER_NOTIFIERS = 26, + IORING_UNREGISTER_NOTIFIERS = 27, + /* this goes last */ IORING_REGISTER_LAST }; @@ -503,6 +507,19 @@ struct io_uring_rsrc_update2 { __u32 resv2; }; +struct io_uring_notification_slot { + __u64 tag; + __u64 resv[3]; +}; + +struct io_uring_notification_register { + __u32 nr_slots; + __u32 resv; + __u64 resv2; + __u64 data; + __u64 resv3; +}; + /* Skip updating fd indexes set to this value in the fd table */ #define IORING_REGISTER_FILES_SKIP (-2) -- cgit v1.2.3 From 06a5464be84e4ae48394d34441baf34bf9706827 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:43 +0100 Subject: io_uring: wire send zc request type Add a new io_uring opcode IORING_OP_SENDZC_NOTIF. The main distinction from IORING_OP_SEND is that the user should specify a notification slot index in sqe::notification_idx and the buffers are safe to reuse only when the used notification is flushed and completes. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a80387c6a68ce9cf99b3b6ef6f71068468761fb7.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++ io_uring/net.c | 94 +++++++++++++++++++++++++++++++++++++++++++ io_uring/net.h | 3 ++ io_uring/opdef.c | 15 +++++++ 4 files changed, 117 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index dcfc7a0bda0c..82bf2991e9bd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -66,6 +66,10 @@ struct io_uring_sqe { union { __s32 splice_fd_in; __u32 file_index; + struct { + __u16 notification_idx; + __u16 __pad; + }; }; union { struct { @@ -197,6 +201,7 @@ enum io_uring_op { IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, + IORING_OP_SENDZC_NOTIF, /* this goes last, obviously */ IORING_OP_LAST, diff --git a/io_uring/net.c b/io_uring/net.c index bbc9c603641a..89a8678ce69b 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -14,6 +14,7 @@ #include "kbuf.h" #include "alloc_cache.h" #include "net.h" +#include "notif.h" #if defined(CONFIG_NET) struct io_shutdown { @@ -59,6 +60,15 @@ struct io_sr_msg { unsigned int flags; }; +struct io_sendzc { + struct file *file; + void __user *buf; + size_t len; + u16 slot_idx; + unsigned msg_flags; + unsigned flags; +}; + #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -834,6 +844,90 @@ out_free: return ret; } +int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_sendzc *zc = io_kiocb_to_cmd(req); + + if (READ_ONCE(sqe->addr2) || READ_ONCE(sqe->__pad2[0]) || + READ_ONCE(sqe->addr3)) + return -EINVAL; + + zc->flags = READ_ONCE(sqe->ioprio); + if (zc->flags & ~IORING_RECVSEND_POLL_FIRST) + return -EINVAL; + + zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); + zc->len = READ_ONCE(sqe->len); + 
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; + zc->slot_idx = READ_ONCE(sqe->notification_idx); + if (zc->msg_flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + zc->msg_flags |= MSG_CMSG_COMPAT; +#endif + return 0; +} + +int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_sendzc *zc = io_kiocb_to_cmd(req); + struct io_notif_slot *notif_slot; + struct io_notif *notif; + struct msghdr msg; + struct iovec iov; + struct socket *sock; + unsigned msg_flags; + int ret, min_ret = 0; + + if (!(req->flags & REQ_F_POLLED) && + (zc->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; + + if (issue_flags & IO_URING_F_UNLOCKED) + return -EAGAIN; + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + notif_slot = io_get_notif_slot(ctx, zc->slot_idx); + if (!notif_slot) + return -EINVAL; + notif = io_get_notif(ctx, notif_slot); + if (!notif) + return -ENOMEM; + + msg.msg_name = NULL; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + + ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter); + if (unlikely(ret)) + return ret; + + msg_flags = zc->msg_flags | MSG_ZEROCOPY; + if (issue_flags & IO_URING_F_NONBLOCK) + msg_flags |= MSG_DONTWAIT; + if (msg_flags & MSG_WAITALL) + min_ret = iov_iter_count(&msg.msg_iter); + + msg.msg_flags = msg_flags; + msg.msg_ubuf = &notif->uarg; + msg.sg_from_iter = NULL; + ret = sock_sendmsg(sock, &msg); + + if (unlikely(ret < min_ret)) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return -EAGAIN; + return ret == -ERESTARTSYS ? 
-EINTR : ret; + } + + io_req_set_res(req, ret, 0); + return IOU_OK; +} + int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_accept *accept = io_kiocb_to_cmd(req); diff --git a/io_uring/net.h b/io_uring/net.h index db20ce9d6546..7c438d39c089 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -52,6 +52,9 @@ int io_connect_prep_async(struct io_kiocb *req); int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_connect(struct io_kiocb *req, unsigned int issue_flags); +int io_sendzc(struct io_kiocb *req, unsigned int issue_flags); +int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); + void io_netmsg_cache_free(struct io_cache_entry *entry); #else static inline void io_netmsg_cache_free(struct io_cache_entry *entry) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index a7b84b43e6c2..7ab19bbf3126 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -470,6 +470,21 @@ const struct io_op_def io_op_defs[] = { .issue = io_uring_cmd, .prep_async = io_uring_cmd_prep_async, }, + [IORING_OP_SENDZC_NOTIF] = { + .name = "SENDZC_NOTIF", + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + .audit_skip = 1, + .ioprio = 1, +#if defined(CONFIG_NET) + .prep = io_sendzc_prep, + .issue = io_sendzc, +#else + .prep = io_eopnotsupp_prep, +#endif + + }, }; const char *io_uring_get_opcode(u8 opcode) -- cgit v1.2.3 From 092aeedb750a9fad0f0252d6067fc91d76ca44bd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:45 +0100 Subject: io_uring: allow to pass addr into sendzc Allow to specify an address to zerocopy sends making it more like sendto(2). 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/70417a8f7c5b51ab454690bae08adc0c187f89e8.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- io_uring/net.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 82bf2991e9bd..0736e2773a5d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -68,7 +68,7 @@ struct io_uring_sqe { __u32 file_index; struct { __u16 notification_idx; - __u16 __pad; + __u16 addr_len; }; }; union { diff --git a/io_uring/net.c b/io_uring/net.c index 2d04a70b0632..61414d865cd7 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -67,6 +67,8 @@ struct io_sendzc { u16 slot_idx; unsigned msg_flags; unsigned flags; + unsigned addr_len; + void __user *addr; }; #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) @@ -848,8 +850,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sendzc *zc = io_kiocb_to_cmd(req); - if (READ_ONCE(sqe->addr2) || READ_ONCE(sqe->__pad2[0]) || - READ_ONCE(sqe->addr3)) + if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)) return -EINVAL; zc->flags = READ_ONCE(sqe->ioprio); @@ -862,6 +863,10 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) zc->slot_idx = READ_ONCE(sqe->notification_idx); if (zc->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; + + zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + zc->addr_len = READ_ONCE(sqe->addr_len); + #ifdef CONFIG_COMPAT if (req->ctx->compat) zc->msg_flags |= MSG_CMSG_COMPAT; @@ -871,6 +876,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) { + struct sockaddr_storage address; struct io_ring_ctx *ctx = req->ctx; struct io_sendzc *zc = io_kiocb_to_cmd(req); struct io_notif_slot 
*notif_slot; @@ -908,6 +914,14 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) return ret; mm_account_pinned_pages(&notif->uarg.mmp, zc->len); + if (zc->addr) { + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); + if (unlikely(ret < 0)) + return ret; + msg.msg_name = (struct sockaddr *)&address; + msg.msg_namelen = zc->addr_len; + } + msg_flags = zc->msg_flags | MSG_ZEROCOPY; if (issue_flags & IO_URING_F_NONBLOCK) msg_flags |= MSG_DONTWAIT; -- cgit v1.2.3 From 10c7d33ecd51619e453cf6aeee8e326f8ba5cfea Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:46 +0100 Subject: io_uring: sendzc with fixed buffers Allow zerocopy sends to use fixed buffers. There is an optimisation for this case: the network layer doesn't need to reference the pages (see SKBFL_MANAGED_FRAG_REFS), so io_uring has to ensure the validity of fixed buffers until the notifier is released. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e1d8bd1b5934e541d90c1824eb4020ae3f5f43f3.1657643355.git.asml.silence@gmail.com [axboe: fold in 32-bit pointer cast warning fix] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 +++++- io_uring/net.c | 29 ++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0736e2773a5d..f1a9ff9b9ea7 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -272,9 +272,13 @@ enum io_uring_op { * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if * the handler will continue to report * CQEs on behalf of the same SQE. + * + * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in + * the buf_index field. 
*/ #define IORING_RECVSEND_POLL_FIRST (1U << 0) -#define IORING_RECV_MULTISHOT (1U << 1) +#define IORING_RECV_MULTISHOT (1U << 1) +#define IORING_RECVSEND_FIXED_BUF (1U << 2) /* * accept flags stored in sqe->ioprio diff --git a/io_uring/net.c b/io_uring/net.c index 61414d865cd7..ab443c52dcfd 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -15,6 +15,7 @@ #include "alloc_cache.h" #include "net.h" #include "notif.h" +#include "rsrc.h" #if defined(CONFIG_NET) struct io_shutdown { @@ -849,13 +850,23 @@ out_free: int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sendzc *zc = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)) return -EINVAL; zc->flags = READ_ONCE(sqe->ioprio); - if (zc->flags & ~IORING_RECVSEND_POLL_FIRST) + if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)) return -EINVAL; + if (zc->flags & IORING_RECVSEND_FIXED_BUF) { + unsigned idx = READ_ONCE(sqe->buf_index); + + if (unlikely(idx >= ctx->nr_user_bufs)) + return -EFAULT; + idx = array_index_nospec(idx, ctx->nr_user_bufs); + req->imu = READ_ONCE(ctx->user_bufs[idx]); + io_req_set_rsrc_node(req, ctx, 0); + } zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); zc->len = READ_ONCE(sqe->len); @@ -909,10 +920,18 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) msg.msg_controllen = 0; msg.msg_namelen = 0; - ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter); - if (unlikely(ret)) - return ret; - mm_account_pinned_pages(&notif->uarg.mmp, zc->len); + if (zc->flags & IORING_RECVSEND_FIXED_BUF) { + ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, + (u64)(uintptr_t)zc->buf, zc->len); + if (unlikely(ret)) + return ret; + } else { + ret = import_single_range(WRITE, zc->buf, zc->len, &iov, + &msg.msg_iter); + if (unlikely(ret)) + return ret; + mm_account_pinned_pages(&notif->uarg.mmp, zc->len); + } if (zc->addr) { ret = move_addr_to_kernel(zc->addr, 
zc->addr_len, &address); -- cgit v1.2.3 From 63809137ebb58f0aa2ce359117422686e3304f45 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:47 +0100 Subject: io_uring: flush notifiers after sendzc Allow to flush notifiers as a part of sendzc request by setting IORING_RECVSEND_NOTIF_FLUSH flag. When the sendzc request succeeds it will flush the used [active] notifier. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e0b4d9a6797e2fd6092824fe42953db7a519bbc8.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ io_uring/io_uring.c | 11 +---------- io_uring/io_uring.h | 10 ++++++++++ io_uring/net.c | 5 ++++- io_uring/notif.c | 2 +- io_uring/notif.h | 11 +++++++++++ 6 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f1a9ff9b9ea7..45272eb37d10 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -275,10 +275,14 @@ enum io_uring_op { * * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in * the buf_index field. + * + * IORING_RECVSEND_NOTIF_FLUSH Flush a notification after a successful + * send. Only for zerocopy sends. 
*/ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) +#define IORING_RECVSEND_NOTIF_FLUSH (1U << 3) /* * accept flags stored in sqe->ioprio diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cae11374456e..1d600a63643b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -621,7 +621,7 @@ void __io_put_task(struct task_struct *task, int nr) put_task_struct_many(task, nr); } -static void io_task_refs_refill(struct io_uring_task *tctx) +void io_task_refs_refill(struct io_uring_task *tctx) { unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; @@ -630,15 +630,6 @@ static void io_task_refs_refill(struct io_uring_task *tctx) tctx->cached_refs += refill; } -static inline void io_get_task_refs(int nr) -{ - struct io_uring_task *tctx = current->io_uring; - - tctx->cached_refs -= nr; - if (unlikely(tctx->cached_refs < 0)) - io_task_refs_refill(tctx); -} - static __cold void io_uring_drop_tctx_refs(struct task_struct *task) { struct io_uring_task *tctx = task->io_uring; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 66bfd880d07f..cc81a9d1fd4d 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -74,6 +74,7 @@ void io_wq_submit_work(struct io_wq_work *work); void io_free_req(struct io_kiocb *req); void io_queue_next(struct io_kiocb *req); void __io_put_task(struct task_struct *task, int nr); +void io_task_refs_refill(struct io_uring_task *tctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); @@ -270,4 +271,13 @@ static inline void io_put_task(struct task_struct *task, int nr) __io_put_task(task, nr); } +static inline void io_get_task_refs(int nr) +{ + struct io_uring_task *tctx = current->io_uring; + + tctx->cached_refs -= nr; + if (unlikely(tctx->cached_refs < 0)) + io_task_refs_refill(tctx); +} + #endif diff --git a/io_uring/net.c b/io_uring/net.c index ab443c52dcfd..9ac2ce37c522 100644 --- 
a/io_uring/net.c +++ b/io_uring/net.c @@ -856,7 +856,8 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; zc->flags = READ_ONCE(sqe->ioprio); - if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)) + if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | + IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH)) return -EINVAL; if (zc->flags & IORING_RECVSEND_FIXED_BUF) { unsigned idx = READ_ONCE(sqe->buf_index); @@ -958,6 +959,8 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) return ret == -ERESTARTSYS ? -EINTR : ret; } + if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) + io_notif_slot_flush_submit(notif_slot, 0); io_req_set_res(req, ret, 0); return IOU_OK; } diff --git a/io_uring/notif.c b/io_uring/notif.c index c5179e5c1cd6..a93887451bbb 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -133,7 +133,7 @@ struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx, return notif; } -static void io_notif_slot_flush(struct io_notif_slot *slot) +void io_notif_slot_flush(struct io_notif_slot *slot) __must_hold(&ctx->uring_lock) { struct io_notif *notif = slot->notif; diff --git a/io_uring/notif.h b/io_uring/notif.h index 00efe164bdc4..6cd73d7b965b 100644 --- a/io_uring/notif.h +++ b/io_uring/notif.h @@ -54,6 +54,7 @@ int io_notif_register(struct io_ring_ctx *ctx, int io_notif_unregister(struct io_ring_ctx *ctx); void io_notif_cache_purge(struct io_ring_ctx *ctx); +void io_notif_slot_flush(struct io_notif_slot *slot); struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx, struct io_notif_slot *slot); @@ -74,3 +75,13 @@ static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx, idx = array_index_nospec(idx, ctx->nr_notif_slots); return &ctx->notif_slots[idx]; } + +static inline void io_notif_slot_flush_submit(struct io_notif_slot *slot, + unsigned int issue_flags) +{ + if (!(issue_flags & IO_URING_F_UNLOCKED)) { + slot->notif->task = current; + io_get_task_refs(1); + } + 
io_notif_slot_flush(slot); +} -- cgit v1.2.3 From 4379d5f15b3fd4224c37841029178aa8082a242e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:48 +0100 Subject: io_uring: rename IORING_OP_FILES_UPDATE IORING_OP_FILES_UPDATE will be a more generic opcode serving different resource types, rename it into IORING_OP_RSRC_UPDATE and add subtype handling. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0a907133907d9af3415a8a7aa1802c6aa97c03c6.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 +++++++++++- io_uring/opdef.c | 9 +++++---- io_uring/rsrc.c | 17 +++++++++++++++-- io_uring/rsrc.h | 4 ++-- 4 files changed, 33 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 45272eb37d10..210a00ab6301 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -174,7 +174,8 @@ enum io_uring_op { IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, - IORING_OP_FILES_UPDATE, + IORING_OP_RSRC_UPDATE, + IORING_OP_FILES_UPDATE = IORING_OP_RSRC_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, @@ -223,6 +224,7 @@ enum io_uring_op { #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) + /* * sqe->splice_flags * extends splice(2) flags @@ -289,6 +291,14 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) + +/* + * IORING_OP_RSRC_UPDATE flags + */ +enum { + IORING_RSRC_UPDATE_FILES, +}; + /* * IORING_OP_MSG_RING command types, stored in sqe->addr */ diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 7ab19bbf3126..72dd2b2d8a9d 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -246,12 +246,13 @@ const struct io_op_def io_op_defs[] = { .prep = io_close_prep, .issue = io_close, }, - 
[IORING_OP_FILES_UPDATE] = { + [IORING_OP_RSRC_UPDATE] = { .audit_skip = 1, .iopoll = 1, - .name = "FILES_UPDATE", - .prep = io_files_update_prep, - .issue = io_files_update, + .name = "RSRC_UPDATE", + .prep = io_rsrc_update_prep, + .issue = io_rsrc_update, + .ioprio = 1, }, [IORING_OP_STATX] = { .audit_skip = 1, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7f66b0e25674..fc2b337e6c25 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -21,6 +21,7 @@ struct io_rsrc_update { u64 arg; u32 nr_args; u32 offset; + int type; }; static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, @@ -657,7 +658,7 @@ __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, return -EINVAL; } -int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rsrc_update *up = io_kiocb_to_cmd(req); @@ -671,6 +672,7 @@ int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!up->nr_args) return -EINVAL; up->arg = READ_ONCE(sqe->addr); + up->type = READ_ONCE(sqe->ioprio); return 0; } @@ -713,7 +715,7 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req, return ret; } -int io_files_update(struct io_kiocb *req, unsigned int issue_flags) +static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req); struct io_ring_ctx *ctx = req->ctx; @@ -742,6 +744,17 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } +int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_rsrc_update *up = io_kiocb_to_cmd(req); + + switch (up->type) { + case IORING_RSRC_UPDATE_FILES: + return io_files_update(req, issue_flags); + } + return -EINVAL; +} + int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, struct io_rsrc_node *node, void *rsrc) { diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 
af342fd239d0..21813a23215f 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -167,6 +167,6 @@ static inline u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) return &data->tags[table_idx][off]; } -int io_files_update(struct io_kiocb *req, unsigned int issue_flags); -int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags); +int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); #endif -- cgit v1.2.3 From 492dddb4f6e3a5839c27d41ff1fecdbe6c3ab851 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:49 +0100 Subject: io_uring: add zc notification flush requests Overlay notification control onto IORING_OP_RSRC_UPDATE (former IORING_OP_FILES_UPDATE). It allows flushing a range of zc notifications from slots with indexes [sqe->off, sqe->off+sqe->len). If sqe->addr is not zero, it also copies sqe->addr as a new tag for all flushed notifications. Note, it doesn't flush a notification of a slot if there were no requests attached to it (since last flush or registration). 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/df13e2363400682a73dd9e71c3b990b8d1ff0333.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + io_uring/rsrc.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 210a00ab6301..1463cfecb56b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -297,6 +297,7 @@ enum io_uring_op { */ enum { IORING_RSRC_UPDATE_FILES, + IORING_RSRC_UPDATE_NOTIF, }; /* diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index fc2b337e6c25..9165fdf64269 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -15,6 +15,7 @@ #include "io_uring.h" #include "openclose.h" #include "rsrc.h" +#include "notif.h" struct io_rsrc_update { struct file *file; @@ -744,6 +745,41 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } +static int io_notif_update(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_rsrc_update *up = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + unsigned len = up->nr_args; + unsigned idx_end, idx = up->offset; + int ret = 0; + + io_ring_submit_lock(ctx, issue_flags); + if (unlikely(check_add_overflow(idx, len, &idx_end))) { + ret = -EOVERFLOW; + goto out; + } + if (unlikely(idx_end > ctx->nr_notif_slots)) { + ret = -EINVAL; + goto out; + } + + for (; idx < idx_end; idx++) { + struct io_notif_slot *slot = &ctx->notif_slots[idx]; + + if (!slot->notif) + continue; + if (up->arg) + slot->tag = up->arg; + io_notif_slot_flush_submit(slot, issue_flags); + } +out: + io_ring_submit_unlock(ctx, issue_flags); + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + return IOU_OK; +} + int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req); @@ -751,6 +787,8 @@ int 
io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) switch (up->type) { case IORING_RSRC_UPDATE_FILES: return io_files_update(req, issue_flags); + case IORING_RSRC_UPDATE_NOTIF: + return io_notif_update(req, issue_flags); } return -EINVAL; } -- cgit v1.2.3