summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2024-05-11 17:25:50 +0300
committerJens Axboe <axboe@kernel.dk>2024-05-11 17:25:50 +0300
commitad1978dbbd827c1a1a7d22d9cc9ba71989dae48a (patch)
tree6b2b571ea2abb54e6691d3597bc85162fe6746fc /include/linux
parentcf87f46fd34d6c19283d9625a7822f20d90b64a4 (diff)
parentdeb1e496a83557896fe0cca0b8af01c2a97c0dc6 (diff)
downloadlinux-ad1978dbbd827c1a1a7d22d9cc9ba71989dae48a.tar.xz
Merge branch 'for-6.10/io_uring' into net-accept-more
* for-6.10/io_uring: (97 commits) io_uring: support to inject result for NOP io_uring: fail NOP if non-zero op flags is passed in io_uring/net: add IORING_ACCEPT_POLL_FIRST flag io_uring/net: add IORING_ACCEPT_DONTWAIT flag io_uring/filetable: don't unnecessarily clear/reset bitmap io_uring/io-wq: Use set_bit() and test_bit() at worker->flags io_uring/msg_ring: cleanup posting to IOPOLL vs !IOPOLL ring io_uring: Require zeroed sqe->len on provided-buffers send io_uring/notif: disable LAZY_WAKE for linked notifs io_uring/net: fix sendzc lazy wake polling io_uring/msg_ring: reuse ctx->submitter_task read using READ_ONCE instead of re-reading it io_uring/rw: reinstate thread check for retries io_uring/notif: implement notification stacking io_uring/notif: simplify io_notif_flush() net: add callback for setting a ubuf_info to skb net: extend ubuf_info callback to ops structure io_uring/net: support bundles for recv io_uring/net: support bundles for send io_uring/kbuf: add helpers for getting/peeking multiple buffers io_uring/net: add provided buffer support for IORING_OP_SEND ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/io_uring.h6
-rw-r--r--include/linux/io_uring/cmd.h24
-rw-r--r--include/linux/io_uring/net.h18
-rw-r--r--include/linux/io_uring_types.h19
-rw-r--r--include/linux/skbuff.h21
5 files changed, 62 insertions, 26 deletions
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 68ed6697fece..e123d5e17b52 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -11,7 +11,6 @@ void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);
-int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
bool io_is_uring_fops(struct file *file);
static inline void io_uring_files_cancel(void)
@@ -45,11 +44,6 @@ static inline const char *io_uring_get_opcode(u8 opcode)
{
return "";
}
-static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
- unsigned int issue_flags)
-{
- return -EOPNOTSUPP;
-}
static inline bool io_is_uring_fops(struct file *file)
{
return false;
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index e453a997c060..447fbfd32215 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -26,12 +26,25 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
+
+/*
+ * Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd
+ * and the corresponding io_uring request.
+ *
+ * Note: the caller should never hard code @issue_flags and is only allowed
+ * to pass the mask provided by the core io_uring code.
+ */
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
unsigned issue_flags);
+
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags);
+/*
+ * Note: the caller should never hard code @issue_flags and only use the
+ * mask provided by the core io_uring code.
+ */
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags);
@@ -56,6 +69,17 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
}
#endif
+/*
+ * Polled completions must ensure they are coming from a poll queue, and
+ * hence are completed inside the usual poll handling loops.
+ */
+static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd,
+ ssize_t ret, ssize_t res2)
+{
+ lockdep_assert(in_task());
+ io_uring_cmd_done(ioucmd, ret, res2, 0);
+}
+
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
diff --git a/include/linux/io_uring/net.h b/include/linux/io_uring/net.h
new file mode 100644
index 000000000000..b58f39fed4d5
--- /dev/null
+++ b/include/linux/io_uring/net.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_IO_URING_NET_H
+#define _LINUX_IO_URING_NET_H
+
+struct io_uring_cmd;
+
+#if defined(CONFIG_IO_URING)
+int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
+
+#else
+static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#endif
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index ac333ea81d31..7a6b190c7da7 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -205,6 +205,7 @@ struct io_submit_state {
bool plug_started;
bool need_plug;
+ bool cq_flush;
unsigned short submit_nr;
unsigned int cqes_count;
struct blk_plug plug;
@@ -219,7 +220,7 @@ struct io_ev_fd {
};
struct io_alloc_cache {
- struct io_wq_work_node list;
+ void **entries;
unsigned int nr_cached;
unsigned int max_cached;
size_t elem_size;
@@ -299,6 +300,8 @@ struct io_ring_ctx {
struct io_hash_table cancel_table_locked;
struct io_alloc_cache apoll_cache;
struct io_alloc_cache netmsg_cache;
+ struct io_alloc_cache rw_cache;
+ struct io_alloc_cache uring_cache;
/*
* Any cancelable uring_cmd is added to this list in
@@ -341,14 +344,8 @@ struct io_ring_ctx {
unsigned cq_last_tm_flush;
} ____cacheline_aligned_in_smp;
- struct io_uring_cqe completion_cqes[16];
-
spinlock_t completion_lock;
- /* IRQ completion list, under ->completion_lock */
- unsigned int locked_free_nr;
- struct io_wq_work_list locked_free_list;
-
struct list_head io_buffers_comp;
struct list_head cq_overflow_list;
struct io_hash_table cancel_table;
@@ -371,9 +368,6 @@ struct io_ring_ctx {
struct list_head io_buffers_cache;
- /* deferred free list, protected by ->uring_lock */
- struct hlist_head io_buf_list;
-
/* Keep this last, we don't need it for the fast path */
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
@@ -438,8 +432,6 @@ struct io_ring_ctx {
};
struct io_tw_state {
- /* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */
- bool locked;
};
enum {
@@ -480,6 +472,7 @@ enum {
REQ_F_CAN_POLL_BIT,
REQ_F_BL_EMPTY_BIT,
REQ_F_BL_NO_RECYCLE_BIT,
+ REQ_F_BUFFERS_COMMIT_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -558,6 +551,8 @@ enum {
REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT),
/* don't recycle provided buffers for this request */
REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
+ /* buffer ring head needs incrementing on put */
+ REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4ff48eda3f64..1cc72c370190 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -527,6 +527,13 @@ enum {
#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \
SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS)
+struct ubuf_info_ops {
+ void (*complete)(struct sk_buff *, struct ubuf_info *,
+ bool zerocopy_success);
+ /* has to be compatible with skb_zcopy_set() */
+ int (*link_skb)(struct sk_buff *skb, struct ubuf_info *uarg);
+};
+
/*
* The callback notifies userspace to release buffers when skb DMA is done in
* lower device, the skb last reference should be 0 when calling this.
@@ -536,8 +543,7 @@ enum {
* The desc field is used to track userspace buffer index.
*/
struct ubuf_info {
- void (*callback)(struct sk_buff *, struct ubuf_info *,
- bool zerocopy_success);
+ const struct ubuf_info_ops *ops;
refcount_t refcnt;
u8 flags;
};
@@ -1662,14 +1668,13 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
}
#endif
+extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops;
+
struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg);
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
-void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
- bool success);
-
int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, struct iov_iter *from,
size_t length);
@@ -1757,13 +1762,13 @@ static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
static inline void net_zcopy_put(struct ubuf_info *uarg)
{
if (uarg)
- uarg->callback(NULL, uarg, true);
+ uarg->ops->complete(NULL, uarg, true);
}
static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
if (uarg) {
- if (uarg->callback == msg_zerocopy_callback)
+ if (uarg->ops == &msg_zerocopy_ubuf_ops)
msg_zerocopy_put_abort(uarg, have_uref);
else if (have_uref)
net_zcopy_put(uarg);
@@ -1777,7 +1782,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
if (uarg) {
if (!skb_zcopy_is_nouarg(skb))
- uarg->callback(skb, uarg, zerocopy_success);
+ uarg->ops->complete(skb, uarg, zerocopy_success);
skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
}