author     Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 19:56:23 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 19:56:23 +0300
commit     5832d26433f2bd0d28f8b12526e3c2fdb203507f (patch)
tree       c0cdd1df24131bee06e1318cd453e2790fdf654a /include/uapi/linux
parent     77633c77eee37ddc160493a4cf6070c166f47dc0 (diff)
parent     ef9f603fd3d4b7937f2cdbce40e47df0a54b2a55 (diff)
download   linux-5832d26433f2bd0d28f8b12526e3c2fdb203507f.tar.xz
Merge tag 'for-6.18/io_uring-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring updates from Jens Axboe:

 - Store ring provided buffers locally for the users, rather than stuffing them into struct io_kiocb. These types of buffers must always be fully consumed or recycled in the current context, and leaving them in struct io_kiocb is hence not a good idea, as that struct has a vastly different life time. Basically just an architectural cleanup that can help prevent issues with ring provided buffers in the future.

 - Support for mixed CQE sizes in the same ring. Before this change, a CQ ring either used the default 16b CQEs, or it was set up with 32b CQEs using IORING_SETUP_CQE32. For use cases where only a few 32b CQEs were needed, this forced everything else to use big CQEs as well, which is wasteful both in terms of memory usage and memory bandwidth for the posted CQEs. With IORING_SETUP_CQE_MIXED, applications may use request types that post both normal 16b and big 32b CQEs on the same ring (a hedged usage sketch follows the commit list below).

 - Add helpers for async data management, to make it harder for opcode handlers to mess it up.

 - Add support for multishot uring_cmd, which ublk can use. This helps improve efficiency by providing a persistent request type that can trigger multiple CQEs.

 - Add initial support for ring feature querying. We had basic support for probe operations, but the API isn't great. Rather than expand that, add support for QUERY, which is easily expandable and can cover a lot more cases than the existing probe support. This will help applications get a better idea of what operations are supported on a given host.

 - zcrx improvements from Pavel:
     - Improve refill entry alignment for better caching
     - Various cleanups, especially around deduplicating normal memory vs dmabuf setup
     - Generalisation of the niov size (patch 12). It's still hard coded to PAGE_SIZE on init, but will let the user specify the rx buffer length on setup.
     - Syscall / synchronous buffer return. It'll be used as a slow fallback path for returning buffers when the refill queue is full. Useful for tolerating slight queue size misconfiguration or inconsistent load.
     - Accounting more memory to cgroups
     - Additional independent cleanups that will also be useful for multi-area support

 - Various fixes and cleanups

* tag 'for-6.18/io_uring-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (68 commits)
  io_uring/cmd: drop unused res2 param from io_uring_cmd_done()
  io_uring: fix nvme's 32b cqes on mixed cq
  io_uring/query: cap number of queries
  io_uring/query: prevent infinite loops
  io_uring/zcrx: account niov arrays to cgroup
  io_uring/zcrx: allow synchronous buffer return
  io_uring/zcrx: introduce io_parse_rqe()
  io_uring/zcrx: don't adjust free cache space
  io_uring/zcrx: use guards for the refill lock
  io_uring/zcrx: reduce netmem scope in refill
  io_uring/zcrx: protect netdev with pp_lock
  io_uring/zcrx: rename dma lock
  io_uring/zcrx: make niov size variable
  io_uring/zcrx: set sgt for umem area
  io_uring/zcrx: remove dmabuf_offset
  io_uring/zcrx: deduplicate area mapping
  io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
  io_uring/zcrx: check all niovs filled with dma addresses
  io_uring/zcrx: move area reg checks into io_import_area
  io_uring/zcrx: don't pass slot to io_zcrx_create_area
  ...
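As a quick illustration of the mixed-CQE reaping rules mentioned above: a minimal, hedged sketch, not part of the series. The helper name cqe_mixed_slots() is made up; it only assumes the flag semantics documented in the header changes below (IORING_CQE_F_32 marks a 32b entry, IORING_CQE_F_SKIP marks a padding entry) and a uapi header new enough to define both flags.

#include <linux/io_uring.h>

/*
 * Sketch only: given the next unconsumed entry of a CQ ring created with
 * IORING_SETUP_CQE_MIXED, report whether the entry is padding and how many
 * 16b slots the CQ head should advance by.
 */
static inline unsigned cqe_mixed_slots(const struct io_uring_cqe *cqe,
                                       int *skip)
{
        /* Padding entry posted to fill the tail gap before a wrap; the
         * application must not interpret its other fields. */
        *skip = (cqe->flags & IORING_CQE_F_SKIP) != 0;

        /* A big CQE on a mixed ring occupies the space of two 16b CQEs,
         * so the head advances by 2 instead of 1. */
        return (cqe->flags & IORING_CQE_F_32) ? 2 : 1;
}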
Diffstat (limited to 'include/uapi/linux')
-rw-r--r--  include/uapi/linux/io_uring.h        38
-rw-r--r--  include/uapi/linux/io_uring/query.h  41
2 files changed, 78 insertions, 1 deletion
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6957dc539d83..a0cc1cc0dd01 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -225,6 +225,12 @@ enum io_uring_sqe_flags_bit {
/* Use hybrid poll in iopoll process */
#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
+/*
+ * Allow both 16b and 32b CQEs. If a 32b CQE is posted, it will have
+ * IORING_CQE_F_32 set in cqe->flags.
+ */
+#define IORING_SETUP_CQE_MIXED (1U << 18)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
@@ -298,9 +304,13 @@ enum io_uring_op {
* sqe->uring_cmd_flags top 8bits aren't available for userspace
* IORING_URING_CMD_FIXED use registered buffer; pass this flag
* along with setting sqe->buf_index.
+ * IORING_URING_CMD_MULTISHOT must be used with buffer select, like other
+ * multishot commands. Not compatible with
+ * IORING_URING_CMD_FIXED, for now.
*/
#define IORING_URING_CMD_FIXED (1U << 0)
-#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED
+#define IORING_URING_CMD_MULTISHOT (1U << 1)
+#define IORING_URING_CMD_MASK (IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)
/*
@@ -454,6 +464,7 @@ enum io_uring_msg_ring_flags {
#define IORING_NOP_FIXED_FILE (1U << 2)
#define IORING_NOP_FIXED_BUFFER (1U << 3)
#define IORING_NOP_TW (1U << 4)
+#define IORING_NOP_CQE32 (1U << 5)
/*
* IO completion data structure (Completion Queue Entry)
@@ -487,12 +498,22 @@ struct io_uring_cqe {
* other provided buffer type, all completions with a
* buffer passed back is automatically returned to the
* application.
+ * IORING_CQE_F_SKIP If set, then the application/liburing must ignore this
+ * CQE. Its only purpose is to fill a gap in the ring,
+ * when a large CQE is posted and the ring has
+ * just a single small CQE worth of space left before
+ * wrapping.
+ * IORING_CQE_F_32 If set, this is a 32b/big-cqe posting. Use with rings
+ * setup in a mixed CQE mode, where both 16b and 32b
+ * CQEs may be posted to the CQ ring.
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
#define IORING_CQE_F_NOTIF (1U << 3)
#define IORING_CQE_F_BUF_MORE (1U << 4)
+#define IORING_CQE_F_SKIP (1U << 5)
+#define IORING_CQE_F_32 (1U << 15)
#define IORING_CQE_BUFFER_SHIFT 16
@@ -665,6 +686,12 @@ enum io_uring_register_op {
IORING_REGISTER_MEM_REGION = 34,
+ /* query various aspects of io_uring, see linux/io_uring/query.h */
+ IORING_REGISTER_QUERY = 35,
+
+ /* return zcrx buffers back into circulation */
+ IORING_REGISTER_ZCRX_REFILL = 36,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -1046,6 +1073,15 @@ struct io_uring_zcrx_ifq_reg {
__u64 __resv[3];
};
+struct io_uring_zcrx_sync_refill {
+ __u32 zcrx_id;
+ /* the number of entries to return */
+ __u32 nr_entries;
+ /* pointer to an array of struct io_uring_zcrx_rqe */
+ __u64 rqes;
+ __u64 __resv[2];
+};
+
#ifdef __cplusplus
}
#endif
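As a hedged illustration of the new IORING_REGISTER_ZCRX_REFILL opcode and struct io_uring_zcrx_sync_refill added above: a sketch of returning zcrx buffers on the synchronous fallback path via the raw io_uring_register(2) syscall. The wrapper name zcrx_sync_refill() is made up, and the nr_args value of 1 is an assumption rather than something taken from the patches.

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>

/* Sketch: hand nr filled struct io_uring_zcrx_rqe entries back to the kernel
 * on the slow, synchronous path (e.g. when the refill ring is full). */
static int zcrx_sync_refill(int ring_fd, __u32 zcrx_id,
                            const struct io_uring_zcrx_rqe *rqes, __u32 nr)
{
        struct io_uring_zcrx_sync_refill arg;

        memset(&arg, 0, sizeof(arg));           /* __resv[] must be zero */
        arg.zcrx_id = zcrx_id;
        arg.nr_entries = nr;
        arg.rqes = (__u64)(uintptr_t)rqes;

        /* nr_args of 1 is assumed here; check the final kernel/liburing
         * documentation for the expected value. */
        return syscall(__NR_io_uring_register, ring_fd,
                       IORING_REGISTER_ZCRX_REFILL, &arg, 1);
}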
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
new file mode 100644
index 000000000000..5d754322a27c
--- /dev/null
+++ b/include/uapi/linux/io_uring/query.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring query interface.
+ */
+#ifndef LINUX_IO_URING_QUERY_H
+#define LINUX_IO_URING_QUERY_H
+
+#include <linux/types.h>
+
+struct io_uring_query_hdr {
+ __u64 next_entry;
+ __u64 query_data;
+ __u32 query_op;
+ __u32 size;
+ __s32 result;
+ __u32 __resv[3];
+};
+
+enum {
+ IO_URING_QUERY_OPCODES = 0,
+
+ __IO_URING_QUERY_MAX,
+};
+
+/* Doesn't require a ring */
+struct io_uring_query_opcode {
+ /* The number of supported IORING_OP_* opcodes */
+ __u32 nr_request_opcodes;
+ /* The number of supported IORING_[UN]REGISTER_* opcodes */
+ __u32 nr_register_opcodes;
+ /* Bitmask of all supported IORING_FEAT_* flags */
+ __u64 feature_flags;
+ /* Bitmask of all supported IORING_SETUP_* flags */
+ __u64 ring_setup_flags;
+ /* Bitmask of all supported IORING_ENTER_* flags */
+ __u64 enter_flags;
+ /* Bitmask of all supported IOSQE_* flags */
+ __u64 sqe_flags;
+};
+
+#endif
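To show how the query header above is meant to be filled in, here is a hedged sketch. Only the structure layouts come from this diff; the calling convention (passing a single header to io_uring_register(2) with the new IORING_REGISTER_QUERY opcode, an fd of -1 since the opcode query is documented as not requiring a ring, nr_args of 1, and result carrying the per-entry status) is an assumption and may differ from the final interface.

#include <linux/io_uring.h>
#include <linux/io_uring/query.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct io_uring_query_opcode op;
        struct io_uring_query_hdr hdr;

        memset(&op, 0, sizeof(op));
        memset(&hdr, 0, sizeof(hdr));
        hdr.next_entry = 0;                     /* single query, no chaining */
        hdr.query_op = IO_URING_QUERY_OPCODES;
        hdr.query_data = (__u64)(uintptr_t)&op; /* filled in by the kernel */
        hdr.size = sizeof(op);

        /* Assumed invocation; the "Doesn't require a ring" comment above
         * suggests a ring fd is optional for this query type. */
        if (syscall(__NR_io_uring_register, -1, IORING_REGISTER_QUERY,
                    &hdr, 1) < 0) {
                perror("IORING_REGISTER_QUERY");
                return 1;
        }

        /* result is assumed to hold the per-entry status. */
        if (hdr.result == 0)
                printf("%u request opcodes, %u register opcodes supported\n",
                       op.nr_request_opcodes, op.nr_register_opcodes);
        return 0;
}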