summaryrefslogtreecommitdiff
path: root/include/uapi/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-16 10:23:59 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-16 10:23:59 +0300
commit9b40ba14edcdf70240af8114092a76f75f070774 (patch)
tree7fd6958aa2c7ab93d86d2afed222b37c67e1c21b /include/uapi/linux
parentd29fd593e6836c96c6fd6df2b0cc6a47dda21b74 (diff)
parentd9b710f683dc68b5c0b7dd0c6c64aeb5d27a1ac4 (diff)
downloadlinux-9b40ba14edcdf70240af8114092a76f75f070774.tar.xz
Merge tag 'for-7.2/io_uring-20260615' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring updates from Jens Axboe: - Rework the task_work infrastructure. Both the local (DEFER_TASKRUN) and the normal (tctx) task_work lists were llist based, which is LIFO ordered, and hence each run had to do an O(n) list reversal pass first to restore queue order. Additionally, to cap the amount of task_work run, each method needed a retry list as well. Add a lockless MPCS FIFO queue (based on Dmitry Vyukov's intrusive MPSC algorithm) and switch both task_work lists to it. It performs better than llists and we can then also ditch the retry lists as well as entries are popped one-at-the-time. On top of those changes, run the tctx fallback task_work directly and remove the now-unused per-ctx fallback machinery entirely. - zcrx user notifications. Add a mechanism for zcrx to communicate conditions back to userspace via a dedicated CQE, with the initial users being notification on running out of buffers and on a frag copy fallback, plus shared-memory notification statistics. Alongside that, a series of zcrx reliability and cleanup fixes: more reliable scrubbing, poisoning pointers on unregistration, dropping an extra ifq close, adding a ctx back-pointer, reordering fd allocation in the export path, and killing a dead 'sock' member. - Allow using io_uring registered buffers for plain SEND and RECV, not just for the zero-copy send path. This enables targets like ublk's NBD backend to push/pull IO data directly to/from a registered buffer over a plain send/recv on a TCP socket. - Registered buffer improvements: account huge pages correctly, bump the io_mapped_ubuf length field to size_t, and raise the previous 1GB registered buffer size limit. - Restrict the ctx access exposed to io_uring BPF struct_ops programs by handing them an opaque type rather than the full io_ring_ctx, and add a separate MAINTAINERS entry for the bpf-ops code. - Allow opcode filtering on IORING_OP_CONNECT. - Validate ring-provided buffer addresses with access_ok(), and align the legacy buffer add limit with MAX_BIDS_PER_BGID. - Various other cleanups and minor fixes, including avoiding msghdr async data on connect/bind, dropping async_size for OP_LISTEN, making the POLL_FIRST receive side checks consistent, re-checking IO_WQ_BIT_EXIT for each linked work item, and using trace_call__##name() at guarded tracepoint call sites. * tag 'for-7.2/io_uring-20260615' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (31 commits) io_uring/bpf-ops: add a separate maintainer entry io_uring/net: make POLL_FIRST receive side checks consistent io_uring: remove the per-ctx fallback task_work machinery io_uring: run the tctx task_work fallback directly io_uring: switch normal task_work to a mpscq io_uring: switch local task_work to a mpscq io_uring/mpscq: add lockless multi-producer, single-consumer FIFO queue io_uring: grab RCU read lock marking task run io_uring/zcrx: kill dead 'sock' member in struct io_zcrx_args io_uring/kbuf: validate ring provided buffer addresses with access_ok() io_uring/net: support registered buffer for plain send and recv io_uring/nop: Drop a wrong comment in struct io_nop io_uring/net: Remove async_size for OP_LISTEN io_uring/net: Avoid msghdr on op_connect/op_bind async data io_uring/bpf-ops: restrict ctx access to BPF io_uring/io-wq: re-check IO_WQ_BIT_EXIT for each linked work item io_uring/kbuf: align legacy buffer add limit with MAX_BIDS_PER_BGID io_uring/zcrx: add shared-memory notification statistics io_uring/zcrx: notify user on frag copy fallback io_uring/zcrx: notify user when out of buffers ...
Diffstat (limited to 'include/uapi/linux')
-rw-r--r--include/uapi/linux/io_uring/bpf_filter.h16
-rw-r--r--include/uapi/linux/io_uring/query.h12
-rw-r--r--include/uapi/linux/io_uring/zcrx.h36
3 files changed, 63 insertions, 1 deletions
diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
index 1b461d792a7b..ce7d78ab13b3 100644
--- a/include/uapi/linux/io_uring/bpf_filter.h
+++ b/include/uapi/linux/io_uring/bpf_filter.h
@@ -27,6 +27,22 @@ struct io_uring_bpf_ctx {
__u64 mode;
__u64 resolve;
} open;
+ /*
+ * For CONNECT: fields are populated only when addr_len covers
+ * them; unpopulated fields are zero from the caller-side memset
+ * in io_uring_populate_bpf_ctx(). port and v4_addr are network
+ * byte order. Filters may only issue BPF_LD|BPF_W|BPF_ABS at
+ * 4-byte aligned offsets; load + mask for sub-word fields.
+ */
+ struct {
+ __u32 family; /* sa_family_t zero-extended */
+ __be16 port;
+ __u8 pad[2];
+ union {
+ __be32 v4_addr;
+ __u8 v6_addr[16];
+ };
+ } connect;
};
};
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
index 95500759cc13..1a68eca7c6b4 100644
--- a/include/uapi/linux/io_uring/query.h
+++ b/include/uapi/linux/io_uring/query.h
@@ -23,6 +23,7 @@ enum {
IO_URING_QUERY_OPCODES = 0,
IO_URING_QUERY_ZCRX = 1,
IO_URING_QUERY_SCQ = 2,
+ IO_URING_QUERY_ZCRX_NOTIF = 3,
__IO_URING_QUERY_MAX,
};
@@ -62,6 +63,17 @@ struct io_uring_query_zcrx {
__u64 __resv2;
};
+struct io_uring_query_zcrx_notif {
+ /* Bitmask of supported ZCRX_NOTIF_* flags */
+ __u32 notif_flags;
+ /* Size of io_uring_zcrx_notif_stats */
+ __u32 notif_stats_size;
+ /* Required alignment for the stats struct within the region (ie stats_offset) */
+ __u32 notif_stats_off_alignment;
+ __u32 __resv1;
+ __u64 __resv2[4];
+};
+
struct io_uring_query_scq {
/* The SQ/CQ rings header size */
__u64 hdr_size;
diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
index 5ce02c7a6096..15c05c45ce36 100644
--- a/include/uapi/linux/io_uring/zcrx.h
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -65,6 +65,32 @@ enum zcrx_features {
* value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
*/
ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
+ ZCRX_FEATURE_NOTIFICATION = 1 << 1,
+};
+
+enum zcrx_notification_type {
+ ZCRX_NOTIF_NO_BUFFERS,
+ ZCRX_NOTIF_COPY,
+
+ __ZCRX_NOTIF_TYPE_LAST,
+};
+
+enum zcrx_notification_desc_flags {
+ /* If set, stats_offset holds a valid offset to a notif_stats struct */
+ ZCRX_NOTIF_DESC_FLAG_STATS = 1 << 0,
+};
+
+struct zcrx_notif_stats {
+ __u64 copy_count; /* cumulative copy-fallback CQEs */
+ __u64 copy_bytes; /* cumulative bytes copied */
+};
+
+struct zcrx_notification_desc {
+ __u64 user_data;
+ __u32 type_mask;
+ __u32 flags; /* see enum zcrx_notification_desc_flags */
+ __u64 stats_offset; /* offset from the beginning of refill ring region for stats */
+ __u64 __resv2[9];
};
/*
@@ -82,12 +108,14 @@ struct io_uring_zcrx_ifq_reg {
struct io_uring_zcrx_offsets offsets;
__u32 zcrx_id;
__u32 rx_buf_len;
- __u64 __resv[3];
+ __u64 notif_desc; /* see struct zcrx_notification_desc */
+ __u64 __resv[2];
};
enum zcrx_ctrl_op {
ZCRX_CTRL_FLUSH_RQ,
ZCRX_CTRL_EXPORT,
+ ZCRX_CTRL_ARM_NOTIFICATION,
__ZCRX_CTRL_LAST,
};
@@ -101,6 +129,11 @@ struct zcrx_ctrl_export {
__u32 __resv1[11];
};
+struct zcrx_ctrl_arm_notif {
+ __u32 notif_type;
+ __u32 __resv[11];
+};
+
struct zcrx_ctrl {
__u32 zcrx_id;
__u32 op; /* see enum zcrx_ctrl_op */
@@ -109,6 +142,7 @@ struct zcrx_ctrl {
union {
struct zcrx_ctrl_export zc_export;
struct zcrx_ctrl_flush_rq zc_flush;
+ struct zcrx_ctrl_arm_notif zc_arm_notif;
};
};