summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid Wei <dw@davidwei.uk>2025-02-15 03:09:36 +0300
committerJens Axboe <axboe@kernel.dk>2025-02-17 15:41:03 +0300
commit6f377873cb23905009759b7366b9fe85c2a6ff37 (patch)
tree174674c6e7cc5055cdf90ccdc99e7b3032c57483 /include
parent5c496ff11df179c32db960cf10af90a624a035eb (diff)
downloadlinux-6f377873cb23905009759b7366b9fe85c2a6ff37.tar.xz
io_uring/zcrx: add interface queue and refill queue
Add a new object called an interface queue (ifq) that represents a net rx queue that has been configured for zero copy. Each ifq is registered using a new registration opcode IORING_REGISTER_ZCRX_IFQ. The refill queue is allocated by the kernel and mapped by userspace using a new offset IORING_OFF_RQ_RING, in a similar fashion to the main SQ/CQ. It is used by userspace to return buffers that it is done with, which will then be re-used by the netdev again. The main CQ ring is used to notify userspace of received data by using the upper 16 bytes of a big CQE as a new struct io_uring_zcrx_cqe. Each entry contains the offset + len to the data. For now, each io_uring instance only has a single ifq. Reviewed-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: David Wei <dw@davidwei.uk> Acked-by: Jakub Kicinski <kuba@kernel.org> Link: https://lore.kernel.org/r/20250215000947.789731-2-dw@davidwei.uk Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include')
-rw-r--r--include/linux/io_uring_types.h6
-rw-r--r--include/uapi/linux/io_uring.h43
2 files changed, 48 insertions, 1 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 123e69368730..c0fe8a00fe53 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -40,6 +40,8 @@ enum io_uring_cmd_flags {
IO_URING_F_TASK_DEAD = (1 << 13),
};
+struct io_zcrx_ifq;
+
struct io_wq_work_node {
struct io_wq_work_node *next;
};
@@ -382,6 +384,8 @@ struct io_ring_ctx {
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
+ struct io_zcrx_ifq *ifq;
+
u32 pers_next;
struct xarray personalities;
@@ -434,6 +438,8 @@ struct io_ring_ctx {
struct io_mapped_region ring_region;
/* used for optimised request parameter and wait argument passing */
struct io_mapped_region param_region;
+ /* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */
+ struct io_mapped_region zcrx_region;
};
/*
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e11c82638527..6a1632d0fba1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -639,7 +639,8 @@ enum io_uring_register_op {
/* send MSG_RING without having a ring */
IORING_REGISTER_SEND_MSG_RING = 31,
- /* 32 reserved for zc rx */
+ /* register a netdev hw rx queue for zerocopy */
+ IORING_REGISTER_ZCRX_IFQ = 32,
/* resize CQ ring */
IORING_REGISTER_RESIZE_RINGS = 33,
@@ -956,6 +957,46 @@ enum io_uring_socket_op {
SOCKET_URING_OP_SETSOCKOPT,
};
+/* Zero copy receive refill queue entry */
+struct io_uring_zcrx_rqe {
+ __u64 off;
+ __u32 len;
+ __u32 __pad;
+};
+
+struct io_uring_zcrx_cqe {
+ __u64 off;
+ __u64 __pad;
+};
+
+/* The bit from which area id is encoded into offsets */
+#define IORING_ZCRX_AREA_SHIFT 48
+#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))
+
+struct io_uring_zcrx_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 rqes;
+ __u32 __resv2;
+ __u64 __resv[2];
+};
+
+/*
+ * Argument for IORING_REGISTER_ZCRX_IFQ
+ */
+struct io_uring_zcrx_ifq_reg {
+ __u32 if_idx;
+ __u32 if_rxq;
+ __u32 rq_entries;
+ __u32 flags;
+
+ __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+ __u64 region_ptr; /* struct io_uring_region_desc * */
+
+ struct io_uring_zcrx_offsets offsets;
+ __u64 __resv[4];
+};
+
#ifdef __cplusplus
}
#endif