summaryrefslogtreecommitdiff
path: root/include/uapi/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 21:39:36 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 21:39:36 +0300
commit6f59de9bc0d576eb5a5edfea470527902315e924 (patch)
treeeeb477da0caa8e9569074f448169fb792fac90d9 /include/uapi/linux
parent3e406741b19890c3d8a2ed126aa7c23b106ca9e1 (diff)
parent533c87e2ed742454957f14d7bef9f48d5a72e72d (diff)
downloadlinux-6f59de9bc0d576eb5a5edfea470527902315e924.tar.xz
Merge tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - ublk updates: - Add support for updating the size of a ublk instance - Zero-copy improvements - Auto-registering of buffers for zero-copy - Series simplifying and improving GET_DATA and request lookup - Series adding quiesce support - Lots of selftests additions - Various cleanups - NVMe updates via Christoph: - add per-node DMA pools and use them for PRP/SGL allocations (Caleb Sander Mateos, Keith Busch) - nvme-fcloop refcounting fixes (Daniel Wagner) - support delayed removal of the multipath node and optionally support the multipath node for private namespaces (Nilay Shroff) - support shared CQs in the PCI endpoint target code (Wilfred Mallawa) - support admin-queue only authentication (Hannes Reinecke) - use the crc32c library instead of the crypto API (Eric Biggers) - misc cleanups (Christoph Hellwig, Marcelo Moreira, Hannes Reinecke, Leon Romanovsky, Gustavo A. R. Silva) - MD updates via Yu: - Fix that normal IO can be starved by sync IO, found by mkfs on newly created large raid5, with some clean up patches for bdev inflight counters - Clean up brd, getting rid of atomic kmaps and bvec poking - Add loop driver specifically for zoned IO testing - Eliminate blk-rq-qos calls with a static key, if not enabled - Improve hctx locking for when a plug has IO for multiple queues pending - Remove block layer bouncing support, which in turn means we can remove the per-node bounce stat as well - Improve blk-throttle support - Improve delay support for blk-throttle - Improve brd discard support - Unify IO scheduler switching. This should also fix a bunch of lockdep warnings we've been seeing, after enabling lockdep support for queue freezing/unfreezeing - Add support for block write streams via FDP (flexible data placement) on NVMe - Add a bunch of block helpers, facilitating the removal of a bunch of duplicated boilerplate code - Remove obsolete BLK_MQ pci and virtio Kconfig options - Add atomic/untorn write support to blktrace - Various little cleanups and fixes * tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux: (186 commits) selftests: ublk: add test for UBLK_F_QUIESCE ublk: add feature UBLK_F_QUIESCE selftests: ublk: add test case for UBLK_U_CMD_UPDATE_SIZE traceevent/block: Add REQ_ATOMIC flag to block trace events ublk: run auto buf unregisgering in same io_ring_ctx with registering io_uring: add helper io_uring_cmd_ctx_handle() ublk: remove io argument from ublk_auto_buf_reg_fallback() ublk: handle ublk_set_auto_buf_reg() failure correctly in ublk_fetch() selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK selftests: ublk: support UBLK_F_AUTO_BUF_REG ublk: support UBLK_AUTO_BUF_REG_FALLBACK ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG ublk: prepare for supporting to register request buffer automatically ublk: convert to refcount_t selftests: ublk: make IO & device removal test more stressful nvme: rename nvme_mpath_shutdown_disk to nvme_mpath_remove_disk nvme: introduce multipath_always_on module param nvme-multipath: introduce delayed removal of the multipath head node nvme-pci: derive and better document max segments limits nvme-pci: use struct_size for allocation struct nvme_dev ...
Diffstat (limited to 'include/uapi/linux')
-rw-r--r--include/uapi/linux/blktrace_api.h2
-rw-r--r--include/uapi/linux/io_uring.h4
-rw-r--r--include/uapi/linux/ublk_cmd.h128
3 files changed, 133 insertions, 1 deletions
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 690621b610e5..1bfb635e309b 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -49,7 +49,7 @@ enum blktrace_act {
__BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
__BLK_TA_INSERT, /* insert request */
__BLK_TA_SPLIT, /* bio was split */
- __BLK_TA_BOUNCE, /* bio was bounced */
+ __BLK_TA_BOUNCE, /* unused, was: bio was bounced */
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* driver-specific binary data */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8f1fc12bac46..50e372ea97c5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -93,6 +93,10 @@ struct io_uring_sqe {
__u16 addr_len;
__u16 __pad3[1];
};
+ struct {
+ __u8 write_stream;
+ __u8 __pad4[3];
+ };
};
union {
struct {
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 583b86681c93..56c7e3fc666f 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -51,6 +51,10 @@
_IOR('u', 0x13, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV_ASYNC \
_IOR('u', 0x14, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_UPDATE_SIZE \
+ _IOWR('u', 0x15, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_QUIESCE_DEV \
+ _IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
/*
* 64bits are enough now, and it should be easy to extend in case of
@@ -211,6 +215,63 @@
*/
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
+/*
+ * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE
+ * New size is passed in cmd->data[0] and is in units of sectors
+ */
+#define UBLK_F_UPDATE_SIZE (1ULL << 10)
+
+/*
+ * request buffer is registered automatically to uring_cmd's io_uring
+ * context before delivering this io command to ublk server, meantime
+ * it is un-registered automatically when completing this io command.
+ *
+ * For using this feature:
+ *
+ * - ublk server has to create sparse buffer table on the same `io_ring_ctx`
+ * for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`.
+ * If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's
+ * responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF`
+ * manually, otherwise this ublk request won't complete.
+ *
+ * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
+ * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
+ * the definition of ublk_sqe_addr_to_auto_buf_reg()
+ *
+ * - pass buffer index from `ublk_auto_buf_reg.index`
+ *
+ * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
+ *
+ * - pass flags from `ublk_auto_buf_reg.flags` if needed
+ *
+ * This way avoids extra cost from two uring_cmd, but also simplifies backend
+ * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
+ * IO_UNREGISTER_IO_BUF becomes not necessary.
+ *
+ * If wrong data or flags are provided, both IO_FETCH_REQ and
+ * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
+ * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
+ * successfully
+ */
+#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
+
+/*
+ * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device,
+ * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or
+ * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for
+ * handling `UBLK_IO_RES_ABORT` correctly.
+ *
+ * Typical use case is for supporting to upgrade ublk server application,
+ * meantime keep ublk block device persistent during the period.
+ *
+ * This feature is only available when UBLK_F_USER_RECOVERY is enabled.
+ *
+ * Note, this command returns -EBUSY in case that all IO commands are being
+ * handled by ublk server and not completed in specified time period which
+ * is passed from the control command parameter.
+ */
+#define UBLK_F_QUIESCE (1ULL << 12)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -297,6 +358,17 @@ struct ublksrv_ctrl_dev_info {
#define UBLK_IO_F_FUA (1U << 13)
#define UBLK_IO_F_NOUNMAP (1U << 15)
#define UBLK_IO_F_SWAP (1U << 16)
+/*
+ * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
+ *
+ * This flag is set if auto buffer register is failed & ublk server passes
+ * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
+ * manually for handling the delivered IO command if this flag is observed
+ *
+ * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
+ * passed in.
+ */
+#define UBLK_IO_F_NEED_REG_BUF (1U << 17)
/*
* io cmd is described by this structure, and stored in share memory, indexed
@@ -331,6 +403,62 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
return iod->op_flags >> 8;
}
+/*
+ * If this flag is set, fallback by completing the uring_cmd and setting
+ * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
+ * otherwise the client ublk request is failed silently
+ *
+ * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
+ * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
+ * ublk server needs to register io buffer manually for handling IO command.
+ */
+#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0)
+#define UBLK_AUTO_BUF_REG_F_MASK UBLK_AUTO_BUF_REG_FALLBACK
+
+struct ublk_auto_buf_reg {
+ /* index for registering the delivered request buffer */
+ __u16 index;
+ __u8 flags;
+ __u8 reserved0;
+
+ /*
+ * io_ring FD can be passed via the reserve field in future for
+ * supporting to register io buffer to external io_uring
+ */
+ __u32 reserved1;
+};
+
+/*
+ * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
+ * uring_cmd's sqe->addr:
+ *
+ * - bit0 ~ bit15: buffer index
+ * - bit16 ~ bit23: flags
+ * - bit24 ~ bit31: reserved0
+ * - bit32 ~ bit63: reserved1
+ */
+static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
+ __u64 sqe_addr)
+{
+ struct ublk_auto_buf_reg reg = {
+ .index = sqe_addr & 0xffff,
+ .flags = (sqe_addr >> 16) & 0xff,
+ .reserved0 = (sqe_addr >> 24) & 0xff,
+ .reserved1 = sqe_addr >> 32,
+ };
+
+ return reg;
+}
+
+static inline __u64
+ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
+{
+ __u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 |
+ (__u64)buf->reserved1 << 32;
+
+ return addr;
+}
+
/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
__u16 q_id;