diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-05-26 21:39:36 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-05-26 21:39:36 +0300 |
| commit | 6f59de9bc0d576eb5a5edfea470527902315e924 (patch) | |
| tree | eeb477da0caa8e9569074f448169fb792fac90d9 /include/uapi/linux | |
| parent | 3e406741b19890c3d8a2ed126aa7c23b106ca9e1 (diff) | |
| parent | 533c87e2ed742454957f14d7bef9f48d5a72e72d (diff) | |
| download | linux-6f59de9bc0d576eb5a5edfea470527902315e924.tar.xz | |
Merge tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- ublk updates:
- Add support for updating the size of a ublk instance
- Zero-copy improvements
- Auto-registering of buffers for zero-copy
- Series simplifying and improving GET_DATA and request lookup
- Series adding quiesce support
- Lots of selftests additions
- Various cleanups
- NVMe updates via Christoph:
- add per-node DMA pools and use them for PRP/SGL allocations
(Caleb Sander Mateos, Keith Busch)
- nvme-fcloop refcounting fixes (Daniel Wagner)
- support delayed removal of the multipath node and optionally
support the multipath node for private namespaces (Nilay Shroff)
- support shared CQs in the PCI endpoint target code (Wilfred
Mallawa)
- support admin-queue only authentication (Hannes Reinecke)
- use the crc32c library instead of the crypto API (Eric Biggers)
- misc cleanups (Christoph Hellwig, Marcelo Moreira, Hannes
Reinecke, Leon Romanovsky, Gustavo A. R. Silva)
- MD updates via Yu:
- Fix that normal IO can be starved by sync IO, found by mkfs on
newly created large raid5, with some clean up patches for bdev
inflight counters
- Clean up brd, getting rid of atomic kmaps and bvec poking
- Add loop driver specifically for zoned IO testing
- Eliminate blk-rq-qos calls with a static key, if not enabled
- Improve hctx locking for when a plug has IO for multiple queues
pending
- Remove block layer bouncing support, which in turn means we can
remove the per-node bounce stat as well
- Improve blk-throttle support
- Improve delay support for blk-throttle
- Improve brd discard support
- Unify IO scheduler switching. This should also fix a bunch of lockdep
warnings we've been seeing, after enabling lockdep support for queue
freezing/unfreezeing
- Add support for block write streams via FDP (flexible data placement)
on NVMe
- Add a bunch of block helpers, facilitating the removal of a bunch of
duplicated boilerplate code
- Remove obsolete BLK_MQ pci and virtio Kconfig options
- Add atomic/untorn write support to blktrace
- Various little cleanups and fixes
* tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux: (186 commits)
selftests: ublk: add test for UBLK_F_QUIESCE
ublk: add feature UBLK_F_QUIESCE
selftests: ublk: add test case for UBLK_U_CMD_UPDATE_SIZE
traceevent/block: Add REQ_ATOMIC flag to block trace events
ublk: run auto buf unregisgering in same io_ring_ctx with registering
io_uring: add helper io_uring_cmd_ctx_handle()
ublk: remove io argument from ublk_auto_buf_reg_fallback()
ublk: handle ublk_set_auto_buf_reg() failure correctly in ublk_fetch()
selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK
selftests: ublk: support UBLK_F_AUTO_BUF_REG
ublk: support UBLK_AUTO_BUF_REG_FALLBACK
ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG
ublk: prepare for supporting to register request buffer automatically
ublk: convert to refcount_t
selftests: ublk: make IO & device removal test more stressful
nvme: rename nvme_mpath_shutdown_disk to nvme_mpath_remove_disk
nvme: introduce multipath_always_on module param
nvme-multipath: introduce delayed removal of the multipath head node
nvme-pci: derive and better document max segments limits
nvme-pci: use struct_size for allocation struct nvme_dev
...
Diffstat (limited to 'include/uapi/linux')
| -rw-r--r-- | include/uapi/linux/blktrace_api.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/io_uring.h | 4 | ||||
| -rw-r--r-- | include/uapi/linux/ublk_cmd.h | 128 |
3 files changed, 133 insertions, 1 deletions
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 690621b610e5..1bfb635e309b 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -49,7 +49,7 @@ enum blktrace_act { __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */ __BLK_TA_INSERT, /* insert request */ __BLK_TA_SPLIT, /* bio was split */ - __BLK_TA_BOUNCE, /* bio was bounced */ + __BLK_TA_BOUNCE, /* unused, was: bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ __BLK_TA_ABORT, /* request aborted */ __BLK_TA_DRV_DATA, /* driver-specific binary data */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8f1fc12bac46..50e372ea97c5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -93,6 +93,10 @@ struct io_uring_sqe { __u16 addr_len; __u16 __pad3[1]; }; + struct { + __u8 write_stream; + __u8 __pad4[3]; + }; }; union { struct { diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 583b86681c93..56c7e3fc666f 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -51,6 +51,10 @@ _IOR('u', 0x13, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_DEL_DEV_ASYNC \ _IOR('u', 0x14, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_UPDATE_SIZE \ + _IOWR('u', 0x15, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_QUIESCE_DEV \ + _IOWR('u', 0x16, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of @@ -211,6 +215,63 @@ */ #define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) +/* + * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE + * New size is passed in cmd->data[0] and is in units of sectors + */ +#define UBLK_F_UPDATE_SIZE (1ULL << 10) + +/* + * request buffer is registered automatically to uring_cmd's io_uring + * context before delivering this io command to ublk server, meantime + * it is un-registered automatically when completing this io command. + * + * For using this feature: + * + * - ublk server has to create sparse buffer table on the same `io_ring_ctx` + * for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`. + * If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's + * responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF` + * manually, otherwise this ublk request won't complete. + * + * - ublk server passes auto buf register data via uring_cmd's sqe->addr, + * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see + * the definition of ublk_sqe_addr_to_auto_buf_reg() + * + * - pass buffer index from `ublk_auto_buf_reg.index` + * + * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed + * + * - pass flags from `ublk_auto_buf_reg.flags` if needed + * + * This way avoids extra cost from two uring_cmd, but also simplifies backend + * implementation, such as, the dependency on IO_REGISTER_IO_BUF and + * IO_UNREGISTER_IO_BUF becomes not necessary. + * + * If wrong data or flags are provided, both IO_FETCH_REQ and + * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request + * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued + * successfully + */ +#define UBLK_F_AUTO_BUF_REG (1ULL << 11) + +/* + * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device, + * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or + * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for + * handling `UBLK_IO_RES_ABORT` correctly. + * + * Typical use case is for supporting to upgrade ublk server application, + * meantime keep ublk block device persistent during the period. + * + * This feature is only available when UBLK_F_USER_RECOVERY is enabled. + * + * Note, this command returns -EBUSY in case that all IO commands are being + * handled by ublk server and not completed in specified time period which + * is passed from the control command parameter. + */ +#define UBLK_F_QUIESCE (1ULL << 12) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 @@ -297,6 +358,17 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FUA (1U << 13) #define UBLK_IO_F_NOUNMAP (1U << 15) #define UBLK_IO_F_SWAP (1U << 16) +/* + * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only. + * + * This flag is set if auto buffer register is failed & ublk server passes + * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer + * manually for handling the delivered IO command if this flag is observed + * + * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is + * passed in. + */ +#define UBLK_IO_F_NEED_REG_BUF (1U << 17) /* * io cmd is described by this structure, and stored in share memory, indexed @@ -331,6 +403,62 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) return iod->op_flags >> 8; } +/* + * If this flag is set, fallback by completing the uring_cmd and setting + * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure; + * otherwise the client ublk request is failed silently + * + * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF + * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set, + * ublk server needs to register io buffer manually for handling IO command. + */ +#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0) +#define UBLK_AUTO_BUF_REG_F_MASK UBLK_AUTO_BUF_REG_FALLBACK + +struct ublk_auto_buf_reg { + /* index for registering the delivered request buffer */ + __u16 index; + __u8 flags; + __u8 reserved0; + + /* + * io_ring FD can be passed via the reserve field in future for + * supporting to register io buffer to external io_uring + */ + __u32 reserved1; +}; + +/* + * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via + * uring_cmd's sqe->addr: + * + * - bit0 ~ bit15: buffer index + * - bit16 ~ bit23: flags + * - bit24 ~ bit31: reserved0 + * - bit32 ~ bit63: reserved1 + */ +static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( + __u64 sqe_addr) +{ + struct ublk_auto_buf_reg reg = { + .index = sqe_addr & 0xffff, + .flags = (sqe_addr >> 16) & 0xff, + .reserved0 = (sqe_addr >> 24) & 0xff, + .reserved1 = sqe_addr >> 32, + }; + + return reg; +} + +static inline __u64 +ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf) +{ + __u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 | + (__u64)buf->reserved1 << 32; + + return addr; +} + /* issued to ublk driver via /dev/ublkcN */ struct ublksrv_io_cmd { __u16 q_id; |
