summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2025-11-05 18:07:41 +0300
committerJens Axboe <axboe@kernel.dk>2025-11-05 18:07:41 +0300
commit55de535e0769cbc61c2ac415673365ee0efa96e0 (patch)
tree001fe4ec17675dd053e75dc151618774bbb99691 /include
parentf68ff6bc0d0893d959aac39c662895096d866c84 (diff)
parente04ccfc28252f181ea8d469d834b48e7dece65b2 (diff)
downloadlinux-55de535e0769cbc61c2ac415673365ee0efa96e0.tar.xz
Merge branch 'cached-zones' into for-6.19/block
This patch series implements a cached report zones using information from the block layer zone write plugs and a new zone condition tracking. This avoids having to execute slow report zones commands on the device when for instance mounting file systems, which can significantly speed things up, especially in setups with multiple SMR HDDs (e.g. a BTRFS RAID volume). The first patch improves handling of zone management commands. Patch 2 fixes zone resource updates and the following 3 patches cleanup the zone code in preparation for introducing cached zone report support. From patch 6 to 13, cached report zones is implemented and made available to users with a new ioctl() command. Finally, patches 14 and 15 introduce the use of cached report zones in the mount operation of XFS and BTRFS. Link: https://lore.kernel.org/linux-block/20251104212249.1075412-1-dlemoal@kernel.org/ Signed-off-by: Jens Axboe <axboe@kernel.dk> * cached-zones: xfs: use blkdev_report_zones_cached() btrfs: use blkdev_report_zones_cached() block: add zone write plug condition to debugfs zone_wplugs block: improve zone_wplugs debugfs attribute output block: introduce BLKREPORTZONESV2 ioctl block: introduce blkdev_report_zones_cached() block: introduce blkdev_get_zone_info() block: refactor blkdev_report_zones() code block: track zone conditions block: use zone condition to determine conventional zones block: reorganize struct blk_zone_wplug block: introduce disk_report_zone() block: cleanup blkdev_report_zones() block: freeze queue when updating zone resources block: handle zone management operations completions
Diffstat (limited to 'include')
-rw-r--r--include/linux/blkdev.h49
-rw-r--r--include/linux/device-mapper.h10
-rw-r--r--include/uapi/linux/blkzoned.h46
-rw-r--r--include/uapi/linux/fs.h2
4 files changed, 70 insertions, 37 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99be263b31ab..f0ab02e0a673 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -38,6 +38,7 @@ struct blk_flush_queue;
struct kiocb;
struct pr_ops;
struct rq_qos;
+struct blk_report_zones_args;
struct blk_queue_stats;
struct blk_stat_callback;
struct blk_crypto_profile;
@@ -195,7 +196,7 @@ struct gendisk {
unsigned int nr_zones;
unsigned int zone_capacity;
unsigned int last_zone_capacity;
- unsigned long __rcu *conv_zones_bitmap;
+ u8 __rcu *zones_cond;
unsigned int zone_wplugs_hash_bits;
atomic_t nr_zone_wplugs;
spinlock_t zone_wplugs_lock;
@@ -432,9 +433,17 @@ struct queue_limits {
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
void *data);
+int disk_report_zone(struct gendisk *disk, struct blk_zone *zone,
+ unsigned int idx, struct blk_report_zones_args *args);
+
+int blkdev_get_zone_info(struct block_device *bdev, sector_t sector,
+ struct blk_zone *zone);
+
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sectors, sector_t nr_sectors);
int blk_revalidate_disk_zones(struct gendisk *disk);
@@ -921,12 +930,20 @@ static inline unsigned int bdev_zone_capacity(struct block_device *bdev,
{
return disk_zone_capacity(bdev->bd_disk, pos);
}
+
+bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector);
+
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
return 0;
}
+static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
+{
+ return false;
+}
+
static inline bool bio_needs_zone_write_plugging(struct bio *bio)
{
return false;
@@ -1529,33 +1546,6 @@ static inline bool bdev_is_zone_aligned(struct block_device *bdev,
return bdev_is_zone_start(bdev, sector);
}
-/**
- * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
- * @bdev: block device to check
- * @sector: sector number
- *
- * Check if @sector on @bdev is contained in a sequential write required zone.
- */
-static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
-{
- bool is_seq = false;
-
-#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
- if (bdev_is_zoned(bdev)) {
- struct gendisk *disk = bdev->bd_disk;
- unsigned long *bitmap;
-
- rcu_read_lock();
- bitmap = rcu_dereference(disk->conv_zones_bitmap);
- is_seq = !bitmap ||
- !test_bit(disk_zone_no(disk, sector), bitmap);
- rcu_read_unlock();
- }
-#endif
-
- return is_seq;
-}
-
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
@@ -1662,7 +1652,8 @@ struct block_device_operations {
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data);
+ unsigned int nr_zones,
+ struct blk_report_zones_args *args);
char *(*devnode)(struct gendisk *disk, umode_t *mode);
/* returns the length of the identifier or a negative errno: */
int (*get_unique_id)(struct gendisk *disk, u8 id[16],
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 84fdc3a6a19a..38f625af6ab4 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -538,12 +538,18 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone);
#ifdef CONFIG_BLK_DEV_ZONED
struct dm_report_zones_args {
struct dm_target *tgt;
+ struct gendisk *disk;
sector_t next_sector;
- void *orig_data;
- report_zones_cb orig_cb;
unsigned int zone_idx;
+ /* for block layer ->report_zones */
+ struct blk_report_zones_args *rep_args;
+
+ /* for internal users */
+ report_zones_cb cb;
+ void *data;
+
/* must be filled by ->report_zones before calling dm_report_zones_cb */
sector_t start;
};
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index f85743ef6e7d..e33f02703350 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -48,6 +48,8 @@ enum blk_zone_type {
* FINISH ZONE command.
* @BLK_ZONE_COND_READONLY: The zone is read-only.
* @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written).
+ * @BLK_ZONE_COND_ACTIVE: The zone is either implicitly open, explicitly open,
+ * or closed.
*
* The Zone Condition state machine in the ZBC/ZAC standards maps the above
* deinitions as:
@@ -61,6 +63,13 @@ enum blk_zone_type {
*
* Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
* be considered invalid.
+ *
+ * The condition BLK_ZONE_COND_ACTIVE is used only with cached zone reports.
+ * It is used to report any of the BLK_ZONE_COND_IMP_OPEN,
+ * BLK_ZONE_COND_EXP_OPEN and BLK_ZONE_COND_CLOSED conditions. Conversely, a
+ * regular zone report will never report a zone condition using
+ * BLK_ZONE_COND_ACTIVE and instead use the conditions BLK_ZONE_COND_IMP_OPEN,
+ * BLK_ZONE_COND_EXP_OPEN or BLK_ZONE_COND_CLOSED as reported by the device.
*/
enum blk_zone_cond {
BLK_ZONE_COND_NOT_WP = 0x0,
@@ -71,15 +80,27 @@ enum blk_zone_cond {
BLK_ZONE_COND_READONLY = 0xD,
BLK_ZONE_COND_FULL = 0xE,
BLK_ZONE_COND_OFFLINE = 0xF,
+
+ BLK_ZONE_COND_ACTIVE = 0xFF,
};
/**
* enum blk_zone_report_flags - Feature flags of reported zone descriptors.
*
- * @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field.
+ * @BLK_ZONE_REP_CAPACITY: Output only. Indicates that zone descriptors in a
+ * zone report have a valid capacity field.
+ * @BLK_ZONE_REP_CACHED: Input only. Indicates that the zone report should be
+ * generated using cached zone information. In this case,
+ * the implicit open, explicit open and closed zone
+ * conditions are all reported with the
+ * BLK_ZONE_COND_ACTIVE condition.
*/
enum blk_zone_report_flags {
- BLK_ZONE_REP_CAPACITY = (1 << 0),
+ /* Output flags */
+ BLK_ZONE_REP_CAPACITY = (1U << 0),
+
+ /* Input flags */
+ BLK_ZONE_REP_CACHED = (1U << 31),
};
/**
@@ -122,6 +143,10 @@ struct blk_zone {
* @sector: starting sector of report
* @nr_zones: IN maximum / OUT actual
* @flags: one or more flags as defined by enum blk_zone_report_flags.
+ * @flags: one or more flags as defined by enum blk_zone_report_flags.
+ * With BLKREPORTZONE, this field is ignored as an input and is valid
+ * only as an output. Using BLKREPORTZONEV2, this field is used as both
+ * input and output.
* @zones: Space to hold @nr_zones @zones entries on reply.
*
* The array of at most @nr_zones must follow this structure in memory.
@@ -148,9 +173,19 @@ struct blk_zone_range {
/**
* Zoned block device ioctl's:
*
- * @BLKREPORTZONE: Get zone information. Takes a zone report as argument.
- * The zone report will start from the zone containing the
- * sector specified in the report request structure.
+ * @BLKREPORTZONE: Get zone information from a zoned device. Takes a zone report
+ * as argument. The zone report will start from the zone
+ * containing the sector specified in struct blk_zone_report.
+ * The flags field of struct blk_zone_report is used as an
+ * output only and ignored as an input.
+ * DEPRECATED, use BLKREPORTZONEV2 instead.
+ * @BLKREPORTZONEV2: Same as @BLKREPORTZONE but uses the flags field of
+ * struct blk_zone_report as an input, allowing to get a zone
+ * report using cached zone information if the flag
+ * BLK_ZONE_REP_CACHED is set. In such case, the zone report
+ * may include zones with the condition @BLK_ZONE_COND_ACTIVE
+ * (c.f. the description of this condition above for more
+ * details).
* @BLKRESETZONE: Reset the write pointer of the zones in the specified
* sector range. The sector range must be zone aligned.
* @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
@@ -169,5 +204,6 @@ struct blk_zone_range {
#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
+#define BLKREPORTZONEV2 _IOWR(0x12, 142, struct blk_zone_report)
#endif /* _UAPI_BLKZONED_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 957ce3343a4f..66ca526cf786 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -298,7 +298,7 @@ struct file_attr {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
-/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
+/* 130-136 and 142 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
#define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2)