From 982c3b3058433f20aba9fb032599cee5dfc17328 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Oct 2023 15:01:08 +0200 Subject: bdev: rename freeze and thaw helpers We have bdev_mark_dead() etc and we're going to move block device freezing to holder ops in the next patch. Make the naming consistent: * freeze_bdev() -> bdev_freeze() * thaw_bdev() -> bdev_thaw() Also document the return code. Link: https://lore.kernel.org/r/20231024-vfs-super-freeze-v2-2-599c19f4faac@kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 51fa7ffdee83..7a3da7f44afb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1541,8 +1541,8 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) } #endif /* CONFIG_BLOCK */ -int freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev); +int bdev_freeze(struct block_device *bdev); +int bdev_thaw(struct block_device *bdev); struct io_comp_batch { struct request *req_list; -- cgit v1.2.3 From a30561a9be69d446d8d542a4f9735fe5ca9573df Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Oct 2023 15:01:10 +0200 Subject: bdev: add freeze and thaw holder operations Add block device freeze and thaw holder operations. Follow-up patches will implement block device freeze and thaw based on stuct blk_holder_ops. Link: https://lore.kernel.org/r/20231024-vfs-super-freeze-v2-4-599c19f4faac@kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7a3da7f44afb..1bc776335ff8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1468,6 +1468,16 @@ struct blk_holder_ops { * Sync the file system mounted on the block device. */ void (*sync)(struct block_device *bdev); + + /* + * Freeze the file system mounted on the block device. + */ + int (*freeze)(struct block_device *bdev); + + /* + * Thaw the file system mounted on the block device. + */ + int (*thaw)(struct block_device *bdev); }; extern const struct blk_holder_ops fs_holder_ops; -- cgit v1.2.3 From e419cf3ebaee694a826ddcfb350f1b1ebaf1e599 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Oct 2023 15:01:16 +0200 Subject: blkdev: comment fs_holder_ops Add a comment to @fs_holder_ops that @holder must point to a superblock. Link: https://lore.kernel.org/r/20231024-vfs-super-freeze-v2-10-599c19f4faac@kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1bc776335ff8..abf71cce785c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1480,6 +1480,11 @@ struct blk_holder_ops { int (*thaw)(struct block_device *bdev); }; +/* + * For filesystems using @fs_holder_ops, the @holder argument passed to + * helpers used to open and claim block devices via + * bd_prepare_to_claim() must point to a superblock. 
+ */ extern const struct blk_holder_ops fs_holder_ops; /* -- cgit v1.2.3 From cd34758c5238ae6976b10fe15bba7031b409c969 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2023 18:43:07 +0100 Subject: block: Remove blkdev_get_by_*() functions blkdev_get_by_*() and blkdev_put() functions are now unused. Remove them. Acked-by: Christoph Hellwig Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20231101174325.10596-2-jack@suse.cz Reviewed-by: Christian Brauner Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/bdev.c | 94 ++++++++++++++++---------------------------------- include/linux/blkdev.h | 5 --- 2 files changed, 30 insertions(+), 69 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bdev.c b/block/bdev.c index 53a2c653a259..0daf14e50dd0 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -732,7 +732,7 @@ void blkdev_put_no_open(struct block_device *bdev) } /** - * blkdev_get_by_dev - open a block device by device number + * bdev_open_by_dev - open a block device by device number * @dev: device number of block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier @@ -744,32 +744,40 @@ void blkdev_put_no_open(struct block_device *bdev) * * Use this interface ONLY if you really do not have anything better - i.e. when * you are behind a truly sucky interface and all you are given is a device - * number. Everything else should use blkdev_get_by_path(). + * number. Everything else should use bdev_open_by_path(). * * CONTEXT: * Might sleep. * * RETURNS: - * Reference to the block_device on success, ERR_PTR(-errno) on failure. + * Handle with a reference to the block_device on success, ERR_PTR(-errno) on + * failure. */ -struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) +struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops) { - bool unblock_events = true; + struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle), + GFP_KERNEL); struct block_device *bdev; + bool unblock_events = true; struct gendisk *disk; int ret; + if (!handle) + return ERR_PTR(-ENOMEM); + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, MAJOR(dev), MINOR(dev), ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | ((mode & BLK_OPEN_WRITE) ? 
DEVCG_ACC_WRITE : 0)); if (ret) - return ERR_PTR(ret); + goto free_handle; bdev = blkdev_get_no_open(dev); - if (!bdev) - return ERR_PTR(-ENXIO); + if (!bdev) { + ret = -ENXIO; + goto free_handle; + } disk = bdev->bd_disk; if (holder) { @@ -818,7 +826,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (unblock_events) disk_unblock_events(disk); - return bdev; + handle->bdev = bdev; + handle->holder = holder; + handle->mode = mode; + return handle; put_module: module_put(disk->fops->owner); abort_claiming: @@ -828,34 +839,14 @@ abort_claiming: disk_unblock_events(disk); put_blkdev: blkdev_put_no_open(bdev); +free_handle: + kfree(handle); return ERR_PTR(ret); } -EXPORT_SYMBOL(blkdev_get_by_dev); - -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) -{ - struct bdev_handle *handle = kmalloc(sizeof(*handle), GFP_KERNEL); - struct block_device *bdev; - - if (!handle) - return ERR_PTR(-ENOMEM); - bdev = blkdev_get_by_dev(dev, mode, holder, hops); - if (IS_ERR(bdev)) { - kfree(handle); - return ERR_CAST(bdev); - } - handle->bdev = bdev; - handle->holder = holder; - if (holder) - mode |= BLK_OPEN_EXCL; - handle->mode = mode; - return handle; -} EXPORT_SYMBOL(bdev_open_by_dev); /** - * blkdev_get_by_path - open a block device by name + * bdev_open_by_path - open a block device by name * @path: path to the block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier @@ -869,29 +860,9 @@ EXPORT_SYMBOL(bdev_open_by_dev); * Might sleep. * * RETURNS: - * Reference to the block_device on success, ERR_PTR(-errno) on failure. + * Handle with a reference to the block_device on success, ERR_PTR(-errno) on + * failure. */ -struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops) -{ - struct block_device *bdev; - dev_t dev; - int error; - - error = lookup_bdev(path, &dev); - if (error) - return ERR_PTR(error); - - bdev = blkdev_get_by_dev(dev, mode, holder, hops); - if (!IS_ERR(bdev) && (mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, holder); - return ERR_PTR(-EACCES); - } - - return bdev; -} -EXPORT_SYMBOL(blkdev_get_by_path); - struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { @@ -914,8 +885,9 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(bdev_open_by_path); -void blkdev_put(struct block_device *bdev, void *holder) +void bdev_release(struct bdev_handle *handle) { + struct block_device *bdev = handle->bdev; struct gendisk *disk = bdev->bd_disk; /* @@ -929,8 +901,8 @@ void blkdev_put(struct block_device *bdev, void *holder) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); - if (holder) - bd_end_claim(bdev, holder); + if (handle->holder) + bd_end_claim(bdev, handle->holder); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE @@ -947,12 +919,6 @@ void blkdev_put(struct block_device *bdev, void *holder) module_put(disk->fops->owner); blkdev_put_no_open(bdev); -} -EXPORT_SYMBOL(blkdev_put); - -void bdev_release(struct bdev_handle *handle) -{ - blkdev_put(handle->bdev, handle->holder); kfree(handle); } EXPORT_SYMBOL(bdev_release); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index abf71cce785c..7afc10315dd5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1500,10 +1500,6 @@ struct bdev_handle { blk_mode_t mode; }; -struct 
block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops); -struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops); struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, @@ -1511,7 +1507,6 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void blkdev_put(struct block_device *bdev, void *holder); void bdev_release(struct bdev_handle *handle); /* just for blk-cgroup, don't use elsewhere */ -- cgit v1.2.3 From ed5cc702d311c14b653323d76062b0294effa66e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2023 18:43:08 +0100 Subject: block: Add config option to not allow writing to mounted devices Writing to mounted devices is dangerous and can lead to filesystem corruption as well as crashes. Furthermore syzbot comes with more and more involved examples how to corrupt block device under a mounted filesystem leading to kernel crashes and reports we can do nothing about. Add tracking of writers to each block device and a kernel cmdline argument which controls whether other writeable opens to block devices open with BLK_OPEN_RESTRICT_WRITES flag are allowed. We will make filesystems use this flag for used devices. Note that this effectively only prevents modification of the particular block device's page cache by other writers. The actual device content can still be modified by other means - e.g. by issuing direct scsi commands, by doing writes through devices lower in the storage stack (e.g. in case loop devices, DM, or MD are involved) etc. But blocking direct modifications of the block device page cache is enough to give filesystems a chance to perform data validation when loading data from the underlying storage and thus prevent kernel crashes. Syzbot can use this cmdline argument option to avoid uninteresting crashes. Also users whose userspace setup does not need writing to mounted block devices can set this option for hardening. Link: https://lore.kernel.org/all/60788e5d-5c7c-1142-e554-c21d709acfd9@linaro.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20231101174325.10596-3-jack@suse.cz Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/Kconfig | 20 +++++++++++++ block/bdev.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 2 ++ 4 files changed, 97 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/block/Kconfig b/block/Kconfig index 55ae2286a4de..1de4682d48cc 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -78,6 +78,26 @@ config BLK_DEV_INTEGRITY_T10 select CRC_T10DIF select CRC64_ROCKSOFT +config BLK_DEV_WRITE_MOUNTED + bool "Allow writing to mounted block devices" + default y + help + When a block device is mounted, writing to its buffer cache is very + likely going to cause filesystem corruption. It is also rather easy to + crash the kernel in this way since the filesystem has no practical way + of detecting these writes to buffer cache and verifying its metadata + integrity. 
However there are some setups that need this capability + like running fsck on read-only mounted root device, modifying some + features on mounted ext4 filesystem, and similar. If you say N, the + kernel will prevent processes from writing to block devices that are + mounted by filesystems which provides some more protection from runaway + privileged processes and generally makes it much harder to crash + filesystem drivers. Note however that this does not prevent + underlying device(s) from being modified by other means, e.g. by + directly submitting SCSI commands or through access to lower layers of + storage stack. If in doubt, say Y. The configuration can be overridden + with the bdev_allow_write_mounted boot option. + config BLK_DEV_ZONED bool "Zoned block device support" select MQ_IOSCHED_DEADLINE diff --git a/block/bdev.c b/block/bdev.c index 0daf14e50dd0..fc8d28d77495 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -30,6 +30,9 @@ #include "../fs/internal.h" #include "blk.h" +/* Should we allow writing to mounted block devices? */ +static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED); + struct bdev_inode { struct block_device bdev; struct inode vfs_inode; @@ -730,7 +733,58 @@ void blkdev_put_no_open(struct block_device *bdev) { put_device(&bdev->bd_device); } - + +static bool bdev_writes_blocked(struct block_device *bdev) +{ + return bdev->bd_writers == -1; +} + +static void bdev_block_writes(struct block_device *bdev) +{ + bdev->bd_writers = -1; +} + +static void bdev_unblock_writes(struct block_device *bdev) +{ + bdev->bd_writers = 0; +} + +static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return true; + /* Writes blocked? */ + if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev)) + return false; + if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0) + return false; + return true; +} + +static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return; + + /* Claim exclusive or shared write access. */ + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_block_writes(bdev); + else if (mode & BLK_OPEN_WRITE) + bdev->bd_writers++; +} + +static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return; + + /* Yield exclusive or shared write access. 
*/ + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_unblock_writes(bdev); + else if (mode & BLK_OPEN_WRITE) + bdev->bd_writers--; +} + /** * bdev_open_by_dev - open a block device by device number * @dev: device number of block device to open @@ -773,6 +827,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (ret) goto free_handle; + /* Blocking writes requires exclusive opener */ + if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) + return ERR_PTR(-EINVAL); + bdev = blkdev_get_no_open(dev); if (!bdev) { ret = -ENXIO; @@ -800,12 +858,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, goto abort_claiming; if (!try_module_get(disk->fops->owner)) goto abort_claiming; + ret = -EBUSY; + if (!bdev_may_open(bdev, mode)) + goto abort_claiming; if (bdev_is_partition(bdev)) ret = blkdev_get_part(bdev, mode); else ret = blkdev_get_whole(bdev, mode); if (ret) goto put_module; + bdev_claim_write_access(bdev, mode); if (holder) { bd_finish_claiming(bdev, holder, hops); @@ -901,6 +963,8 @@ void bdev_release(struct bdev_handle *handle) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); + bdev_yield_write_access(bdev, handle->mode); + if (handle->holder) bd_end_claim(bdev, handle->holder); @@ -1069,3 +1133,12 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) blkdev_put_no_open(bdev); } + +static int __init setup_bdev_allow_write_mounted(char *str) +{ + if (kstrtobool(str, &bdev_allow_write_mounted)) + pr_warn("Invalid option string for bdev_allow_write_mounted:" + " '%s'\n", str); + return 1; +} +__setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 749203277fee..52e264d5a830 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -66,6 +66,7 @@ struct block_device { #ifdef CONFIG_FAIL_MAKE_REQUEST bool bd_make_it_fail; #endif + int bd_writers; /* * keep this out-of-line as it's both big and not needed in the fast * path diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7afc10315dd5..0e0c0186aa32 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,8 @@ typedef unsigned int __bitwise blk_mode_t; #define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3)) /* open for "writes" only for ioctls (specialy hack for floppy.c) */ #define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) +/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */ +#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5)) struct gendisk { /* -- cgit v1.2.3 From 6f861765464f43a71462d52026fbddfc858239a5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2023 18:43:10 +0100 Subject: fs: Block writes to mounted block devices Ask block layer to block writes to block devices mounted by filesystems. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20231101174325.10596-5-jack@suse.cz Reviewed-by: Christian Brauner Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e0c0186aa32..9f6c3373f9fc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1494,7 +1494,8 @@ extern const struct blk_holder_ops fs_holder_ops; * as stored in sb->s_flags. */ #define sb_open_mode(flags) \ - (BLK_OPEN_READ | (((flags) & SB_RDONLY) ? 
0 : BLK_OPEN_WRITE)) + (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ + (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) struct bdev_handle { struct block_device *bdev; -- cgit v1.2.3 From 668bfeeabb5e402e3b36992f7859c284cc6e594d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Nov 2023 08:20:02 +0100 Subject: block: move a few definitions out of CONFIG_BLK_DEV_ZONED Allow using a few symbols with IS_ENABLED instead of #idef by moving the declarations out of #idef CONFIG_BLK_DEV_ZONED, and move bdev_nr_zones into the remaining #idef CONFIG_BLK_DEV_ZONED, #else block below. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231127072002.1332685-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 51fa7ffdee83..17c0a7d0d319 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -331,22 +331,13 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); -#ifdef CONFIG_BLK_DEV_ZONED #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, - unsigned int nr_zones, report_zones_cb cb, void *data); -unsigned int bdev_nr_zones(struct block_device *bdev); -extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sectors, sector_t nr_sectors, - gfp_t gfp_mask); + unsigned int nr_zones, report_zones_cb cb, void *data); +int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, + sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); int blk_revalidate_disk_zones(struct gendisk *disk, - void (*update_driver_data)(struct gendisk *disk)); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return 0; -} -#endif /* CONFIG_BLK_DEV_ZONED */ + void (*update_driver_data)(struct gendisk *disk)); /* * Independent access ranges: struct blk_independent_access_range describes @@ -643,6 +634,8 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED +unsigned int bdev_nr_zones(struct block_device *bdev); + static inline unsigned int disk_nr_zones(struct gendisk *disk) { return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; @@ -687,6 +680,11 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev) } #else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return 0; +} + static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; -- cgit v1.2.3 From 0c734c5ea76e333fbb8dd83b5bab46291b38096b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Dec 2023 11:08:15 -0700 Subject: block: improve struct request_queue layout It's clearly been a while since someone looked at this, so I gave it a quick shot. There are few issues in here: - Random bundling of members that are mostly read-only and often written - Random holes that need not be there This moves the most frequently used bits into cacheline 1 and 2, with the 2nd one being more write intensive than the first one, which is basically read-only. Outside of making this work a bit more efficiently, it also reduces the size of struct request_queue for my test setup from 864 bytes (spanning 14 cachelines!) to 832 bytes and 13 cachelines. 
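(Aside, not part of the patch: the rationale of the reordering -- read-mostly fields that the fast path touches packed into the leading cache lines, frequently written state kept off them -- can be sketched in plain C. The struct and field names below are invented for illustration only; in the kernel tree the resulting layout is normally inspected with pahole rather than a static assert.)

/*
 * Illustrative sketch only -- not the real struct request_queue and not
 * part of the patch above. Compiles as a standalone C11 translation unit.
 */
#include <stddef.h>

#define CACHELINE_BYTES 64

struct example_queue {
	/* Cache line 0: read-mostly state consulted on every I/O. */
	void		*queuedata;
	const void	*ops;
	unsigned long	flags;
	unsigned int	timeout_ms;
	unsigned int	depth;

	/*
	 * Later cache lines: frequently written state, placed on its own
	 * cache line so writers do not keep invalidating the read-mostly
	 * line above on other CPUs.
	 */
	long	usage_counter __attribute__((aligned(CACHELINE_BYTES)));
	int	pm_only_count;
};

/* Verify the write-heavy part really starts on a fresh cache line. */
_Static_assert(offsetof(struct example_queue, usage_counter) % CACHELINE_BYTES == 0,
	       "write-heavy fields should start on their own cache line");
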
Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/d2b7b61c-4868-45c0-9060-4f9c73de9d7e@kernel.dk Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 89 ++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 43 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 17c0a7d0d319..185ed3770e3a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -367,59 +367,51 @@ struct blk_independent_access_ranges { }; struct request_queue { - struct request *last_merge; - struct elevator_queue *elevator; - - struct percpu_ref q_usage_counter; + /* + * The queue owner gets to use this for whatever they like. + * ll_rw_blk doesn't touch it. + */ + void *queuedata; - struct blk_queue_stats *stats; - struct rq_qos *rq_qos; - struct mutex rq_qos_mutex; + struct elevator_queue *elevator; const struct blk_mq_ops *mq_ops; /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; + /* + * various queue flags, see QUEUE_* below + */ + unsigned long queue_flags; + + unsigned int rq_timeout; + unsigned int queue_depth; + refcount_t refs; + /* hw dispatch queues */ - struct xarray hctx_table; unsigned int nr_hw_queues; + struct xarray hctx_table; - /* - * The queue owner gets to use this for whatever they like. - * ll_rw_blk doesn't touch it. - */ - void *queuedata; - - /* - * various queue flags, see QUEUE_* below - */ - unsigned long queue_flags; - /* - * Number of contexts that have called blk_set_pm_only(). If this - * counter is above zero then only RQF_PM requests are processed. - */ - atomic_t pm_only; + struct percpu_ref q_usage_counter; - /* - * ida allocated id for this queue. Used to index queues from - * ioctx. - */ - int id; + struct request *last_merge; spinlock_t queue_lock; - struct gendisk *disk; + int quiesce_depth; - refcount_t refs; + struct gendisk *disk; /* * mq queue kobject */ struct kobject *mq_kobj; + struct queue_limits limits; + #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -430,24 +422,40 @@ struct request_queue { #endif /* - * queue settings + * Number of contexts that have called blk_set_pm_only(). If this + * counter is above zero then only RQF_PM requests are processed. */ - unsigned long nr_requests; /* Max # of requests */ + atomic_t pm_only; + + struct blk_queue_stats *stats; + struct rq_qos *rq_qos; + struct mutex rq_qos_mutex; + + /* + * ida allocated id for this queue. Used to index queues from + * ioctx. 
+ */ + int id; unsigned int dma_pad_mask; + /* + * queue settings + */ + unsigned long nr_requests; /* Max # of requests */ + #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; struct kobject *crypto_kobject; #endif - unsigned int rq_timeout; - struct timer_list timeout; struct work_struct timeout_work; atomic_t nr_active_requests_shared_tags; + unsigned int required_elevator_features; + struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; @@ -458,11 +466,12 @@ struct request_queue { struct mutex blkcg_mutex; #endif - struct queue_limits limits; + int node; - unsigned int required_elevator_features; + spinlock_t requeue_lock; + struct list_head requeue_list; + struct delayed_work requeue_work; - int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -472,10 +481,6 @@ struct request_queue { struct blk_flush_queue *fq; struct list_head flush_list; - struct list_head requeue_list; - spinlock_t requeue_lock; - struct delayed_work requeue_work; - struct mutex sysfs_lock; struct mutex sysfs_dir_lock; @@ -500,8 +505,6 @@ struct request_queue { */ struct mutex mq_freeze_lock; - int quiesce_depth; - struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; -- cgit v1.2.3 From 7437bb73f087e5f216f9c6603f5149d354e315af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Dec 2023 17:53:57 +0100 Subject: block: remove support for the host aware zone model When zones were first added the SCSI and ATA specs, two different models were supported (in addition to the drive managed one that is invisible to the host): - host managed where non-conventional zones there is strict requirement to write at the write pointer, or else an error is returned - host aware where a write point is maintained if writes always happen at it, otherwise it is left in an under-defined state and the sequential write preferred zones behave like conventional zones (probably very badly performing ones, though) Not surprisingly this lukewarm model didn't prove to be very useful and was finally removed from the ZBC and SBC specs (NVMe never implemented it). Due to to the easily disappearing write pointer host software could never rely on the write pointer to actually be useful for say recovery. Fortunately only a few HDD prototypes shipped using this model which never made it to mass production. Drop the support before it is too late. Note that any such host aware prototype HDD can still be used with Linux as we'll now treat it as a conventional HDD. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20231217165359.604246-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 67 ++++++------------------------------------ block/blk-sysfs.c | 9 ++---- block/partitions/core.c | 12 ++------ drivers/block/null_blk/zoned.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/md/dm-kcopyd.c | 2 +- drivers/md/dm-table.c | 45 +++++++++++++--------------- drivers/md/dm-zoned-metadata.c | 7 ++--- drivers/md/dm-zoned-target.c | 4 +-- drivers/nvme/host/zns.c | 2 +- drivers/scsi/scsi_debug.c | 27 ++++++++--------- drivers/scsi/sd.c | 45 +++++++++++++--------------- drivers/scsi/sd_zbc.c | 16 +--------- fs/btrfs/zoned.c | 23 ++------------- fs/btrfs/zoned.h | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/super.c | 17 +++++------ include/linux/blkdev.h | 37 ++--------------------- 19 files changed, 92 insertions(+), 231 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index 09e3a4d5e4d2..50e9efb59f67 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -56,7 +56,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->alignment_offset = 0; lim->io_opt = 0; lim->misaligned = 0; - lim->zoned = BLK_ZONED_NONE; + lim->zoned = false; lim->zone_write_granularity = 0; lim->dma_alignment = 511; } @@ -880,79 +880,30 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); -static bool disk_has_partitions(struct gendisk *disk) -{ - unsigned long idx; - struct block_device *part; - bool ret = false; - - rcu_read_lock(); - xa_for_each(&disk->part_tbl, idx, part) { - if (bdev_is_partition(part)) { - ret = true; - break; - } - } - rcu_read_unlock(); - - return ret; -} - /** * disk_set_zoned - configure the zoned model for a disk * @disk: the gendisk of the queue to configure - * @model: the zoned model to set - * - * Set the zoned model of @disk to @model. + * @zoned: zoned or not. * - * When @model is BLK_ZONED_HM (host managed), this should be called only - * if zoned block device support is enabled (CONFIG_BLK_DEV_ZONED option). - * If @model specifies BLK_ZONED_HA (host aware), the effective model used - * depends on CONFIG_BLK_DEV_ZONED settings and on the existence of partitions - * on the disk. + * When @zoned is %true, this should be called only if zoned block device + * support is enabled (CONFIG_BLK_DEV_ZONED option). */ -void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model) +void disk_set_zoned(struct gendisk *disk, bool zoned) { struct request_queue *q = disk->queue; - unsigned int old_model = q->limits.zoned; - switch (model) { - case BLK_ZONED_HM: - /* - * Host managed devices are supported only if - * CONFIG_BLK_DEV_ZONED is enabled. - */ + if (zoned) { WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)); - break; - case BLK_ZONED_HA: - /* - * Host aware devices can be treated either as regular block - * devices (similar to drive managed devices) or as zoned block - * devices to take advantage of the zone command set, similarly - * to host managed devices. We try the latter if there are no - * partitions and zoned block device support is enabled, else - * we do nothing special as far as the block layer is concerned. 
- */ - if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || - disk_has_partitions(disk)) - model = BLK_ZONED_NONE; - break; - case BLK_ZONED_NONE: - default: - if (WARN_ON_ONCE(model != BLK_ZONED_NONE)) - model = BLK_ZONED_NONE; - break; - } - q->limits.zoned = model; - if (model != BLK_ZONED_NONE) { /* * Set the zone write granularity to the device logical block * size by default. The driver can change this value if needed. */ + q->limits.zoned = true; blk_queue_zone_write_granularity(q, queue_logical_block_size(q)); - } else if (old_model != BLK_ZONED_NONE) { + } else if (q->limits.zoned) { + q->limits.zoned = false; disk_clear_zone_settings(disk); } } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 63e481262336..d5e669a401b0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -309,14 +309,9 @@ QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); static ssize_t queue_zoned_show(struct request_queue *q, char *page) { - switch (blk_queue_zoned_model(q)) { - case BLK_ZONED_HA: - return sprintf(page, "host-aware\n"); - case BLK_ZONED_HM: + if (blk_queue_is_zoned(q)) return sprintf(page, "host-managed\n"); - default: - return sprintf(page, "none\n"); - } + return sprintf(page, "none\n"); } static ssize_t queue_nr_zones_show(struct request_queue *q, char *page) diff --git a/block/partitions/core.c b/block/partitions/core.c index f47ffcfdfcec..e6ac73617f3e 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -305,18 +305,10 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, * Partitions are not supported on zoned block devices that are used as * such. */ - switch (disk->queue->limits.zoned) { - case BLK_ZONED_HM: + if (bdev_is_zoned(disk->part0)) { pr_warn("%s: partitions not supported on host managed zoned block device\n", disk->disk_name); return ERR_PTR(-ENXIO); - case BLK_ZONED_HA: - pr_info("%s: disabling host aware zoned block device support due to partitions\n", - disk->disk_name); - disk_set_zoned(disk, BLK_ZONED_NONE); - break; - case BLK_ZONED_NONE: - break; } if (xa_load(&disk->part_tbl, partno)) @@ -613,7 +605,7 @@ static int blk_add_partitions(struct gendisk *disk) /* * Partitions are not supported on host managed zoned block devices. 
*/ - if (disk->queue->limits.zoned == BLK_ZONED_HM) { + if (bdev_is_zoned(disk->part0)) { pr_warn("%s: ignoring partition table on host managed zoned block device\n", disk->disk_name); ret = 0; diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 55c5b48bc276..369eb1e78bb5 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -159,7 +159,7 @@ int null_register_zoned_dev(struct nullb *nullb) struct nullb_device *dev = nullb->dev; struct request_queue *q = nullb->q; - disk_set_zoned(nullb->disk, BLK_ZONED_HM); + disk_set_zoned(nullb->disk, true); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); blk_queue_chunk_sectors(q, dev->zone_size_sects); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 83600b45e12a..24fb95f19d52 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -250,7 +250,7 @@ static int ublk_dev_param_zoned_apply(struct ublk_device *ub) { const struct ublk_param_zoned *p = &ub->params.zoned; - disk_set_zoned(ub->ub_disk, BLK_ZONED_HM); + disk_set_zoned(ub->ub_disk, true); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); blk_queue_required_elevator_features(ub->ub_disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a28f1687066b..19a4f20bd1c2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -730,7 +730,7 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - disk_set_zoned(vblk->disk, BLK_ZONED_HM); + disk_set_zoned(vblk->disk, true); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); virtio_cread(vdev, struct virtio_blk_config, diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index d01807c50f20..36bcfdccae04 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -807,7 +807,7 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, */ if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { for (i = 0; i < job->num_dests; i++) { - if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) { + if (bdev_is_zoned(dests[i].bdev)) { job->flags |= BIT(DM_KCOPYD_WRITE_SEQ); break; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 198d38b53322..260b5b8f2b0d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1579,21 +1579,18 @@ bool dm_table_has_no_data_devices(struct dm_table *t) return true; } -static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - enum blk_zoned_model *zoned_model = data; + bool *zoned = data; - return blk_queue_zoned_model(q) != *zoned_model; + return bdev_is_zoned(dev->bdev) != *zoned; } static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return blk_queue_zoned_model(q) != BLK_ZONED_NONE; + return bdev_is_zoned(dev->bdev); } /* @@ -1603,8 +1600,7 @@ static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any * zoned model with all zoned devices having the same zone size. 
*/ -static bool dm_table_supports_zoned_model(struct dm_table *t, - enum blk_zoned_model zoned_model) +static bool dm_table_supports_zoned(struct dm_table *t, bool zoned) { for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1623,11 +1619,11 @@ static bool dm_table_supports_zoned_model(struct dm_table *t, if (dm_target_supports_zoned_hm(ti->type)) { if (!ti->type->iterate_devices || - ti->type->iterate_devices(ti, device_not_zoned_model, - &zoned_model)) + ti->type->iterate_devices(ti, device_not_zoned, + &zoned)) return false; } else if (!dm_target_supports_mixed_zoned_model(ti->type)) { - if (zoned_model == BLK_ZONED_HM) + if (zoned) return false; } } @@ -1650,14 +1646,13 @@ static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev * * zone sectors, if the destination device is a zoned block device, it shall * have the specified zone_sectors. */ -static int validate_hardware_zoned_model(struct dm_table *t, - enum blk_zoned_model zoned_model, - unsigned int zone_sectors) +static int validate_hardware_zoned(struct dm_table *t, bool zoned, + unsigned int zone_sectors) { - if (zoned_model == BLK_ZONED_NONE) + if (!zoned) return 0; - if (!dm_table_supports_zoned_model(t, zoned_model)) { + if (!dm_table_supports_zoned(t, zoned)) { DMERR("%s: zoned model is not consistent across all devices", dm_device_name(t->md)); return -EINVAL; @@ -1683,8 +1678,8 @@ int dm_calculate_queue_limits(struct dm_table *t, struct queue_limits *limits) { struct queue_limits ti_limits; - enum blk_zoned_model zoned_model = BLK_ZONED_NONE; unsigned int zone_sectors = 0; + bool zoned = false; blk_set_stacking_limits(limits); @@ -1706,12 +1701,12 @@ int dm_calculate_queue_limits(struct dm_table *t, ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits); - if (zoned_model == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) { + if (!zoned && ti_limits.zoned) { /* * After stacking all limits, validate all devices * in table support this zoned model and zone sectors. */ - zoned_model = ti_limits.zoned; + zoned = ti_limits.zoned; zone_sectors = ti_limits.chunk_sectors; } @@ -1744,18 +1739,18 @@ combine_limits: * Verify that the zoned model and zone sectors, as determined before * any .io_hints override, are the same across all devices in the table. * - this is especially relevant if .io_hints is emulating a disk-managed - * zoned model (aka BLK_ZONED_NONE) on host-managed zoned block devices. + * zoned model on host-managed zoned block devices. * BUT... */ - if (limits->zoned != BLK_ZONED_NONE) { + if (limits->zoned) { /* * ...IF the above limits stacking determined a zoned model * validate that all of the table's devices conform to it. 
*/ - zoned_model = limits->zoned; + zoned = limits->zoned; zone_sectors = limits->chunk_sectors; } - if (validate_hardware_zoned_model(t, zoned_model, zone_sectors)) + if (validate_hardware_zoned(t, zoned, zone_sectors)) return -EINVAL; return validate_hardware_logical_block_alignment(t, limits); diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 60a4dc01ea18..fdfe30f7b697 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -2836,12 +2836,11 @@ static void dmz_print_dev(struct dmz_metadata *zmd, int num) { struct dmz_dev *dev = &zmd->dev[num]; - if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) + if (!bdev_is_zoned(dev->bdev)) dmz_dev_info(dev, "Regular block device"); else - dmz_dev_info(dev, "Host-%s zoned block device", - bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? - "aware" : "managed"); + dmz_dev_info(dev, "Host-managed zoned block device"); + if (zmd->sb_version > 1) { sector_t sector_offset = dev->zone_offset << zmd->zone_nr_sectors_shift; diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index b487f7acc860..621794a9edd6 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -702,7 +702,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path, } bdev = ddev->bdev; - if (bdev_zoned_model(bdev) == BLK_ZONED_NONE) { + if (!bdev_is_zoned(bdev)) { if (nr_devs == 1) { ti->error = "Invalid regular device"; goto err; @@ -1010,7 +1010,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->max_sectors = chunk_sectors; /* We are exposing a drive-managed zoned block device */ - limits->zoned = BLK_ZONED_NONE; + limits->zoned = false; } /* diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index ec8557810c21..6d4c440e97e2 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -108,7 +108,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) goto free_data; } - disk_set_zoned(ns->disk, BLK_ZONED_HM); + disk_set_zoned(ns->disk, true); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1); disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 6d8218a44122..d03d66f11493 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -339,7 +339,7 @@ struct sdebug_dev_info { bool used; /* For ZBC devices */ - enum blk_zoned_model zmodel; + bool zoned; unsigned int zcap; unsigned int zsize; unsigned int zsize_shift; @@ -844,8 +844,11 @@ static bool write_since_sync; static bool sdebug_statistics = DEF_STATISTICS; static bool sdebug_wp; static bool sdebug_allow_restart; -/* Following enum: 0: no zbc, def; 1: host aware; 2: host managed */ -static enum blk_zoned_model sdeb_zbc_model = BLK_ZONED_NONE; +static enum { + BLK_ZONED_NONE = 0, + BLK_ZONED_HA = 1, + BLK_ZONED_HM = 2, +} sdeb_zbc_model = BLK_ZONED_NONE; static char *sdeb_zbc_model_s; enum sam_lun_addr_method {SAM_LUN_AM_PERIPHERAL = 0x0, @@ -1815,8 +1818,6 @@ static int inquiry_vpd_b1(struct sdebug_dev_info *devip, unsigned char *arr) arr[1] = 1; /* non rotating medium (e.g. solid state) */ arr[2] = 0; arr[3] = 5; /* less than 1.8" */ - if (devip->zmodel == BLK_ZONED_HA) - arr[4] = 1 << 4; /* zoned field = 01b */ return 0x3c; } @@ -1883,7 +1884,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) if (! 
arr) return DID_REQUEUE << 16; is_disk = (sdebug_ptype == TYPE_DISK); - is_zbc = (devip->zmodel != BLK_ZONED_NONE); + is_zbc = devip->zoned; is_disk_zbc = (is_disk || is_zbc); have_wlun = scsi_is_wlun(scp->device->lun); if (have_wlun) @@ -2195,7 +2196,7 @@ static int resp_readcap16(struct scsi_cmnd *scp, * Since the scsi_debug READ CAPACITY implementation always reports the * total disk capacity, set RC BASIS = 1 for host-managed ZBC devices. */ - if (devip->zmodel == BLK_ZONED_HM) + if (devip->zoned) arr[12] |= 1 << 4; arr[15] = sdebug_lowest_aligned & 0xff; @@ -2648,7 +2649,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, msense_6 = (MODE_SENSE == cmd[0]); llbaa = msense_6 ? false : !!(cmd[1] & 0x10); is_disk = (sdebug_ptype == TYPE_DISK); - is_zbc = (devip->zmodel != BLK_ZONED_NONE); + is_zbc = devip->zoned; if ((is_disk || is_zbc) && !dbd) bd_len = llbaa ? 16 : 8; else @@ -3194,8 +3195,6 @@ static int check_zbc_access_params(struct scsi_cmnd *scp, struct sdeb_zone_state *zsp_end = zbc_zone(devip, lba + num - 1); if (!write) { - if (devip->zmodel == BLK_ZONED_HA) - return 0; /* For host-managed, reads cannot cross zone types boundaries */ if (zsp->z_type != zsp_end->z_type) { mk_sense_buffer(scp, ILLEGAL_REQUEST, @@ -5322,7 +5321,7 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) if (devip->zcap < devip->zsize) devip->nr_zones += devip->nr_seq_zones; - if (devip->zmodel == BLK_ZONED_HM) { + if (devip->zoned) { /* zbc_max_open_zones can be 0, meaning "not reported" */ if (sdeb_zbc_max_open >= devip->nr_zones - 1) devip->max_open = (devip->nr_zones - 1) / 2; @@ -5347,7 +5346,7 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) zsp->z_size = min_t(u64, devip->zsize, capacity - zstart); } else if ((zstart & (devip->zsize - 1)) == 0) { - if (devip->zmodel == BLK_ZONED_HM) + if (devip->zoned) zsp->z_type = ZBC_ZTYPE_SWR; else zsp->z_type = ZBC_ZTYPE_SWP; @@ -5390,13 +5389,13 @@ static struct sdebug_dev_info *sdebug_device_create( } devip->sdbg_host = sdbg_host; if (sdeb_zbc_in_use) { - devip->zmodel = sdeb_zbc_model; + devip->zoned = sdeb_zbc_model == BLK_ZONED_HM; if (sdebug_device_create_zones(devip)) { kfree(devip); return NULL; } } else { - devip->zmodel = BLK_ZONED_NONE; + devip->zoned = false; } devip->create_ts = ktime_get_boottime(); atomic_set(&devip->stopped, (sdeb_tur_ms_to_ready > 0 ? 2 : 0)); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fa00dd503cbf..19a19eb277f5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3117,7 +3117,6 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) struct request_queue *q = sdkp->disk->queue; struct scsi_vpd *vpd; u16 rot; - u8 zoned; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb1); @@ -3128,7 +3127,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) } rot = get_unaligned_be16(&vpd->data[4]); - zoned = (vpd->data[8] >> 4) & 3; + sdkp->zoned = (vpd->data[8] >> 4) & 3; rcu_read_unlock(); if (rot == 1) { @@ -3138,37 +3137,33 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) if (sdkp->device->type == TYPE_ZBC) { /* - * Host-managed: Per ZBC and ZAC specifications, writes in - * sequential write required zones of host-managed devices must - * be aligned to the device physical block size. + * Host-managed. + */ + disk_set_zoned(sdkp->disk, true); + + /* + * Per ZBC and ZAC specifications, writes in sequential write + * required zones of host-managed devices must be aligned to + * the device physical block size. 
*/ - disk_set_zoned(sdkp->disk, BLK_ZONED_HM); blk_queue_zone_write_granularity(q, sdkp->physical_block_size); } else { - sdkp->zoned = zoned; - if (sdkp->zoned == 1) { - /* Host-aware */ - disk_set_zoned(sdkp->disk, BLK_ZONED_HA); - } else { - /* Regular disk or drive managed disk */ - disk_set_zoned(sdkp->disk, BLK_ZONED_NONE); - } + /* + * Anything else. This includes host-aware device that we treat + * as conventional. + */ + disk_set_zoned(sdkp->disk, false); } if (!sdkp->first_scan) return; - if (blk_queue_is_zoned(q)) { - sd_printk(KERN_NOTICE, sdkp, "Host-%s zoned block device\n", - q->limits.zoned == BLK_ZONED_HM ? "managed" : "aware"); - } else { - if (sdkp->zoned == 1) - sd_printk(KERN_NOTICE, sdkp, - "Host-aware SMR disk used as regular disk\n"); - else if (sdkp->zoned == 2) - sd_printk(KERN_NOTICE, sdkp, - "Drive-managed SMR disk\n"); - } + if (blk_queue_is_zoned(q)) + sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n"); + else if (sdkp->zoned == 1) + sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n"); + else if (sdkp->zoned == 2) + sd_printk(KERN_NOTICE, sdkp, "Drive-managed SMR disk\n"); } /** diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index a25215507668..26af5ab7d7c1 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -836,10 +836,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) /* * For all zoned disks, initialize zone append emulation data if not - * already done. This is necessary also for host-aware disks used as - * regular disks due to the presence of partitions as these partitions - * may be deleted and the disk zoned model changed back from - * BLK_ZONED_NONE to BLK_ZONED_HA. + * already done. */ if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) { ret = sd_zbc_init_disk(sdkp); @@ -932,17 +929,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) sdkp->device->use_10_for_rw = 0; sdkp->device->use_16_for_sync = 1; - if (!blk_queue_is_zoned(q)) { - /* - * This can happen for a host aware disk with partitions. - * The block device zone model was already cleared by - * disk_set_zoned(). Only free the scsi disk zone - * information and exit early. 
- */ - sd_zbc_free_zone_info(sdkp); - return 0; - } - /* Check zoned block device characteristics (unconstrained reads) */ ret = sd_zbc_check_zoned_characteristics(sdkp, buf); if (ret) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 188378ca19c7..23c1e6b19a65 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -578,26 +578,12 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) kvfree(zones); - switch (bdev_zoned_model(bdev)) { - case BLK_ZONED_HM: + if (bdev_is_zoned(bdev)) { model = "host-managed zoned"; emulated = ""; - break; - case BLK_ZONED_HA: - model = "host-aware zoned"; - emulated = ""; - break; - case BLK_ZONED_NONE: + } else { model = "regular"; emulated = "emulated "; - break; - default: - /* Just in case */ - btrfs_err_in_rcu(fs_info, "zoned: unsupported model %d on %s", - bdev_zoned_model(bdev), - rcu_str_deref(device->name)); - ret = -EOPNOTSUPP; - goto out_free_zone_info; } btrfs_info_in_rcu(fs_info, @@ -609,9 +595,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) out: kvfree(zones); -out_free_zone_info: btrfs_destroy_dev_zone_info(device); - return ret; } @@ -688,8 +672,7 @@ static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info) struct btrfs_device *device; list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { - if (device->bdev && - bdev_zoned_model(device->bdev) == BLK_ZONED_HM) { + if (device->bdev && bdev_is_zoned(device->bdev)) { btrfs_err(fs_info, "zoned: mode not enabled but zoned device found: %pg", device->bdev); diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index b9cec523b778..bc1b540c1597 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -324,7 +324,7 @@ static inline bool btrfs_check_device_zone_type(const struct btrfs_fs_info *fs_i } /* Do not allow Host Manged zoned device */ - return bdev_zoned_model(bdev) != BLK_ZONED_HM; + return !bdev_is_zoned(bdev); } static inline bool btrfs_check_super_location(struct btrfs_device *device, u64 pos) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4e42b5f24deb..9b62549a29ce 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -995,7 +995,7 @@ static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) } blkaddr -= FDEV(devi).start_blk; } - return bdev_zoned_model(FDEV(devi).bdev) == BLK_ZONED_HM && + return bdev_is_zoned(FDEV(devi).bdev) && f2fs_blkz_is_seq(sbi, devi, blkaddr) && (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 033af907c3b1..850c87ae7d98 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4282,24 +4282,21 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->aligned_blksize = false; #ifdef CONFIG_BLK_DEV_ZONED - if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_has_blkzoned(sbi)) { - f2fs_err(sbi, "Zoned block device feature not enabled"); - return -EINVAL; - } - if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { + if (bdev_is_zoned(FDEV(i).bdev)) { + if (!f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Zoned block device feature not enabled"); + return -EINVAL; + } if (init_blkz_info(sbi, i)) { f2fs_err(sbi, "Failed to initialize F2FS blkzone information"); return -EINVAL; } if (max_devices == 1) break; - f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: Host-managed)", i, FDEV(i).path, FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk, - bdev_zoned_model(FDEV(i).bdev) == 
BLK_ZONED_HA ? - "Host-aware" : "Host-managed"); + FDEV(i).start_blk, FDEV(i).end_blk); continue; } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 185ed3770e3a..28cda9fb239e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -263,18 +263,6 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } -/* - * Zoned block device models (zoned limit). - * - * Note: This needs to be ordered from the least to the most severe - * restrictions for the inheritance in blk_stack_limits() to work. - */ -enum blk_zoned_model { - BLK_ZONED_NONE = 0, /* Regular block device */ - BLK_ZONED_HA, /* Host-aware zoned block device */ - BLK_ZONED_HM, /* Host-managed zoned block device */ -}; - /* * BLK_BOUNCE_NONE: never bounce (default) * BLK_BOUNCE_HIGH: bounce all highmem pages @@ -316,7 +304,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; - enum blk_zoned_model zoned; + bool zoned; /* * Drivers that set dma_alignment to less than 511 must be prepared to @@ -329,7 +317,7 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); +void disk_set_zoned(struct gendisk *disk, bool zoned); #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, @@ -617,23 +605,9 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q) } #endif -static inline enum blk_zoned_model -blk_queue_zoned_model(struct request_queue *q) -{ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) - return q->limits.zoned; - return BLK_ZONED_NONE; -} - static inline bool blk_queue_is_zoned(struct request_queue *q) { - switch (blk_queue_zoned_model(q)) { - case BLK_ZONED_HA: - case BLK_ZONED_HM: - return true; - default: - return false; - } + return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; } #ifdef CONFIG_BLK_DEV_ZONED @@ -1260,11 +1234,6 @@ static inline bool bdev_nowait(struct block_device *bdev) return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); } -static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) -{ - return blk_queue_zoned_model(bdev_get_queue(bdev)); -} - static inline bool bdev_is_zoned(struct block_device *bdev) { return blk_queue_is_zoned(bdev_get_queue(bdev)); -- cgit v1.2.3 From d73e93b4dfab10c80688b061c30048df05585c7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Dec 2023 17:53:58 +0100 Subject: block: simplify disk_set_zoned Only use disk_set_zoned to actually enable zoned device support. For clearing it, call disk_clear_zoned, which is renamed from disk_clear_zone_settings and now directly clears the zoned flag as well. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20231217165359.604246-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 32 +++++++++++--------------------- block/blk-zoned.c | 3 ++- block/blk.h | 2 -- drivers/block/null_blk/zoned.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/nvme/host/zns.c | 2 +- drivers/scsi/sd.c | 7 +++++-- include/linux/blkdev.h | 3 ++- 9 files changed, 24 insertions(+), 31 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index 50e9efb59f67..bb94a3d471f4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -881,31 +881,21 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q, EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); /** - * disk_set_zoned - configure the zoned model for a disk - * @disk: the gendisk of the queue to configure - * @zoned: zoned or not. - * - * When @zoned is %true, this should be called only if zoned block device - * support is enabled (CONFIG_BLK_DEV_ZONED option). + * disk_set_zoned - inidicate a zoned device + * @disk: gendisk to configure */ -void disk_set_zoned(struct gendisk *disk, bool zoned) +void disk_set_zoned(struct gendisk *disk) { struct request_queue *q = disk->queue; - if (zoned) { - WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)); - - /* - * Set the zone write granularity to the device logical block - * size by default. The driver can change this value if needed. - */ - q->limits.zoned = true; - blk_queue_zone_write_granularity(q, - queue_logical_block_size(q)); - } else if (q->limits.zoned) { - q->limits.zoned = false; - disk_clear_zone_settings(disk); - } + WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)); + + /* + * Set the zone write granularity to the device logical block + * size by default. The driver can change this value if needed. 
+ */ + q->limits.zoned = true; + blk_queue_zone_write_granularity(q, queue_logical_block_size(q)); } EXPORT_SYMBOL_GPL(disk_set_zoned); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 619ee41a51cc..580a58e53efd 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -616,12 +616,13 @@ int blk_revalidate_disk_zones(struct gendisk *disk, } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); -void disk_clear_zone_settings(struct gendisk *disk) +void disk_clear_zoned(struct gendisk *disk) { struct request_queue *q = disk->queue; blk_mq_freeze_queue(q); + q->limits.zoned = false; disk_free_zone_bitmaps(disk); blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q); q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE; diff --git a/block/blk.h b/block/blk.h index 08a358bc0919..1ef920f72e0f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -395,14 +395,12 @@ static inline struct bio *blk_queue_bounce(struct bio *bio, #ifdef CONFIG_BLK_DEV_ZONED void disk_free_zone_bitmaps(struct gendisk *disk); -void disk_clear_zone_settings(struct gendisk *disk); int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg); int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); #else /* CONFIG_BLK_DEV_ZONED */ static inline void disk_free_zone_bitmaps(struct gendisk *disk) {} -static inline void disk_clear_zone_settings(struct gendisk *disk) {} static inline int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) { diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 369eb1e78bb5..6f5e0994862e 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -159,7 +159,7 @@ int null_register_zoned_dev(struct nullb *nullb) struct nullb_device *dev = nullb->dev; struct request_queue *q = nullb->q; - disk_set_zoned(nullb->disk, true); + disk_set_zoned(nullb->disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); blk_queue_chunk_sectors(q, dev->zone_size_sects); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 24fb95f19d52..d50d69b2c023 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -250,7 +250,7 @@ static int ublk_dev_param_zoned_apply(struct ublk_device *ub) { const struct ublk_param_zoned *p = &ub->params.zoned; - disk_set_zoned(ub->ub_disk, true); + disk_set_zoned(ub->ub_disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); blk_queue_required_elevator_features(ub->ub_disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 19a4f20bd1c2..7d7a19b2b9a8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -730,7 +730,7 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - disk_set_zoned(vblk->disk, true); + disk_set_zoned(vblk->disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); virtio_cread(vdev, struct virtio_blk_config, diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 6d4c440e97e2..3d98e435821e 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -108,7 +108,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) goto free_data; } - disk_set_zoned(ns->disk, true); + disk_set_zoned(ns->disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1); 
disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 19a19eb277f5..dbed075cdb98 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3135,11 +3135,13 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } + +#ifdef CONFIG_BLK_DEV_ZONED /* sd_probe rejects ZBD devices early otherwise */ if (sdkp->device->type == TYPE_ZBC) { /* * Host-managed. */ - disk_set_zoned(sdkp->disk, true); + disk_set_zoned(sdkp->disk); /* * Per ZBC and ZAC specifications, writes in sequential write @@ -3152,8 +3154,9 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) * Anything else. This includes host-aware device that we treat * as conventional. */ - disk_set_zoned(sdkp->disk, false); + disk_clear_zoned(sdkp->disk); } +#endif /* CONFIG_BLK_DEV_ZONED */ if (!sdkp->first_scan) return; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 28cda9fb239e..bc236e77d85e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -317,7 +317,8 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void disk_set_zoned(struct gendisk *disk, bool zoned); +void disk_set_zoned(struct gendisk *disk); +void disk_clear_zoned(struct gendisk *disk); #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, -- cgit v1.2.3 From 02d374f3418df577c850f0cd45c3da9245ead547 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:15:24 +0000 Subject: block: renumber QUEUE_FLAG_HW_WC For the QUEUE_FLAG_HW_WC to actually work, it needs to have a separate number from QUEUE_FLAG_FUA, doh. Fixes: 43c9835b144c ("block: don't allow enabling a cache on devices that don't support it") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231226081524.180289-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 51fa7ffdee83..88e9dd4b71fb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -538,7 +538,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 18 /* Write back caching supported */ +#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -- cgit v1.2.3 From d6b9f4e6f7fb589d8024a31cc4883d15d0c8def4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 27 Dec 2023 09:23:05 +0000 Subject: block: rename and document BLK_DEF_MAX_SECTORS Give BLK_DEF_MAX_SECTORS a _CAP postfix and document what it is used for. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231227092305.279567-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- block/blk-sysfs.c | 2 +- drivers/scsi/sd.c | 2 +- include/linux/blkdev.h | 9 ++++++++- 4 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index 33b3f767b81e..ba6e0e97118c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -139,7 +139,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto if (limits->max_user_sectors) max_sectors = min(max_sectors, limits->max_user_sectors); else - max_sectors = min(max_sectors, BLK_DEF_MAX_SECTORS); + max_sectors = min(max_sectors, BLK_DEF_MAX_SECTORS_CAP); max_sectors = round_down(max_sectors, limits->logical_block_size >> SECTOR_SHIFT); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d5e669a401b0..40bab5975c56 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -241,7 +241,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) if (max_sectors_kb == 0) { q->limits.max_user_sectors = 0; max_sectors_kb = min(max_hw_sectors_kb, - BLK_DEF_MAX_SECTORS >> 1); + BLK_DEF_MAX_SECTORS_CAP >> 1); } else { if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8c8ac5cd1833..6bedd2d5298f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3500,7 +3500,7 @@ static int sd_revalidate_disk(struct gendisk *disk) } else { q->limits.io_opt = 0; rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), - (sector_t)BLK_DEF_MAX_SECTORS); + (sector_t)BLK_DEF_MAX_SECTORS_CAP); } /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc236e77d85e..94701a63ad8a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1056,7 +1056,14 @@ enum blk_default_limits { BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; -#define BLK_DEF_MAX_SECTORS 2560u +/* + * Default upper limit for the software max_sectors limit used for + * regular file system I/O. This can be increased through sysfs. + * + * Not to be confused with the max_hw_sector limit that is entirely + * controlled by the driver, usually based on hardware limits. + */ +#define BLK_DEF_MAX_SECTORS_CAP 2560u static inline unsigned long queue_segment_boundary(const struct request_queue *q) { -- cgit v1.2.3 From 4e33b071bb8e8415fb9847249ffcf300fa7d8cac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Dec 2023 07:51:41 +0000 Subject: block: remove disk_clear_zoned disk_clear_zoned is unused now that the last warts of the host-aware model support in sd are gone. 
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20231228075141.362560-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-zoned.c | 21 --------------------- include/linux/blkdev.h | 1 - 2 files changed, 22 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index c59d44ee6b23..623879d875a4 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -615,24 +615,3 @@ int blk_revalidate_disk_zones(struct gendisk *disk, return ret; } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); - -void disk_clear_zoned(struct gendisk *disk) -{ - struct request_queue *q = disk->queue; - - blk_mq_freeze_queue(q); - - q->limits.zoned = false; - disk_free_zone_bitmaps(disk); - blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q); - q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE; - disk->nr_zones = 0; - disk->max_open_zones = 0; - disk->max_active_zones = 0; - q->limits.chunk_sectors = 0; - q->limits.zone_write_granularity = 0; - q->limits.max_zone_append_sectors = 0; - - blk_mq_unfreeze_queue(q); -} -EXPORT_SYMBOL_GPL(disk_clear_zoned); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94701a63ad8a..e1e705aef51e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -318,7 +318,6 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); void disk_set_zoned(struct gendisk *disk); -void disk_clear_zoned(struct gendisk *disk); #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, -- cgit v1.2.3
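For drivers, the practical effect of the disk_set_zoned() change at the top of this series is that zoned support is now advertised with a single, argument-free call; the block layer sets q->limits.zoned and the default zone write granularity itself, and clearing the zoned model from a driver is no longer possible (nor, per the last commit, needed). The sketch below is illustrative only: my_zoned_register(), struct my_dev and its fields are made up, while the block-layer calls mirror the null_blk, ublk, virtio_blk and nvme hunks above.

#include <linux/blkdev.h>

/*
 * Hypothetical zoned-device registration after "disk_set_zoned" lost
 * its bool argument.  Only the my_* names are invented; the helpers
 * called here all appear in the hunks of this series.
 */
static int my_zoned_register(struct my_dev *dev, struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	/* One call, no bool: the disk is zoned. */
	disk_set_zoned(disk);

	/* Same follow-up configuration the in-tree drivers above perform. */
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
	blk_queue_chunk_sectors(q, dev->zone_size_sects);
	disk_set_max_open_zones(disk, dev->max_open_zones);
	disk_set_max_active_zones(disk, dev->max_active_zones);

	return 0;
}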
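The QUEUE_FLAG_HW_WC renumbering matters because queue flags are bit numbers in q->queue_flags: per the commit message, the flag had been given the same number as QUEUE_FLAG_FUA, so the two could not be told apart. A minimal, illustrative check follows; my_queue_has_hw_wc() is a made-up helper, but the test_bit() pattern matches how queue-flag predicates in blkdev.h are written.

#include <linux/blkdev.h>

/*
 * Illustrative only.  Before the fix, QUEUE_FLAG_HW_WC aliased
 * QUEUE_FLAG_FUA's bit, so this test could return true merely because
 * FUA was set; with a dedicated bit 13 it reports only hardware
 * write-back caching.
 */
static inline bool my_queue_has_hw_wc(struct request_queue *q)
{
	return test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags);
}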
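Finally, the BLK_DEF_MAX_SECTORS_CAP rename changes no behaviour: the constant remains the default ceiling on the soft max_sectors limit used for regular file system I/O, not on the driver-controlled max_hw_sectors. The snippet below is a condensed, illustrative restatement of the capping logic from the blk_queue_max_hw_sectors() hunk above; cap_soft_max_sectors() is a made-up name.

#include <linux/blkdev.h>
#include <linux/math.h>
#include <linux/minmax.h>

/*
 * Illustrative only: derive the soft limit from the hardware limit, a
 * possible sysfs user override, and BLK_DEF_MAX_SECTORS_CAP.
 */
static unsigned int cap_soft_max_sectors(struct queue_limits *lim,
					 unsigned int max_hw_sectors)
{
	unsigned int max_sectors = max_hw_sectors;

	if (lim->max_user_sectors)
		max_sectors = min(max_sectors, lim->max_user_sectors);
	else
		max_sectors = min(max_sectors, BLK_DEF_MAX_SECTORS_CAP);

	/* Keep the result aligned to the logical block size. */
	return round_down(max_sectors,
			  lim->logical_block_size >> SECTOR_SHIFT);
}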