From 6f491a8d4b92d1a840fd9209cba783c84437d0b7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Nov 2024 21:51:28 +0800 Subject: block: track disk DEAD state automatically for modeling queue freeze lockdep Now we only verify the outermost freeze & unfreeze in current context in case that !q->mq_freeze_depth, so it is reliable to save disk DEAD state when we want to lock the freeze queue since the state is one per-task variable now. Doing it this way can kill lots of false positives when freeze queue is called before adding disk[1]. [1] https://lore.kernel.org/linux-block/6741f6b2.050a0220.1cc393.0017.GAE@google.com/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20241127135133.3952153-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 378d3a1a22fc..522cf8eef66c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -581,6 +581,8 @@ struct request_queue { #ifdef CONFIG_LOCKDEP struct task_struct *mq_freeze_owner; int mq_freeze_owner_depth; + /* Records disk state in current context, used in unfreeze queue */ + bool mq_freeze_disk_dead; #endif wait_queue_head_t mq_freeze_wq; /* -- cgit v1.2.3 From f6661b1d0525f3764596a1b65eeed9e75aecafa7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Nov 2024 21:51:30 +0800 Subject: block: track queue dying state automatically for modeling queue freeze lockdep Now we only verify the outermost freeze & unfreeze in current context in case that !q->mq_freeze_depth, so it is reliable to save queue dying state when we want to lock the freeze queue since the state is one per-task variable now. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20241127135133.3952153-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++-- block/blk.h | 12 ++++++------ block/genhd.c | 7 +++---- include/linux/blkdev.h | 6 +++++- 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-mq.c b/block/blk-mq.c index 0c6a319fb936..fca2ec64a06b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -134,6 +134,7 @@ static bool blk_freeze_set_owner(struct request_queue *q, q->mq_freeze_disk_dead = !q->disk || test_bit(GD_DEAD, &q->disk->state) || !blk_queue_registered(q); + q->mq_freeze_queue_dying = blk_queue_dying(q); return true; } @@ -190,7 +191,7 @@ bool __blk_freeze_queue_start(struct request_queue *q, void blk_freeze_queue_start(struct request_queue *q) { if (__blk_freeze_queue_start(q, current)) - blk_freeze_acquire_lock(q, false); + blk_freeze_acquire_lock(q); } EXPORT_SYMBOL_GPL(blk_freeze_queue_start); @@ -238,7 +239,7 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) void blk_mq_unfreeze_queue(struct request_queue *q) { if (__blk_mq_unfreeze_queue(q, false)) - blk_unfreeze_release_lock(q, false); + blk_unfreeze_release_lock(q); } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); diff --git a/block/blk.h b/block/blk.h index 8708168d50e4..cbf6a676ffe9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -721,26 +721,26 @@ void blk_integrity_prepare(struct request *rq); void blk_integrity_complete(struct request *rq, unsigned int nr_bytes); #ifdef CONFIG_LOCKDEP -static inline void blk_freeze_acquire_lock(struct request_queue *q, bool queue_dying) +static inline void blk_freeze_acquire_lock(struct request_queue *q) { if (!q->mq_freeze_disk_dead) rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_); - if (!queue_dying) + if (!q->mq_freeze_queue_dying) rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_); } -static inline void blk_unfreeze_release_lock(struct request_queue *q, bool 
queue_dying) +static inline void blk_unfreeze_release_lock(struct request_queue *q) { - if (!queue_dying) + if (!q->mq_freeze_queue_dying) rwsem_release(&q->q_lockdep_map, _RET_IP_); if (!q->mq_freeze_disk_dead) rwsem_release(&q->io_lockdep_map, _RET_IP_); } #else -static inline void blk_freeze_acquire_lock(struct request_queue *q, bool queue_dying) +static inline void blk_freeze_acquire_lock(struct request_queue *q) { } -static inline void blk_unfreeze_release_lock(struct request_queue *q, bool queue_dying) +static inline void blk_unfreeze_release_lock(struct request_queue *q) { } #endif diff --git a/block/genhd.c b/block/genhd.c index 59ac299909b3..5678194b6b1a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -661,7 +661,7 @@ void del_gendisk(struct gendisk *disk) struct request_queue *q = disk->queue; struct block_device *part; unsigned long idx; - bool start_drain, queue_dying; + bool start_drain; might_sleep(); @@ -690,9 +690,8 @@ void del_gendisk(struct gendisk *disk) */ mutex_lock(&disk->open_mutex); start_drain = __blk_mark_disk_dead(disk); - queue_dying = blk_queue_dying(q); if (start_drain) - blk_freeze_acquire_lock(q, queue_dying); + blk_freeze_acquire_lock(q); xa_for_each_start(&disk->part_tbl, idx, part, 1) drop_partition(part); mutex_unlock(&disk->open_mutex); @@ -748,7 +747,7 @@ void del_gendisk(struct gendisk *disk) blk_mq_exit_queue(q); if (start_drain) - blk_unfreeze_release_lock(q, queue_dying); + blk_unfreeze_release_lock(q); } EXPORT_SYMBOL(del_gendisk); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 522cf8eef66c..5d40af2ef971 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -581,8 +581,12 @@ struct request_queue { #ifdef CONFIG_LOCKDEP struct task_struct *mq_freeze_owner; int mq_freeze_owner_depth; - /* Records disk state in current context, used in unfreeze queue */ + /* + * Records disk & queue state in current context, used in unfreeze + * queue + */ bool mq_freeze_disk_dead; + bool 
mq_freeze_queue_dying; #endif wait_queue_head_t mq_freeze_wq; /* -- cgit v1.2.3 From 9c96821b44f893fb63f021a28625d3b32c68e8b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 10 Jan 2025 06:47:09 +0100 Subject: block: fix docs for freezing of queue limits updates queue_limits_commit_update is the function that needs to operate on a frozen queue, not queue_limits_start_update. Update the kerneldoc comments to reflect that. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Nilay Shroff Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Link: https://lore.kernel.org/r/20250110054726.1499538-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 ++- include/linux/blkdev.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index 8f09e33f41f6..89d8366fd43c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -413,7 +413,8 @@ int blk_set_default_limits(struct queue_limits *lim) * @lim: limits to apply * * Apply the limits in @lim that were obtained from queue_limits_start_update() - * and updated by the caller to @q. + * and updated by the caller to @q. The caller must have frozen the queue or + * ensure that there are no outstanding I/Os by other means. * * Returns 0 if successful, else a negative error code. */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5d40af2ef971..e781d4e6f92d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -944,8 +944,7 @@ static inline unsigned int blk_boundary_sectors_left(sector_t offset, * the caller can modify. The caller must call queue_limits_commit_update() * to finish the update. * - * Context: process context. The caller must have frozen the queue or ensured - * that there is outstanding I/O by other means. + * Context: process context. 
*/ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) -- cgit v1.2.3 From aa427d7b73b196f657d6d2cf0e94eff6b883fdef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 10 Jan 2025 06:47:10 +0100 Subject: block: add a queue_limits_commit_update_frozen helper Add a helper that freezes the queue, updates the queue limits and unfreezes the queue and convert all open coded versions of that to the new helper. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Reviewed-by: Nilay Shroff Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250110054726.1499538-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-integrity.c | 4 +--- block/blk-settings.c | 24 ++++++++++++++++++++++++ block/blk-zoned.c | 7 +------ drivers/block/virtio_blk.c | 4 +--- drivers/scsi/sd.c | 17 +++++------------ drivers/scsi/sr.c | 5 +---- include/linux/blkdev.h | 2 ++ 7 files changed, 35 insertions(+), 28 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index b180cac61a9d..013469faa5e7 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -218,9 +218,7 @@ static ssize_t flag_store(struct device *dev, const char *page, size_t count, else lim.integrity.flags |= flag; - blk_mq_freeze_queue(q); - err = queue_limits_commit_update(q, &lim); - blk_mq_unfreeze_queue(q); + err = queue_limits_commit_update_frozen(q, &lim); if (err) return err; return count; diff --git a/block/blk-settings.c b/block/blk-settings.c index 89d8366fd43c..6c96a73261d1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -444,6 +444,30 @@ out_unlock: } EXPORT_SYMBOL_GPL(queue_limits_commit_update); +/** + * queue_limits_commit_update_frozen - commit an atomic update of queue limits + * @q: queue to update + * @lim: limits to apply + * + * Apply the limits in @lim that were obtained from 
queue_limits_start_update() + * and updated with the new values by the caller to @q. Freezes the queue + * before the update and unfreezes it after. + * + * Returns 0 if successful, else a negative error code. + */ +int queue_limits_commit_update_frozen(struct request_queue *q, + struct queue_limits *lim) +{ + int ret; + + blk_mq_freeze_queue(q); + ret = queue_limits_commit_update(q, lim); + blk_mq_unfreeze_queue(q); + + return ret; +} +EXPORT_SYMBOL_GPL(queue_limits_commit_update_frozen); + /** * queue_limits_set - apply queue limits to queue * @q: queue to update diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 4b0be40a8ea7..9d08a54c201e 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1444,7 +1444,6 @@ static int disk_update_zone_resources(struct gendisk *disk, unsigned int nr_seq_zones, nr_conv_zones; unsigned int pool_size; struct queue_limits lim; - int ret; disk->nr_zones = args->nr_zones; disk->zone_capacity = args->zone_capacity; @@ -1495,11 +1494,7 @@ static int disk_update_zone_resources(struct gendisk *disk, } commit: - blk_mq_freeze_queue(q); - ret = queue_limits_commit_update(q, &lim); - blk_mq_unfreeze_queue(q); - - return ret; + return queue_limits_commit_update_frozen(q, &lim); } static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 71a7ffeafb32..bbaa26b523b8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1105,9 +1105,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr, lim.features |= BLK_FEAT_WRITE_CACHE; else lim.features &= ~BLK_FEAT_WRITE_CACHE; - blk_mq_freeze_queue(disk->queue); - i = queue_limits_commit_update(disk->queue, &lim); - blk_mq_unfreeze_queue(disk->queue); + i = queue_limits_commit_update_frozen(disk->queue, &lim); if (i) return i; return count; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8947dab132d7..af62a8ed8620 100644 --- a/drivers/scsi/sd.c +++ 
b/drivers/scsi/sd.c @@ -177,9 +177,8 @@ cache_type_store(struct device *dev, struct device_attribute *attr, lim = queue_limits_start_update(sdkp->disk->queue); sd_set_flush_flag(sdkp, &lim); - blk_mq_freeze_queue(sdkp->disk->queue); - ret = queue_limits_commit_update(sdkp->disk->queue, &lim); - blk_mq_unfreeze_queue(sdkp->disk->queue); + ret = queue_limits_commit_update_frozen(sdkp->disk->queue, + &lim); if (ret) return ret; return count; @@ -483,9 +482,7 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, lim = queue_limits_start_update(sdkp->disk->queue); sd_config_discard(sdkp, &lim, mode); - blk_mq_freeze_queue(sdkp->disk->queue); - err = queue_limits_commit_update(sdkp->disk->queue, &lim); - blk_mq_unfreeze_queue(sdkp->disk->queue); + err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; return count; @@ -594,9 +591,7 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, lim = queue_limits_start_update(sdkp->disk->queue); sd_config_write_same(sdkp, &lim); - blk_mq_freeze_queue(sdkp->disk->queue); - err = queue_limits_commit_update(sdkp->disk->queue, &lim); - blk_mq_unfreeze_queue(sdkp->disk->queue); + err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; return count; @@ -3803,9 +3798,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_config_write_same(sdkp, &lim); kfree(buffer); - blk_mq_freeze_queue(sdkp->disk->queue); - err = queue_limits_commit_update(sdkp->disk->queue, &lim); - blk_mq_unfreeze_queue(sdkp->disk->queue); + err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 198bec87bb8e..b17796d5ee66 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -797,10 +797,7 @@ static int get_sectorsize(struct scsi_cd *cd) lim = queue_limits_start_update(q); lim.logical_block_size = sector_size; - blk_mq_freeze_queue(q); - err = 
queue_limits_commit_update(q, &lim); - blk_mq_unfreeze_queue(q); - return err; + return queue_limits_commit_update_frozen(q, &lim); } static int get_capabilities(struct scsi_cd *cd) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e781d4e6f92d..13d353351c37 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -952,6 +952,8 @@ queue_limits_start_update(struct request_queue *q) mutex_lock(&q->limits_lock); return q->limits; } +int queue_limits_commit_update_frozen(struct request_queue *q, + struct queue_limits *lim); int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim); int queue_limits_set(struct request_queue *q, struct queue_limits *lim); -- cgit v1.2.3 From 6564862d646e7d630929ba1ff330740bb215bdac Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 9 Jan 2025 11:39:59 +0000 Subject: block: Ensure start sector is aligned for stacking atomic writes For stacking atomic writes, ensure that the start sector is aligned with the device atomic write unit min and any boundary. Otherwise, we may permit misaligned atomic writes. Rework bdev_can_atomic_write() into a common helper to reuse the alignment check. Also use atomic_write_hw_unit_min there, which is more appropriate than atomic_write_unit_min. Fixes: d7f36dc446e89 ("block: Support atomic writes limits for stacked devices") Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/20250109114000.2299896-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 7 +++++-- include/linux/blkdev.h | 21 ++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index 6c96a73261d1..c2b99262db26 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -609,7 +609,7 @@ static bool blk_stack_atomic_writes_head(struct queue_limits *t, } static void blk_stack_atomic_writes_limits(struct queue_limits *t, - struct queue_limits *b) + struct queue_limits *b, sector_t start) { if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED)) goto unsupported; @@ -617,6 +617,9 @@ static void blk_stack_atomic_writes_limits(struct queue_limits *t, if (!b->atomic_write_unit_min) goto unsupported; + if (!blk_atomic_write_start_sect_aligned(start, b)) + goto unsupported; + /* * If atomic_write_hw_max is set, we have already stacked 1x bottom * device, so check for compliance. 
@@ -799,7 +802,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->zone_write_granularity = 0; t->max_zone_append_sectors = 0; } - blk_stack_atomic_writes_limits(t, b); + blk_stack_atomic_writes_limits(t, b, start); return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 13d353351c37..7ac153e4423a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1706,6 +1706,15 @@ struct io_comp_batch { void (*complete)(struct io_comp_batch *); }; +static inline bool blk_atomic_write_start_sect_aligned(sector_t sector, + struct queue_limits *limits) +{ + unsigned int alignment = max(limits->atomic_write_hw_unit_min, + limits->atomic_write_hw_boundary); + + return IS_ALIGNED(sector, alignment >> SECTOR_SHIFT); +} + static inline bool bdev_can_atomic_write(struct block_device *bdev) { struct request_queue *bd_queue = bdev->bd_queue; @@ -1714,15 +1723,9 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev) if (!limits->atomic_write_unit_min) return false; - if (bdev_is_partition(bdev)) { - sector_t bd_start_sect = bdev->bd_start_sect; - unsigned int alignment = - max(limits->atomic_write_unit_min, - limits->atomic_write_hw_boundary); - - if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT)) - return false; - } + if (bdev_is_partition(bdev)) + return blk_atomic_write_start_sect_aligned(bdev->bd_start_sect, + limits); return true; } -- cgit v1.2.3 From 6a7e17b22062c84a111d7073c67cc677c4190f32 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Jan 2025 17:02:54 +0000 Subject: block: Add common atomic writes enable flag Currently only stacked devices need to explicitly enable atomic writes by setting BLK_FEAT_ATOMIC_WRITES_STACKED flag. This does not work well for device mapper stacking devices, as there many sets of limits are stacked, and which device is the 'bottom' and which is the 'top' can be swapped. This means that BLK_FEAT_ATOMIC_WRITES_STACKED needs to be set for many queue limits, which is messy. 
Generalize enabling atomic writes enabling by ensuring that all devices must explicitly set a flag - that includes NVMe, SCSI sd, and md raid. Signed-off-by: John Garry Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20250116170301.474130-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 ++++-- drivers/md/raid0.c | 2 +- drivers/md/raid1.c | 2 +- drivers/md/raid10.c | 2 +- drivers/nvme/host/core.c | 1 + drivers/scsi/sd.c | 1 + include/linux/blkdev.h | 4 ++-- 7 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index c8368ee8de2e..db12396ff5c7 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -175,6 +175,9 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim) { unsigned int boundary_sectors; + if (!(lim->features & BLK_FEAT_ATOMIC_WRITES)) + goto unsupported; + if (!lim->atomic_write_hw_max) goto unsupported; @@ -611,7 +614,7 @@ static bool blk_stack_atomic_writes_head(struct queue_limits *t, static void blk_stack_atomic_writes_limits(struct queue_limits *t, struct queue_limits *b, sector_t start) { - if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED)) + if (!(b->features & BLK_FEAT_ATOMIC_WRITES)) goto unsupported; if (!b->atomic_write_hw_unit_min) @@ -639,7 +642,6 @@ unsupported: t->atomic_write_hw_unit_max = 0; t->atomic_write_hw_unit_min = 0; t->atomic_write_hw_boundary = 0; - t->features &= ~BLK_FEAT_ATOMIC_WRITES_STACKED; } /** diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7049ec7fb8eb..8fc9339b00c7 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -384,7 +384,7 @@ static int raid0_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * mddev->raid_disks; - lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED; + lim.features |= BLK_FEAT_ATOMIC_WRITES; err = 
mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) { queue_limits_cancel_update(mddev->gendisk->queue); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a5cd6522fc2d..9d57a88dbd26 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3217,7 +3217,7 @@ static int raid1_set_limits(struct mddev *mddev) md_init_stacking_limits(&lim); lim.max_write_zeroes_sectors = 0; - lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED; + lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) { queue_limits_cancel_update(mddev->gendisk->queue); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e1e6cd7fb125..efe93b979167 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4018,7 +4018,7 @@ static int raid10_set_queue_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = 0; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * raid10_nr_stripes(conf); - lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED; + lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) { queue_limits_cancel_update(mddev->gendisk->queue); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d21258e2283..2147069775c6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2002,6 +2002,7 @@ static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns, lim->atomic_write_hw_boundary = boundary; lim->atomic_write_hw_unit_min = bs; lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs); + lim->features |= BLK_FEAT_ATOMIC_WRITES; } static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index af62a8ed8620..a48c4d5edfa3 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -991,6 +991,7 @@ static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) lim->atomic_write_hw_boundary = 0; lim->atomic_write_hw_unit_min = 
unit_min * logical_block_size; lim->atomic_write_hw_unit_max = unit_max * logical_block_size; + lim->features |= BLK_FEAT_ATOMIC_WRITES; } static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7ac153e4423a..76f0a4e7c2e5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -331,8 +331,8 @@ typedef unsigned int __bitwise blk_features_t; #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) -/* stacked device can/does support atomic writes */ -#define BLK_FEAT_ATOMIC_WRITES_STACKED \ +/* atomic writes enabled */ +#define BLK_FEAT_ATOMIC_WRITES \ ((__force blk_features_t)(1u << 16)) /* -- cgit v1.2.3 From fe6628608627424fb4a6d4c8d2235822457c5d9c Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 28 Jan 2025 20:04:13 +0530 Subject: block: get rid of request queue ->sysfs_dir_lock The request queue uses ->sysfs_dir_lock for protecting the addition/deletion of kobject entries under sysfs while we register/unregister blk-mq. However kobject addition/deletion is already protected with kernfs/sysfs internal synchronization primitives. So use of q->sysfs_dir_lock seems redundant. Moreover, q->sysfs_dir_lock is also used at a few other callsites along with q->sysfs_lock for protecting the addition/deletion of kobjects. One such example is when we register with sysfs a set of independent access ranges for a disk. Here as well we could get rid of q->sysfs_dir_lock and only use q->sysfs_lock. The only variable which q->sysfs_dir_lock appears to protect is q->mq_sysfs_init_done which is set/unset while registering/unregistering blk-mq with sysfs. But use of q->mq_sysfs_init_done could be easily replaced using queue registered bit QUEUE_FLAG_REGISTERED. So with this patch we remove q->sysfs_dir_lock from each callsite and replace q->mq_sysfs_init_done using QUEUE_FLAG_REGISTERED. 
Reviewed-by: Christoph Hellwig Signed-off-by: Nilay Shroff Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250128143436.874357-2-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-ia-ranges.c | 4 ---- block/blk-mq-sysfs.c | 23 +++++------------------ block/blk-sysfs.c | 5 ----- include/linux/blkdev.h | 3 --- 5 files changed, 5 insertions(+), 31 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-core.c b/block/blk-core.c index 32fb28a6372c..d6c4fa3943b5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -430,7 +430,6 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) refcount_set(&q->refs, 1); mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); - mutex_init(&q->sysfs_dir_lock); mutex_init(&q->limits_lock); mutex_init(&q->rq_qos_mutex); spin_lock_init(&q->queue_lock); diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index c9eb4241e048..d479f5481b66 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -111,7 +111,6 @@ int disk_register_independent_access_ranges(struct gendisk *disk) struct request_queue *q = disk->queue; int i, ret; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -155,7 +154,6 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk) struct blk_independent_access_ranges *iars = disk->ia_ranges; int i; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -289,7 +287,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk, { struct request_queue *q = disk->queue; - mutex_lock(&q->sysfs_dir_lock); mutex_lock(&q->sysfs_lock); if (iars && !disk_check_ia_ranges(disk, iars)) { kfree(iars); @@ -313,6 +310,5 @@ void disk_set_independent_access_ranges(struct gendisk *disk, disk_register_independent_access_ranges(disk); unlock: mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); } 
EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 156e9bb07abf..6113328abd70 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -223,8 +223,6 @@ int blk_mq_sysfs_register(struct gendisk *disk) unsigned long i, j; int ret; - lockdep_assert_held(&q->sysfs_dir_lock); - ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq"); if (ret < 0) goto out; @@ -237,7 +235,6 @@ int blk_mq_sysfs_register(struct gendisk *disk) goto unreg; } - q->mq_sysfs_init_done = true; out: return ret; @@ -259,15 +256,12 @@ void blk_mq_sysfs_unregister(struct gendisk *disk) struct blk_mq_hw_ctx *hctx; unsigned long i; - lockdep_assert_held(&q->sysfs_dir_lock); queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); - - q->mq_sysfs_init_done = false; } void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) @@ -275,15 +269,11 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + return; queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - -unlock: - mutex_unlock(&q->sysfs_dir_lock); } int blk_mq_sysfs_register_hctxs(struct request_queue *q) @@ -292,9 +282,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) unsigned long i; int ret = 0; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + goto out; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); @@ -302,8 +291,6 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) break; } -unlock: - mutex_unlock(&q->sysfs_dir_lock); - +out: return ret; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e09b455874bf..7b970e6765e7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -764,7 +764,6 @@ int 
blk_register_queue(struct gendisk *disk) struct request_queue *q = disk->queue; int ret; - mutex_lock(&q->sysfs_dir_lock); kobject_init(&disk->queue_kobj, &blk_queue_ktype); ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) @@ -805,7 +804,6 @@ int blk_register_queue(struct gendisk *disk) if (q->elevator) kobject_uevent(&q->elevator->kobj, KOBJ_ADD); mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); /* * SCSI probing may synchronously create and destroy a lot of @@ -830,7 +828,6 @@ out_debugfs_remove: mutex_unlock(&q->sysfs_lock); out_put_queue_kobj: kobject_put(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); return ret; } @@ -861,7 +858,6 @@ void blk_unregister_queue(struct gendisk *disk) blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); mutex_unlock(&q->sysfs_lock); - mutex_lock(&q->sysfs_dir_lock); /* * Remove the sysfs attributes before unregistering the queue data * structures that can be modified through sysfs. @@ -878,7 +874,6 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); kobject_del(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); blk_debugfs_remove(disk); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76f0a4e7c2e5..248416ecd01c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,7 +561,6 @@ struct request_queue { struct list_head flush_list; struct mutex sysfs_lock; - struct mutex sysfs_dir_lock; struct mutex limits_lock; /* @@ -605,8 +604,6 @@ struct request_queue { * Serializes all debugfs metadata operations using the above dentries. */ struct mutex debugfs_mutex; - - bool mq_sysfs_init_done; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ -- cgit v1.2.3