From 7afe93946dff63aa57c6db81f5eb43ac8233364e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:03 -0800
Subject: block: add a bdev_has_integrity_csum helper

Factor out a helper to see if the block device has an integrity checksum
from bdev_stable_writes so that it can be reused for other checks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d463b9b5a0a5..dec0acaed6e6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1477,14 +1477,18 @@ static inline bool bdev_synchronous(struct block_device *bdev)
 	return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
 }
 
-static inline bool bdev_stable_writes(struct block_device *bdev)
+static inline bool bdev_has_integrity_csum(struct block_device *bdev)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
+	struct queue_limits *lim = bdev_limits(bdev);
 
-	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
-	    q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE)
-		return true;
-	return q->limits.features & BLK_FEAT_STABLE_WRITES;
+	return IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+		lim->integrity.csum_type != BLK_INTEGRITY_CSUM_NONE;
+}
+
+static inline bool bdev_stable_writes(struct block_device *bdev)
+{
+	return bdev_has_integrity_csum(bdev) ||
+		(bdev_limits(bdev)->features & BLK_FEAT_STABLE_WRITES);
 }
 
 static inline bool blk_queue_write_cache(struct request_queue *q)
-- 
cgit v1.2.3


From 8c56ef10150ed7650cf4105539242c94c156148c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 23 Feb 2026 05:20:05 -0800
Subject: block: make max_integrity_io_size public

File systems that generate integrity will need this, so move it out
of the block private or blk-mq specific headers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c          | 13 -------------
 include/linux/blk-integrity.h |  5 -----
 include/linux/blkdev.h        | 18 ++++++++++++++++++
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index a9e65dc090da..dabfab97fbab 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -123,19 +123,6 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
 	return 0;
 }
 
-/*
- * Maximum size of I/O that needs a block layer integrity buffer.  Limited
- * by the number of intervals for which we can fit the integrity buffer into
- * the buffer size.  Because the buffer is a single segment it is also limited
- * by the maximum segment size.
- */
-static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
-{
-	return min_t(unsigned int, lim->max_segment_size,
-		(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
-			lim->integrity.interval_exp);
-}
-
 static int blk_validate_integrity_limits(struct queue_limits *lim)
 {
 	struct blk_integrity *bi = &lim->integrity;
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index fd3f3c8c0fcd..ea6d7d322ae3 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -8,11 +8,6 @@
 
 struct request;
 
-/*
- * Maximum contiguous integrity buffer allocation.
- */
-#define BLK_INTEGRITY_MAX_SIZE		SZ_2M
-
 enum blk_integrity_flags {
 	BLK_INTEGRITY_NOVERIFY		= 1 << 0,
 	BLK_INTEGRITY_NOGENERATE	= 1 << 1,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dec0acaed6e6..11857ae13d10 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1881,6 +1881,24 @@ static inline int bio_split_rw_at(struct bio *bio,
 	return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment);
 }
 
+/*
+ * Maximum contiguous integrity buffer allocation.
+ */
+#define BLK_INTEGRITY_MAX_SIZE		SZ_2M
+
+/*
+ * Maximum size of I/O that needs a block layer integrity buffer.  Limited
+ * by the number of intervals for which we can fit the integrity buffer into
+ * the buffer size.  Because the buffer is a single segment it is also limited
+ * by the maximum segment size.
+ */
+static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
+{
+	return min_t(unsigned int, lim->max_segment_size,
+		(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
+			lim->integrity.interval_exp);
+}
+
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
 #endif /* _LINUX_BLKDEV_H */
-- 
cgit v1.2.3


From 0ee8ab5d4dc51704be1157470f3df8090629f9fc Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Wed, 25 Feb 2026 20:51:05 +0000
Subject: block: annotate struct request_queue with __counted_by_ptr

The queue_hw_ctx field in struct request_queue is an array of pointers
to struct blk_mq_hw_ctx. The number of elements in this array is tracked
by the nr_hw_queues field.

The array is allocated in __blk_mq_realloc_hw_ctxs() using
kcalloc_node() with set->nr_hw_queues elements. q->nr_hw_queues is
subsequently updated to set->nr_hw_queues.

When growing the array, the new array is assigned to queue_hw_ctx before
nr_hw_queues is updated. This is safe because nr_hw_queues (the old
smaller count) is used for bounds checking, which is within the new
larger allocation.

When shrinking the array, nr_hw_queues is updated to the smaller value,
while queue_hw_ctx retains the larger allocation. This is also safe as
the count is within the allocation bounds.

Annotating queue_hw_ctx with __counted_by_ptr(nr_hw_queues) allows the
compiler (with kSAN) to verify that accesses to queue_hw_ctx are within
the valid range defined by nr_hw_queues.

This patch was generated by CodeMender and reviewed by Bill Wendling.
Tested by running blktests.

Reviewed-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Bill Wendling <morbo@google.com>
[axboe: massage commit message]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d463b9b5a0a5..540c2c6c9afd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -502,7 +502,7 @@ struct request_queue {
 
 	/* hw dispatch queues */
 	unsigned int		nr_hw_queues;
-	struct blk_mq_hw_ctx * __rcu *queue_hw_ctx;
+	struct blk_mq_hw_ctx * __rcu *queue_hw_ctx __counted_by_ptr(nr_hw_queues);
 
 	struct percpu_ref	q_usage_counter;
 	struct lock_class_key	io_lock_cls_key;
-- 
cgit v1.2.3


From b7cbc30e93e3a64ea058230f6d0c764d6d80276f Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Fri, 27 Feb 2026 22:19:48 +0900
Subject: block: rename struct gendisk zone_wplugs_lock field

Rename struct gendisk zone_wplugs_lock field to zone_wplugs_hash_lock to
clearly indicate that this is the spinlock used for manipulating the
hash table of zone write plugs.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c      | 23 ++++++++++++-----------
 include/linux/blkdev.h |  2 +-
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 26c2aa79faf6..78810e726222 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -514,10 +514,11 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
 	 * are racing with other submission context, so we may already have a
 	 * zone write plug for the same zone.
 	 */
-	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
 	hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
 		if (zwplg->zone_no == zwplug->zone_no) {
-			spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+			spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock,
+					       flags);
 			return false;
 		}
 	}
@@ -529,7 +530,7 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
 	 * necessarilly in the active condition.
 	 */
 	zones_cond = rcu_dereference_check(disk->zones_cond,
-				lockdep_is_held(&disk->zone_wplugs_lock));
+				lockdep_is_held(&disk->zone_wplugs_hash_lock));
 	if (zones_cond)
 		zwplug->cond = zones_cond[zwplug->zone_no];
 	else
@@ -537,7 +538,7 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
 
 	hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
 	atomic_inc(&disk->nr_zone_wplugs);
-	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
 
 	return true;
 }
@@ -590,13 +591,13 @@ static void disk_free_zone_wplug(struct blk_zone_wplug *zwplug)
 	WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
 	WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
 
-	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
 	blk_zone_set_cond(rcu_dereference_check(disk->zones_cond,
-				lockdep_is_held(&disk->zone_wplugs_lock)),
+				lockdep_is_held(&disk->zone_wplugs_hash_lock)),
 			  zwplug->zone_no, zwplug->cond);
 	hlist_del_init_rcu(&zwplug->node);
 	atomic_dec(&disk->nr_zone_wplugs);
-	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
 
 	call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
 }
@@ -1739,7 +1740,7 @@ put_zwplug:
 
 void disk_init_zone_resources(struct gendisk *disk)
 {
-	spin_lock_init(&disk->zone_wplugs_lock);
+	spin_lock_init(&disk->zone_wplugs_hash_lock);
 }
 
 /*
@@ -1829,10 +1830,10 @@ static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
 	zones_cond = rcu_replace_pointer(disk->zones_cond, zones_cond,
-				lockdep_is_held(&disk->zone_wplugs_lock));
-	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+				lockdep_is_held(&disk->zone_wplugs_hash_lock));
+	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
 
 	kfree_rcu_mightsleep(zones_cond);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 540c2c6c9afd..a49a1e38c6e7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -200,7 +200,7 @@ struct gendisk {
 	u8 __rcu		*zones_cond;
 	unsigned int		zone_wplugs_hash_bits;
 	atomic_t		nr_zone_wplugs;
-	spinlock_t		zone_wplugs_lock;
+	spinlock_t		zone_wplugs_hash_lock;
 	struct mempool		*zone_wplugs_pool;
 	struct hlist_head	*zone_wplugs_hash;
 	struct workqueue_struct *zone_wplugs_wq;
-- 
cgit v1.2.3


From 1365b6904fd050bf22ab9f3df375a396de5837a1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Fri, 27 Feb 2026 22:19:49 +0900
Subject: block: allow submitting all zone writes from a single context

In order to maintain sequential write patterns per zone with zoned block
devices, zone write plugging issues only a single write BIO per zone at
any time. This works well but has the side effect that when large
sequential write streams are issued by the user and these streams cross
zone boundaries, the device ends up receiving a discontiguous set of
write commands for different zones. The same also happens when a user
writes simultaneously to multiple zones at high queue depth: the device
does not see all sequential writes per zone and receives discontiguous
writes to different zones. While this does not affect the performance of
solid state zoned block devices, when using an SMR HDD, this pattern
change from sequential writes to discontiguous writes to different zones
significantly increases head seek which results in degraded write
throughput.

In order to reduce this seek overhead for rotational media devices,
introduce a per disk zone write plugs kernel thread to issue all write
BIOs to zones. This single zone write issuing context is enabled for
any zoned block device that has a request queue flagged with the new
QUEUE_ZONED_QD1_WRITES flag.

The flag QUEUE_ZONED_QD1_WRITES is visible as the sysfs queue attribute
zoned_qd1_writes for zoned devices. For regular block devices, this
attribute is not visible. For zoned block devices, a user can override
the default value set to force the global write maximum queue depth of
1 for a zoned block device, or clear this attribute to fall back to the
default behavior of zone write plugging which limits writes to QD=1 per
sequential zone.

Writing to a zoned block device flagged with QUEUE_ZONED_QD1_WRITES is
implemented using a list of zone write plugs that have a non-empty BIO
list. Listed zone write plugs are processed by the disk zone write plugs
worker kthread in FIFO order, and all BIOs of a zone write plug are all
processed before switching to the next listed zone write plug. A newly
submitted BIO for a non-FULL zone write plug that is not yet listed
causes the addition of the zone write plug at the end of the disk list
of zone write plugs.

Since the write BIOs queued in a zone write plug BIO list are
necessarily sequential, for rotational media, using the single zone
write plugs kthread to issue all BIOs maintains a sequential write
pattern and thus reduces seek overhead and improves write throughput.
This processing essentially results in always writing to HDDs at QD=1,
which is not an issue for HDDs operating with write caching enabled.
Performance with write cache disabled is also not degraded thanks to
the efficient write handling of modern SMR HDDs.

A disk list of zone write plugs is defined using the new struct gendisk
zone_wplugs_list, and access to this list is protected using the
zone_wplugs_list_lock spinlock.  The per disk kthread
(zone_wplugs_worker) code is implemented by the function
disk_zone_wplugs_worker(). A reference on listed zone write plugs is
always held until all BIOs of the zone write plug are processed by the
worker kthread. BIO issuing at QD=1 is driven using a completion
structure (zone_wplugs_worker_bio_done) and calls to blk_wait_io().

With this change, performance when sequentially writing the zones of a
30 TB SMR SATA HDD connected to an AHCI adapter changes as follows
(1MiB direct I/Os, results in MB/s unit):

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 | Sequential write | Baseline | Patched |
 |  Queue Depth     | 6.19-rc8 |         |
 +------------------+----------+---------+
 | 1                | 244      | 245     |
 | 2                | 244      | 245     |
 | 4                | 245      | 245     |
 | 8                | 242      | 245     |
 | 16               | 222      | 246     |
 | 32               | 211      | 245     |
 | 64               | 193      | 244     |
 | 128              | 112      | 246     |
 +------------------+----------+---------+

With the current code (baseline), as the sequential write stream crosses
a zone boundary, higher queue depth creates a gap between the
last IO to the previous zone and the first IOs to the following zones,
causing head seeks and degrading performance. Using the disk zone
write plugs worker thread, this pattern disappears and the maximum
throughput of the drive is maintained, leading to over 100%
improvement in throughput for high queue depth writes.

Using 16 fio jobs all writing to randomly chosen zones at QD=32 with 1
MiB direct IOs, write throughput also increases significantly.

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 |   Random write   | Baseline | Patched |
 |  Number of zones | 6.19-rc7 |         |
 +------------------+----------+---------+
 | 1                | 191      | 192     |
 | 2                | 101      | 128     |
 | 4                | 115      | 123     |
 | 8                | 90       | 120     |
 | 16               | 64       | 115     |
 | 32               | 58       | 105     |
 | 64               | 56       | 101     |
 | 128              | 55       | 99      |
 +------------------+----------+---------+

Tests using XFS show that buffered write speed with 8 jobs writing
files increases by 12% to 35% depending on the workload.

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 |     Workload     | Baseline | Patched |
 |                  | 6.19-rc7 |         |
 +------------------+----------+---------+
 | 256MiB file size | 212      | 238     |
 +------------------+----------+---------+
 | 4MiB .. 128 MiB  | 213      | 243     |
 | random file size |          |         |
 +------------------+----------+---------+
 | 2MiB .. 8 MiB    | 179      | 242     |
 | random file size |          |         |
 +------------------+----------+---------+

Performance gains are even more significant when using an HBA that
limits the maximum size of commands to a small value, e.g. HBAs
controlled with the mpi3mr driver limit commands to a maximum of 1 MiB.
In such case, the write throughput gains are over 40%.

                    +--------------------+
		    |   Write BW (MB/s)  |
 +------------------+----------+---------+
 |     Workload     | Baseline | Patched |
 |                  | 6.19-rc7 |         |
 +------------------+----------+---------+
 | 256MiB file size | 175      | 245     |
 +------------------+----------+---------+
 | 4MiB .. 128 MiB  | 174      | 244     |
 | random file size |          |         |
 +------------------+----------+---------+
 | 2MiB .. 8 MiB    | 171      | 243     |
 | random file size |          |         |
 +------------------+----------+---------+

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c |   1 +
 block/blk-sysfs.c      |  35 ++++++++-
 block/blk-zoned.c      | 190 +++++++++++++++++++++++++++++++++++++++++++------
 include/linux/blkdev.h |   8 +++
 4 files changed, 212 insertions(+), 22 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 28167c9baa55..047ec887456b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -97,6 +97,7 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(NO_ELV_SWITCH),
 	QUEUE_FLAG_NAME(QOS_ENABLED),
 	QUEUE_FLAG_NAME(BIO_ISSUE_TIME),
+	QUEUE_FLAG_NAME(ZONED_QD1_WRITES),
 };
 #undef QUEUE_FLAG_NAME
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 55a1bbfef7d4..ca8033e6d699 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -390,6 +390,36 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
 	return queue_var_show(disk_nr_zones(disk), page);
 }
 
+static ssize_t queue_zoned_qd1_writes_show(struct gendisk *disk, char *page)
+{
+	return queue_var_show(!!blk_queue_zoned_qd1_writes(disk->queue),
+			      page);
+}
+
+static ssize_t queue_zoned_qd1_writes_store(struct gendisk *disk,
+					    const char *page, size_t count)
+{
+	struct request_queue *q = disk->queue;
+	unsigned long qd1_writes;
+	unsigned int memflags;
+	ssize_t ret;
+
+	ret = queue_var_store(&qd1_writes, page, count);
+	if (ret < 0)
+		return ret;
+
+	memflags = blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
+	if (qd1_writes)
+		blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_ZONED_QD1_WRITES, q);
+	blk_mq_unquiesce_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
+
+	return count;
+}
+
 static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
 {
 	return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
@@ -617,6 +647,7 @@ QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
 QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
 
 QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
+QUEUE_RW_ENTRY(queue_zoned_qd1_writes, "zoned_qd1_writes");
 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
 QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
 QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");
@@ -754,6 +785,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_nomerges_entry.attr,
 	&queue_poll_entry.attr,
 	&queue_poll_delay_entry.attr,
+	&queue_zoned_qd1_writes_entry.attr,
 
 	NULL,
 };
@@ -786,7 +818,8 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 	struct request_queue *q = disk->queue;
 
 	if ((attr == &queue_max_open_zones_entry.attr ||
-	     attr == &queue_max_active_zones_entry.attr) &&
+	     attr == &queue_max_active_zones_entry.attr ||
+	     attr == &queue_zoned_qd1_writes_entry.attr) &&
 	    !blk_queue_is_zoned(q))
 		return 0;
 
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 78810e726222..e1a23c8b676d 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -16,6 +16,8 @@
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
 #include <linux/mempool.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include <trace/events/block.h>
 
@@ -40,6 +42,8 @@ static const char *const zone_cond_name[] = {
 /*
  * Per-zone write plug.
  * @node: hlist_node structure for managing the plug using a hash table.
+ * @entry: list_head structure for listing the plug in the disk list of active
+ *         zone write plugs.
  * @bio_list: The list of BIOs that are currently plugged.
  * @bio_work: Work struct to handle issuing of plugged BIOs
  * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -62,6 +66,7 @@ static const char *const zone_cond_name[] = {
  */
 struct blk_zone_wplug {
 	struct hlist_node	node;
+	struct list_head	entry;
 	struct bio_list		bio_list;
 	struct work_struct	bio_work;
 	struct rcu_head		rcu_head;
@@ -623,7 +628,19 @@ static void disk_mark_zone_wplug_dead(struct blk_zone_wplug *zwplug)
 	}
 }
 
-static void blk_zone_wplug_bio_work(struct work_struct *work);
+static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
+				       struct blk_zone_wplug *zwplug);
+
+static void blk_zone_wplug_bio_work(struct work_struct *work)
+{
+	struct blk_zone_wplug *zwplug =
+		container_of(work, struct blk_zone_wplug, bio_work);
+
+	disk_zone_wplug_submit_bio(zwplug->disk, zwplug);
+
+	/* Drop the reference we took in disk_zone_wplug_schedule_work(). */
+	disk_put_zone_wplug(zwplug);
+}
 
 /*
  * Get a zone write plug for the zone containing @sector.
@@ -658,6 +675,7 @@ again:
 	zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
 	bio_list_init(&zwplug->bio_list);
 	INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
+	INIT_LIST_HEAD(&zwplug->entry);
 	zwplug->disk = disk;
 
 	/*
@@ -690,6 +708,7 @@ static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
  */
 static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 {
+	struct gendisk *disk = zwplug->disk;
 	struct bio *bio;
 
 	lockdep_assert_held(&zwplug->lock);
@@ -703,6 +722,20 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 		blk_zone_wplug_bio_io_error(zwplug, bio);
 
 	zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
+
+	/*
+	 * If we are using the per disk zone write plugs worker thread, remove
+	 * the zone write plug from the work list and drop the reference we
+	 * took when the zone write plug was added to that list.
+	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue)) {
+		spin_lock(&disk->zone_wplugs_list_lock);
+		if (!list_empty(&zwplug->entry)) {
+			list_del_init(&zwplug->entry);
+			disk_put_zone_wplug(zwplug);
+		}
+		spin_unlock(&disk->zone_wplugs_list_lock);
+	}
 }
 
 /*
@@ -1137,8 +1170,8 @@ void blk_zone_mgmt_bio_endio(struct bio *bio)
 	}
 }
 
-static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
-					      struct blk_zone_wplug *zwplug)
+static void disk_zone_wplug_schedule_work(struct gendisk *disk,
+					  struct blk_zone_wplug *zwplug)
 {
 	lockdep_assert_held(&zwplug->lock);
 
@@ -1151,6 +1184,7 @@ static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
 	 * and we also drop this reference if the work is already scheduled.
 	 */
 	WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
+	WARN_ON_ONCE(blk_queue_zoned_qd1_writes(disk->queue));
 	refcount_inc(&zwplug->ref);
 	if (!queue_work(disk->zone_wplugs_wq, &zwplug->bio_work))
 		disk_put_zone_wplug(zwplug);
@@ -1190,6 +1224,22 @@ static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
 	bio_list_add(&zwplug->bio_list, bio);
 	trace_disk_zone_wplug_add_bio(zwplug->disk->queue, zwplug->zone_no,
 				      bio->bi_iter.bi_sector, bio_sectors(bio));
+
+	/*
+	 * If we are using the disk zone write plugs worker instead of the per
+	 * zone write plug BIO work, add the zone write plug to the work list
+	 * if it is not already there. Make sure to also get an extra reference
+	 * on the zone write plug so that it does not go away until it is
+	 * removed from the work list.
+	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue)) {
+		spin_lock(&disk->zone_wplugs_list_lock);
+		if (list_empty(&zwplug->entry)) {
+			list_add_tail(&zwplug->entry, &disk->zone_wplugs_list);
+			refcount_inc(&zwplug->ref);
+		}
+		spin_unlock(&disk->zone_wplugs_list_lock);
+	}
 }
 
 /*
@@ -1423,6 +1473,13 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
 		goto queue_bio;
 	}
 
+	/*
+	 * For rotational devices, we will use the gendisk zone write plugs
+	 * work instead of the per zone write plug BIO work, so queue the BIO.
+	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue))
+		goto queue_bio;
+
 	/* If the zone is already plugged, add the BIO to the BIO plug list. */
 	if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
 		goto queue_bio;
@@ -1445,7 +1502,10 @@ queue_bio:
 
 	if (!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)) {
 		zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
-		disk_zone_wplug_schedule_bio_work(disk, zwplug);
+		if (blk_queue_zoned_qd1_writes(disk->queue))
+			wake_up_process(disk->zone_wplugs_worker);
+		else
+			disk_zone_wplug_schedule_work(disk, zwplug);
 	}
 
 	spin_unlock_irqrestore(&zwplug->lock, flags);
@@ -1586,16 +1646,22 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
 
 	spin_lock_irqsave(&zwplug->lock, flags);
 
-	/* Schedule submission of the next plugged BIO if we have one. */
-	if (!bio_list_empty(&zwplug->bio_list)) {
-		disk_zone_wplug_schedule_bio_work(disk, zwplug);
-		spin_unlock_irqrestore(&zwplug->lock, flags);
-		return;
-	}
+	/*
+	 * For rotational devices, signal the BIO completion to the zone write
+	 * plug work. Otherwise, schedule submission of the next plugged BIO
+	 * if we have one.
+	 */
+	if (bio_list_empty(&zwplug->bio_list))
+		zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
+
+	if (blk_queue_zoned_qd1_writes(disk->queue))
+		complete(&disk->zone_wplugs_worker_bio_done);
+	else if (!bio_list_empty(&zwplug->bio_list))
+		disk_zone_wplug_schedule_work(disk, zwplug);
 
-	zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
 	if (!zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug))
 		disk_mark_zone_wplug_dead(zwplug);
+
 	spin_unlock_irqrestore(&zwplug->lock, flags);
 }
 
@@ -1685,10 +1751,9 @@ void blk_zone_write_plug_finish_request(struct request *req)
 	disk_put_zone_wplug(zwplug);
 }
 
-static void blk_zone_wplug_bio_work(struct work_struct *work)
+static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
+				       struct blk_zone_wplug *zwplug)
 {
-	struct blk_zone_wplug *zwplug =
-		container_of(work, struct blk_zone_wplug, bio_work);
 	struct block_device *bdev;
 	unsigned long flags;
 	struct bio *bio;
@@ -1704,7 +1769,7 @@ again:
 	if (!bio) {
 		zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
 		spin_unlock_irqrestore(&zwplug->lock, flags);
-		goto put_zwplug;
+		return false;
 	}
 
 	trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no,
@@ -1718,14 +1783,15 @@ again:
 		goto again;
 	}
 
-	bdev = bio->bi_bdev;
-
 	/*
 	 * blk-mq devices will reuse the extra reference on the request queue
 	 * usage counter we took when the BIO was plugged, but the submission
 	 * path for BIO-based devices will not do that. So drop this extra
 	 * reference here.
 	 */
+	if (blk_queue_zoned_qd1_writes(disk->queue))
+		reinit_completion(&disk->zone_wplugs_worker_bio_done);
+	bdev = bio->bi_bdev;
 	if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) {
 		bdev->bd_disk->fops->submit_bio(bio);
 		blk_queue_exit(bdev->bd_disk->queue);
@@ -1733,14 +1799,78 @@ again:
 		blk_mq_submit_bio(bio);
 	}
 
-put_zwplug:
-	/* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
-	disk_put_zone_wplug(zwplug);
+	return true;
+}
+
+static struct blk_zone_wplug *disk_get_zone_wplugs_work(struct gendisk *disk)
+{
+	struct blk_zone_wplug *zwplug;
+
+	spin_lock_irq(&disk->zone_wplugs_list_lock);
+	zwplug = list_first_entry_or_null(&disk->zone_wplugs_list,
+					  struct blk_zone_wplug, entry);
+	if (zwplug)
+		list_del_init(&zwplug->entry);
+	spin_unlock_irq(&disk->zone_wplugs_list_lock);
+
+	return zwplug;
+}
+
+static int disk_zone_wplugs_worker(void *data)
+{
+	struct gendisk *disk = data;
+	struct blk_zone_wplug *zwplug;
+	unsigned int noio_flag;
+
+	noio_flag = memalloc_noio_save();
+	set_user_nice(current, MIN_NICE);
+	set_freezable();
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
+
+		zwplug = disk_get_zone_wplugs_work(disk);
+		if (zwplug) {
+			/*
+			 * Process all BIOs of this zone write plug and then
+			 * drop the reference we took when adding the zone write
+			 * plug to the active list.
+			 */
+			set_current_state(TASK_RUNNING);
+			while (disk_zone_wplug_submit_bio(disk, zwplug))
+				blk_wait_io(&disk->zone_wplugs_worker_bio_done);
+			disk_put_zone_wplug(zwplug);
+			continue;
+		}
+
+		/*
+		 * Only sleep if nothing sets the state to running. Else check
+		 * for zone write plugs work again as a newly submitted BIO
+		 * might have added a zone write plug to the work list.
+		 */
+		if (get_current_state() == TASK_RUNNING) {
+			try_to_freeze();
+		} else {
+			if (kthread_should_stop()) {
+				set_current_state(TASK_RUNNING);
+				break;
+			}
+			schedule();
+		}
+	}
+
+	WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
+	memalloc_noio_restore(noio_flag);
+
+	return 0;
 }
 
 void disk_init_zone_resources(struct gendisk *disk)
 {
 	spin_lock_init(&disk->zone_wplugs_hash_lock);
+	spin_lock_init(&disk->zone_wplugs_list_lock);
+	INIT_LIST_HEAD(&disk->zone_wplugs_list);
+	init_completion(&disk->zone_wplugs_worker_bio_done);
 }
 
 /*
@@ -1756,6 +1886,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
 				     unsigned int pool_size)
 {
 	unsigned int i;
+	int ret = -ENOMEM;
 
 	atomic_set(&disk->nr_zone_wplugs, 0);
 	disk->zone_wplugs_hash_bits =
@@ -1781,8 +1912,21 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
 	if (!disk->zone_wplugs_wq)
 		goto destroy_pool;
 
+	disk->zone_wplugs_worker =
+		kthread_create(disk_zone_wplugs_worker, disk,
+			       "%s_zwplugs_worker", disk->disk_name);
+	if (IS_ERR(disk->zone_wplugs_worker)) {
+		ret = PTR_ERR(disk->zone_wplugs_worker);
+		disk->zone_wplugs_worker = NULL;
+		goto destroy_wq;
+	}
+	wake_up_process(disk->zone_wplugs_worker);
+
 	return 0;
 
+destroy_wq:
+	destroy_workqueue(disk->zone_wplugs_wq);
+	disk->zone_wplugs_wq = NULL;
 destroy_pool:
 	mempool_destroy(disk->zone_wplugs_pool);
 	disk->zone_wplugs_pool = NULL;
@@ -1790,7 +1934,7 @@ free_hash:
 	kfree(disk->zone_wplugs_hash);
 	disk->zone_wplugs_hash = NULL;
 	disk->zone_wplugs_hash_bits = 0;
-	return -ENOMEM;
+	return ret;
 }
 
 static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
@@ -1840,6 +1984,10 @@ static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
 
 void disk_free_zone_resources(struct gendisk *disk)
 {
+	if (disk->zone_wplugs_worker)
+		kthread_stop(disk->zone_wplugs_worker);
+	WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
+
 	if (disk->zone_wplugs_wq) {
 		destroy_workqueue(disk->zone_wplugs_wq);
 		disk->zone_wplugs_wq = NULL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a49a1e38c6e7..ef6457487d23 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -13,6 +13,7 @@
 #include <linux/minmax.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/completion.h>
 #include <linux/wait.h>
 #include <linux/bio.h>
 #include <linux/gfp.h>
@@ -204,6 +205,10 @@ struct gendisk {
 	struct mempool		*zone_wplugs_pool;
 	struct hlist_head	*zone_wplugs_hash;
 	struct workqueue_struct *zone_wplugs_wq;
+	spinlock_t		zone_wplugs_list_lock;
+	struct list_head	zone_wplugs_list;
+	struct task_struct	*zone_wplugs_worker;
+	struct completion	zone_wplugs_worker_bio_done;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 #if IS_ENABLED(CONFIG_CDROM)
@@ -668,6 +673,7 @@ enum {
 	QUEUE_FLAG_NO_ELV_SWITCH,	/* can't switch elevator any more */
 	QUEUE_FLAG_QOS_ENABLED,		/* qos is enabled */
 	QUEUE_FLAG_BIO_ISSUE_TIME,	/* record bio->issue_time_ns */
+	QUEUE_FLAG_ZONED_QD1_WRITES,	/* Limit zoned devices writes to QD=1 */
 	QUEUE_FLAG_MAX
 };
 
@@ -707,6 +713,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
 #define blk_queue_no_elv_switch(q)	\
 	test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags)
+#define blk_queue_zoned_qd1_writes(q)	\
+	test_bit(QUEUE_FLAG_ZONED_QD1_WRITES, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
-- 
cgit v1.2.3


From ecd92cfec5349876d6a80f8188ea98c5920094b6 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 26 Feb 2026 16:54:48 +0900
Subject: block: remove bdev_nonrot()

bdev_nonrot() simply returns the logical negation of bdev_rot().
So replace all call sites of bdev_nonrot() with negated calls to
bdev_rot() and remove bdev_nonrot().

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/raid1.c                  | 2 +-
 drivers/md/raid10.c                 | 2 +-
 drivers/md/raid5.c                  | 2 +-
 drivers/target/target_core_file.c   | 2 +-
 drivers/target/target_core_iblock.c | 2 +-
 fs/btrfs/volumes.c                  | 4 ++--
 fs/ext4/mballoc-test.c              | 2 +-
 fs/ext4/mballoc.c                   | 2 +-
 include/linux/blkdev.h              | 5 -----
 mm/swapfile.c                       | 2 +-
 10 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 181400e147c0..cda6af0712b9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1878,7 +1878,7 @@ static bool raid1_add_conf(struct r1conf *conf, struct md_rdev *rdev, int disk,
 	if (info->rdev)
 		return false;
 
-	if (bdev_nonrot(rdev->bdev)) {
+	if (!bdev_rot(rdev->bdev)) {
 		set_bit(Nonrot, &rdev->flags);
 		WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks + 1);
 	}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0653b5d8545a..cfbd345805ca 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -806,7 +806,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 		if (!do_balance)
 			break;
 
-		nonrot = bdev_nonrot(rdev->bdev);
+		nonrot = !bdev_rot(rdev->bdev);
 		has_nonrot_disk |= nonrot;
 		pending = atomic_read(&rdev->nr_pending);
 		if (min_pending > pending && nonrot) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a8e8d431071b..ba9d6d05b089 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7541,7 +7541,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	rdev_for_each(rdev, mddev) {
 		if (test_bit(Journal, &rdev->flags))
 			continue;
-		if (bdev_nonrot(rdev->bdev)) {
+		if (!bdev_rot(rdev->bdev)) {
 			conf->batch_bio_dispatch = false;
 			break;
 		}
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 3ae1f7137d9d..d6e3e5214652 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -173,7 +173,7 @@ static int fd_configure_device(struct se_device *dev)
 		 */
 		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-		if (bdev_nonrot(bdev))
+		if (!bdev_rot(bdev))
 			dev->dev_attrib.is_nonrot = 1;
 	} else {
 		if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) {
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 3c92f94497b4..1087d1d17c36 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -148,7 +148,7 @@ static int iblock_configure_device(struct se_device *dev)
 	else
 		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-	if (bdev_nonrot(bd))
+	if (!bdev_rot(bd))
 		dev->dev_attrib.is_nonrot = 1;
 
 	target_configure_write_atomic_from_bdev(&dev->dev_attrib, bd);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 648bb09fc416..353c9caa8ab9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -694,7 +694,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	}
 
-	if (!bdev_nonrot(file_bdev(bdev_file)))
+	if (bdev_rot(file_bdev(bdev_file)))
 		fs_devices->rotating = true;
 
 	if (bdev_max_discard_sectors(file_bdev(bdev_file)))
@@ -2919,7 +2919,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-	if (!bdev_nonrot(device->bdev))
+	if (bdev_rot(device->bdev))
 		fs_devices->rotating = true;
 
 	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 9fbdf6a09489..b9f22e3a8d5c 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -72,7 +72,7 @@ static int mbt_mb_init(struct super_block *sb)
 	ext4_fsblk_t block;
 	int ret;
 
-	/* needed by ext4_mb_init->bdev_nonrot(sb->s_bdev) */
+	/* needed by ext4_mb_init->bdev_rot(sb->s_bdev) */
 	sb->s_bdev = kzalloc_obj(*sb->s_bdev);
 	if (sb->s_bdev == NULL)
 		return -ENOMEM;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 20e9fdaf4301..8a4dfe19878c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3836,7 +3836,7 @@ int ext4_mb_init(struct super_block *sb)
 		spin_lock_init(&lg->lg_prealloc_lock);
 	}
 
-	if (bdev_nonrot(sb->s_bdev))
+	if (!bdev_rot(sb->s_bdev))
 		sbi->s_mb_max_linear_groups = 0;
 	else
 		sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ef6457487d23..8d93d8e356d8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1475,11 +1475,6 @@ static inline bool bdev_rot(struct block_device *bdev)
 	return blk_queue_rot(bdev_get_queue(bdev));
 }
 
-static inline bool bdev_nonrot(struct block_device *bdev)
-{
-	return !bdev_rot(bdev);
-}
-
 static inline bool bdev_synchronous(struct block_device *bdev)
 {
 	return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 94af29d1de88..60e21414624b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3460,7 +3460,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (si->bdev && bdev_synchronous(si->bdev))
 		si->flags |= SWP_SYNCHRONOUS_IO;
 
-	if (si->bdev && bdev_nonrot(si->bdev)) {
+	if (si->bdev && !bdev_rot(si->bdev)) {
 		si->flags |= SWP_SOLIDSTATE;
 	} else {
 		atomic_inc(&nr_rotate_swap);
-- 
cgit v1.2.3


From 3d6bb84f6bb3f4c05fc47fd02ce75ce3032a4ce1 Mon Sep 17 00:00:00 2001
From: Yuto Ohnuki <ytohnuki@amazon.com>
Date: Thu, 26 Feb 2026 20:18:58 +0000
Subject: fs: remove stale and duplicate forward declarations

Remove the following unnecessary forward declarations from fs.h, which
improves maintainability.

- struct hd_geometry: became unused in fs.h when
  block_device_operations was moved to blkdev.h in commit 08f858512151
  ("[PATCH] move block_device_operations to blkdev.h"). The forward
  declaration is now added to blkdev.h where it is actually used.

- struct iovec: became unused when aio_read/aio_write were removed in
  commit 8436318205b9 ("->aio_read and ->aio_write removed")

- struct iov_iter: duplicate forward declaration. This removes the
  redundant second declaration, added in commit 293bc9822fa9
  ("new methods: ->read_iter() and ->write_iter()")

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512301303.s7YWTZHA-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302139.Wl0soAlz-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302105.pmzYfmcV-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302125.FNgHwu5z-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512302108.nIV8r5ES-lkp@intel.com/
Signed-off-by: Yuto Ohnuki <ytohnuki@amazon.com>
Link: https://patch.msgid.link/20260226201857.27310-2-ytohnuki@amazon.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/blkdev.h | 1 +
 include/linux/fs.h     | 3 ---
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99ef8cd7673c..54cdd71aab07 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -38,6 +38,7 @@ struct blk_flush_queue;
 struct kiocb;
 struct pr_ops;
 struct rq_qos;
+struct hd_geometry;
 struct blk_report_zones_args;
 struct blk_queue_stats;
 struct blk_stat_callback;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a2af5ddd5323..280d43c9f04a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -55,8 +55,6 @@ struct bdi_writeback;
 struct bio;
 struct io_comp_batch;
 struct fiemap_extent_info;
-struct hd_geometry;
-struct iovec;
 struct kiocb;
 struct kobject;
 struct pipe_inode_info;
@@ -1917,7 +1915,6 @@ struct dir_context {
  */
 #define COPY_FILE_SPLICE		(1 << 0)
 
-struct iov_iter;
 struct io_uring_cmd;
 struct offset_ctx;
 
-- 
cgit v1.2.3