From 1122c0c1cc71f740fa4d5f14f239194e06a1d5e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:40 +0200 Subject: block: move cache control settings out of queue->flags Move the cache control settings into the queue_limits so that the flags can be set atomically with the device queue frozen. Add new features and flags field for the driver set flags, and internal (usually sysfs-controlled) flags in the block layer. Note that we'll eventually remove enough field from queue_limits to bring it back to the previous size. The disable flag is inverted compared to the previous meaning, which means it now survives a rescan, similar to the max_sectors and max_discard_sectors user limits. The FLUSH and FUA flags are now inherited by blk_stack_limits, which simplified the code in dm a lot, but also causes a slight behavior change in that dm-switch and dm-unstripe now advertise a write cache despite setting num_flush_bios to 0. The I/O path will handle this gracefully, but as far as I can tell the lack of num_flush_bios and thus flush support is a pre-existing data integrity bug in those targets that really needs fixing, after which a non-zero num_flush_bios should be required in dm for targets that map to underlying devices. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c247a716885..acdfe5122faa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -282,6 +282,28 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } +/* flags set by the driver in queue_limits.features */ +enum { + /* supports a volatile write cache */ + BLK_FEAT_WRITE_CACHE = (1u << 0), + + /* supports passing on the FUA bit */ + BLK_FEAT_FUA = (1u << 1), +}; + +/* + * Flags automatically inherited when stacking limits. + */ +#define BLK_FEAT_INHERIT_MASK \ + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) + + +/* internal flags in queue_limits.flags */ +enum { + /* do not send FLUSH or FUA command despite advertised write cache */ + BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), +}; + /* * BLK_BOUNCE_NONE: never bounce (default) * BLK_BOUNCE_HIGH: bounce all highmem pages @@ -292,6 +314,8 @@ enum blk_bounce { }; struct queue_limits { + unsigned int features; + unsigned int flags; enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; @@ -536,12 +560,9 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -#define QUEUE_FLAG_WC 17 /* Write back caching */ -#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ @@ -951,7 +972,6 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); struct blk_independent_access_ranges * disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); @@ -1304,14 +1324,20 @@ static inline bool bdev_stable_writes(struct block_device *bdev) return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } +static inline bool blk_queue_write_cache(struct request_queue *q) +{ + return (q->limits.features & BLK_FEAT_WRITE_CACHE) && + !(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED); +} + static inline bool bdev_write_cache(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags); + return blk_queue_write_cache(bdev_get_queue(bdev)); } static inline bool bdev_fua(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); + return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA; } static inline bool bdev_nowait(struct block_device *bdev) -- cgit v1.2.3 From bd4a633b6f7c3c6b6ebc1a07317643270e751a94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:41 +0200 Subject: block: move the nonrot flag to queue_limits Move the nonrot flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Use the chance to switch to defaulting to non-rotational and require the driver to opt into rotational, which matches the polarity of the sysfs interface. For the z2ram, ps3vram, 2x memstick, ubiblock and dcssblk the new rotational flag is not set as they clearly are not rotational despite this being a behavior change. There are some other drivers that unconditionally set the rotational flag to keep the existing behavior as they arguably can be used on rotational devices even if that is probably not their main use today (e.g. virtio_blk and drbd). The flag is automatically inherited in blk_stack_limits matching the existing behavior in dm and md. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-15-hch@lst.de Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 1 + arch/um/drivers/ubd_kern.c | 1 - arch/xtensa/platforms/iss/simdisk.c | 5 ++++- block/blk-mq-debugfs.c | 1 - block/blk-sysfs.c | 39 ++++++++++++++++++++++++++++++++++--- drivers/block/amiflop.c | 5 ++++- drivers/block/aoe/aoeblk.c | 1 + drivers/block/ataflop.c | 5 ++++- drivers/block/brd.c | 2 -- drivers/block/drbd/drbd_main.c | 3 ++- drivers/block/floppy.c | 3 ++- drivers/block/loop.c | 8 +++----- drivers/block/mtip32xx/mtip32xx.c | 1 - drivers/block/n64cart.c | 2 -- drivers/block/nbd.c | 5 ----- drivers/block/null_blk/main.c | 1 - drivers/block/pktcdvd.c | 1 + drivers/block/ps3disk.c | 3 ++- drivers/block/rbd.c | 3 --- drivers/block/rnbd/rnbd-clt.c | 4 ---- drivers/block/sunvdc.c | 1 + drivers/block/swim.c | 5 ++++- drivers/block/swim3.c | 5 ++++- drivers/block/ublk_drv.c | 9 +++------ drivers/block/virtio_blk.c | 4 +++- drivers/block/xen-blkfront.c | 1 - drivers/block/zram/zram_drv.c | 2 -- drivers/cdrom/gdrom.c | 1 + drivers/md/bcache/super.c | 2 -- drivers/md/dm-table.c | 12 ------------ drivers/md/md.c | 13 ------------- drivers/mmc/core/queue.c | 1 - drivers/mtd/mtd_blkdevs.c | 1 - drivers/nvdimm/btt.c | 1 - drivers/nvdimm/pmem.c | 1 - drivers/nvme/host/core.c | 1 - drivers/nvme/host/multipath.c | 1 - drivers/s390/block/dasd_genhd.c | 1 - drivers/s390/block/scm_blk.c | 1 - drivers/scsi/sd.c | 4 ++-- include/linux/blkdev.h | 10 +++++----- 41 files changed, 83 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 642fb80c5c4e..8eea7ef91151 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -98,6 +98,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) { struct queue_limits lim = { .logical_block_size = bsize, + .features = BLK_FEAT_ROTATIONAL, }; struct nfhd_device *dev; int dev_id = id - NFHD_DEV_OFFSET; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 19e01691ea0e..9f1e76ddda5a 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -882,7 +882,6 @@ static int ubd_add(int n, char **error_out) goto out_cleanup_tags; } - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); disk->major = UBD_MAJOR; disk->first_minor = n << UBD_SHIFT; disk->minors = 1 << UBD_SHIFT; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index defc67909a9c..d6d2b533a574 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -263,6 +263,9 @@ static const struct proc_ops simdisk_proc_ops = { static int __init simdisk_setup(struct simdisk *dev, int which, struct proc_dir_entry *procdir) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; char tmp[2] = { '0' + which, 0 }; int err; @@ -271,7 +274,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, spin_lock_init(&dev->lock); dev->users = 0; - dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE); + dev->gd = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(dev->gd)) { err = PTR_ERR(dev->gd); goto out; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e8b9db7c30c4..4d0e62ec88f0 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -84,7 +84,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(NOMERGES), QUEUE_FLAG_NAME(SAME_COMP), QUEUE_FLAG_NAME(FAIL_IO), - QUEUE_FLAG_NAME(NONROT), QUEUE_FLAG_NAME(IO_STAT), QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(ADD_RANDOM), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4f524c1d5e08..637ed3bbbfb4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -263,6 +263,39 @@ static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page) return queue_var_show(queue_dma_alignment(q), page); } +static ssize_t queue_feature_store(struct request_queue *q, const char *page, + size_t count, unsigned int feature) +{ + struct queue_limits lim; + unsigned long val; + ssize_t ret; + + ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + + lim = queue_limits_start_update(q); + if (val) + lim.features |= feature; + else + lim.features &= ~feature; + ret = queue_limits_commit_update(q, &lim); + if (ret) + return ret; + return count; +} + +#define QUEUE_SYSFS_FEATURE(_name, _feature) \ +static ssize_t queue_##_name##_show(struct request_queue *q, char *page) \ +{ \ + return sprintf(page, "%u\n", !!(q->limits.features & _feature)); \ +} \ +static ssize_t queue_##_name##_store(struct request_queue *q, \ + const char *page, size_t count) \ +{ \ + return queue_feature_store(q, page, count, _feature); \ +} + #define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \ static ssize_t \ queue_##name##_show(struct request_queue *q, char *page) \ @@ -289,7 +322,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ return ret; \ } -QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); +QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); @@ -526,7 +559,7 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { .show = queue_logical_block_size_show, }; -QUEUE_RW_ENTRY(queue_nonrot, "rotational"); +QUEUE_RW_ENTRY(queue_rotational, "rotational"); QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); @@ -624,7 +657,7 @@ static struct attribute *queue_attrs[] = { &queue_write_zeroes_max_entry.attr, &queue_zone_append_max_entry.attr, &queue_zone_write_granularity_entry.attr, - &queue_nonrot_entry.attr, + &queue_rotational_entry.attr, &queue_zoned_entry.attr, &queue_nr_zones_entry.attr, &queue_max_open_zones_entry.attr, diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index a25414228e47..ff45701f7a5e 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1776,10 +1776,13 @@ static const struct blk_mq_ops amiflop_mq_ops = { static int fd_alloc_disk(int drive, int system) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index b6dac8cee70f..2028795ec61c 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -337,6 +337,7 @@ aoeblk_gdalloc(void *vp) struct queue_limits lim = { .max_hw_sectors = aoe_maxsectors, .io_opt = SZ_2M, + .features = BLK_FEAT_ROTATIONAL, }; ulong flags; int late = 0; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index cacc4ba942a8..4ee10a742bdb 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1992,9 +1992,12 @@ static const struct blk_mq_ops ataflop_mq_ops = { static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; struct gendisk *disk; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 558d8e670566..b25dc463b5e3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -366,8 +366,6 @@ static int brd_alloc(int i) strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); - /* Tell the block layer that this is not a rotational device */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bf42a46781fa..2ef29a478075 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2697,7 +2697,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig * connect. */ .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, - .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, + .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | + BLK_FEAT_ROTATIONAL, }; device = minor_to_device(minor); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 25c9d85667f1..6d7f7df97c3a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4516,7 +4516,8 @@ static bool floppy_available(int drive) static int floppy_alloc_disk(unsigned int drive, unsigned int type) { struct queue_limits lim = { - .max_hw_sectors = 64, + .max_hw_sectors = 64, + .features = BLK_FEAT_ROTATIONAL, }; struct gendisk *disk; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 08d0fc7f17b7..86b5d956dc4e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -985,13 +985,11 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize) lim.logical_block_size = bsize; lim.physical_block_size = bsize; lim.io_min = bsize; - lim.features &= ~BLK_FEAT_WRITE_CACHE; + lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL); if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY)) lim.features |= BLK_FEAT_WRITE_CACHE; - if (!backing_bdev || bdev_nonrot(backing_bdev)) - blk_queue_flag_set(QUEUE_FLAG_NONROT, lo->lo_queue); - else - blk_queue_flag_clear(QUEUE_FLAG_NONROT, lo->lo_queue); + if (backing_bdev && !bdev_nonrot(backing_bdev)) + lim.features |= BLK_FEAT_ROTATIONAL; loop_config_discard(lo, &lim); return queue_limits_commit_update(lo->lo_queue, &lim); } diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 43a187609ef7..1dbbf72659d5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3485,7 +3485,6 @@ skip_create_disk: goto start_service_thread; /* Set device limits. */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); dma_set_max_seg_size(&dd->pdev->dev, 0x400000); diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 27b2187e7a6d..b9fdeff31caf 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -150,8 +150,6 @@ static int __init n64cart_probe(struct platform_device *pdev) set_capacity(disk, size >> SECTOR_SHIFT); set_disk_ro(disk, 1); - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - err = add_disk(disk); if (err) goto out_cleanup_disk; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index cb1c86a6a3fb..6cddf5baffe0 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1867,11 +1867,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) goto out_err_disk; } - /* - * Tell the block layer that we are not a rotational device - */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - mutex_init(&nbd->config_lock); refcount_set(&nbd->config_refs, 0); /* diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 21f9d256e884..83a4ebe4763a 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1948,7 +1948,6 @@ static int null_add_dev(struct nullb_device *dev) } nullb->q->queuedata = nullb; - blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); rv = ida_alloc(&nullb_indexes, GFP_KERNEL); if (rv < 0) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 8a2ce8070010..7cece5884b9c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2622,6 +2622,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) struct queue_limits lim = { .max_hw_sectors = PACKET_MAX_SECTORS, .logical_block_size = CD_FRAMESIZE, + .features = BLK_FEAT_ROTATIONAL, }; int idx; int ret = -ENOMEM; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 8b73cf459b59..ff45ed766469 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -388,7 +388,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) .max_segments = -1, .max_segment_size = dev->bounce_size, .dma_alignment = dev->blk_size - 1, - .features = BLK_FEAT_WRITE_CACHE, + .features = BLK_FEAT_WRITE_CACHE | + BLK_FEAT_ROTATIONAL, }; struct gendisk *gendisk; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 22ad704f81d8..ec1f1c7d4275 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4997,9 +4997,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ - if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 02c4b1731827..4918b0f68b46 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1352,10 +1352,6 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, if (dev->access_mode == RNBD_ACCESS_RO) set_disk_ro(dev->gd, true); - /* - * Network device does not need rotational - */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); err = add_disk(dev->gd); if (err) put_disk(dev->gd); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5286cb8e0824..2d38331ee667 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -791,6 +791,7 @@ static int probe_disk(struct vdc_port *port) .seg_boundary_mask = PAGE_SIZE - 1, .max_segment_size = PAGE_SIZE, .max_segments = port->ring_cookies, + .features = BLK_FEAT_ROTATIONAL, }; struct request_queue *q; struct gendisk *g; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 6731678f3a41..126f151c4f2c 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -787,6 +787,9 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs) static int swim_floppy_init(struct swim_priv *swd) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; int err; int drive; struct swim __iomem *base = swd->base; @@ -820,7 +823,7 @@ static int swim_floppy_init(struct swim_priv *swd) goto exit_put_disks; swd->unit[drive].disk = - blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL, + blk_mq_alloc_disk(&swd->unit[drive].tag_set, &lim, &swd->unit[drive]); if (IS_ERR(swd->unit[drive].disk)) { blk_mq_free_tag_set(&swd->unit[drive].tag_set); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index a04756ac778e..90be1017f7bf 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1189,6 +1189,9 @@ static int swim3_add_device(struct macio_dev *mdev, int index) static int swim3_attach(struct macio_dev *mdev, const struct of_device_id *match) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; struct floppy_state *fs; struct gendisk *disk; int rc; @@ -1210,7 +1213,7 @@ static int swim3_attach(struct macio_dev *mdev, if (rc) goto out_unregister; - disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs); + disk = blk_mq_alloc_disk(&fs->tag_set, &lim, fs); if (IS_ERR(disk)) { rc = PTR_ERR(disk); goto out_free_tag_set; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e45c65c1848d..4fcde0999358 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -484,14 +484,8 @@ static inline unsigned ublk_pos_to_tag(loff_t pos) static void ublk_dev_param_basic_apply(struct ublk_device *ub) { - struct request_queue *q = ub->ub_disk->queue; const struct ublk_param_basic *p = &ub->params.basic; - if (p->attrs & UBLK_ATTR_ROTATIONAL) - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); - else - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - if (p->attrs & UBLK_ATTR_READ_ONLY) set_disk_ro(ub->ub_disk, true); @@ -2214,6 +2208,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) lim.features |= BLK_FEAT_FUA; } + if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL) + lim.features |= BLK_FEAT_ROTATIONAL; + if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b1a3c2935285..13a2f24f1766 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1451,7 +1451,9 @@ static int virtblk_read_limits(struct virtio_blk *vblk, static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; - struct queue_limits lim = { }; + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; int err, index; unsigned int queue_depth; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9aafce3e5987..fa3a2ba52545 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1146,7 +1146,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, err = PTR_ERR(gd); goto out_free_tag_set; } - blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue); strcpy(gd->disk_name, DEV_NAME); ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3acd7006ad2c..aad840fc7e18 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2245,8 +2245,6 @@ static int zram_add(void) /* Actual capacity set using sysfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); - /* zram devices sort of resembles non-rotational disks */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index eefdd422ad8e..71cfe7a85913 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -744,6 +744,7 @@ static int probe_gdrom(struct platform_device *devptr) .max_segments = 1, /* set a large max size to get most from DMA */ .max_segment_size = 0x40000, + .features = BLK_FEAT_ROTATIONAL, }; int err; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index cb6595c8b551..baa364eedd00 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -974,8 +974,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->disk->minors = BCACHE_MINORS; d->disk->fops = ops; d->disk->private_data = d; - - blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); return 0; out_bioset_exit: diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 03abdae64682..c062af329709 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1716,12 +1716,6 @@ static int device_dax_write_cache_enabled(struct dm_target *ti, return false; } -static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - return !bdev_nonrot(dev->bdev); -} - static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1870,12 +1864,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) dax_write_cache(t->md->dax_dev, true); - /* Ensure that all underlying devices are non-rotational. */ - if (dm_table_any_dev_attr(t, device_is_rotational, NULL)) - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); - else - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - /* * Some devices don't use blk_integrity but still want stable pages * because they do their own checksumming. diff --git a/drivers/md/md.c b/drivers/md/md.c index 2f4c5d1755d8..c23423c51fb7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6151,20 +6151,7 @@ int md_run(struct mddev *mddev) if (!mddev_is_dm(mddev)) { struct request_queue *q = mddev->gendisk->queue; - bool nonrot = true; - rdev_for_each(rdev, mddev) { - if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) { - nonrot = false; - break; - } - } - if (mddev->degraded) - nonrot = false; - if (nonrot) - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - else - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q); /* Set the NOWAIT flags if all underlying devices support it */ diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 97ff993d3157..b4f62fa84586 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -387,7 +387,6 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); blk_queue_rq_timeout(mq->queue, 60 * HZ); - blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 1b9f57f231e8..bf8369ce7ddf 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -375,7 +375,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) spin_lock_init(&new->queue_lock); INIT_LIST_HEAD(&new->rq_list); - blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); gd->queue = new->rq; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index c5f8451b494d..e474afa8e9f6 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1518,7 +1518,6 @@ static int btt_blk_init(struct btt *btt) btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; - blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue); set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index aff818469c11..501cf226df01 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -546,7 +546,6 @@ static int pmem_attach_disk(struct device *dev, } pmem->virt_addr = addr; - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q); if (pmem->pfn_flags & PFN_MAP) blk_queue_flag_set(QUEUE_FLAG_DAX, q); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9fc5e36fe2e5..0d753fe71f35 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3744,7 +3744,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) if (ctrl->opts && ctrl->opts->data_digest) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); - blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); if (ctrl->ops->supports_pci_p2pdma && ctrl->ops->supports_pci_p2pdma(ctrl)) blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3d0e23a0a4dd..58c13304e558 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -549,7 +549,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance); - blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue); /* diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 4533dd055ca8..1aa426b1dedd 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -68,7 +68,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) blk_mq_free_tag_set(&block->tag_set); return PTR_ERR(gdp); } - blk_queue_flag_set(QUEUE_FLAG_NONROT, gdp->queue); /* Initialize gendisk structure. */ gdp->major = DASD_MAJOR; diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 1d456a5a3bfb..2e2309fa9a0b 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -475,7 +475,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) goto out_tag; } rq = bdev->rq = bdev->gendisk->queue; - blk_queue_flag_set(QUEUE_FLAG_NONROT, rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq); bdev->gendisk->private_data = scmdev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d8ee4a4d4a62..a42c3c45e868 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3318,7 +3318,7 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, rcu_read_unlock(); if (rot == 1) { - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + lim->features &= ~BLK_FEAT_ROTATIONAL; blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } @@ -3646,7 +3646,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * cause this to be updated correctly and any device which * doesn't support it should be treated as rotational. */ - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + lim.features |= BLK_FEAT_ROTATIONAL; blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); if (scsi_device_supports_vpd(sdp)) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index acdfe5122faa..988e3248cffe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -289,14 +289,16 @@ enum { /* supports passing on the FUA bit */ BLK_FEAT_FUA = (1u << 1), + + /* rotational device (hard drive or floppy) */ + BLK_FEAT_ROTATIONAL = (1u << 2), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) - + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) /* internal flags in queue_limits.flags */ enum { @@ -553,8 +555,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ -#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ @@ -589,7 +589,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) -#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ -- cgit v1.2.3 From 39a9f1c334f9f27b3b3e6d0005c10ed667268346 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:42 +0200 Subject: block: move the add_random flag to queue_limits Move the add_random flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Note that this also removes code from dm to clear the flag based on the underlying devices, which can't be reached as dm devices will always start out without the flag set. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-16-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - block/blk-sysfs.c | 6 +++--- drivers/block/mtip32xx/mtip32xx.c | 1 - drivers/md/dm-table.c | 18 ------------------ drivers/mmc/core/queue.c | 2 -- drivers/mtd/mtd_blkdevs.c | 3 --- drivers/s390/block/scm_blk.c | 4 ---- drivers/scsi/scsi_lib.c | 3 +-- drivers/scsi/sd.c | 11 +++-------- include/linux/blkdev.h | 5 +++-- 10 files changed, 10 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4d0e62ec88f0..6b7edb50bfd3 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -86,7 +86,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(FAIL_IO), QUEUE_FLAG_NAME(IO_STAT), QUEUE_FLAG_NAME(NOXMERGES), - QUEUE_FLAG_NAME(ADD_RANDOM), QUEUE_FLAG_NAME(SYNCHRONOUS), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(INIT_DONE), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 637ed3bbbfb4..9174aca3b855 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -323,7 +323,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ } QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) -QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); +QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); #undef QUEUE_SYSFS_BIT_FNS @@ -561,7 +561,7 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { QUEUE_RW_ENTRY(queue_rotational, "rotational"); QUEUE_RW_ENTRY(queue_iostats, "iostats"); -QUEUE_RW_ENTRY(queue_random, "add_random"); +QUEUE_RW_ENTRY(queue_add_random, "add_random"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); #ifdef CONFIG_BLK_WBT @@ -665,7 +665,7 @@ static struct attribute *queue_attrs[] = { &queue_nomerges_entry.attr, &queue_iostats_entry.attr, &queue_stable_writes_entry.attr, - &queue_random_entry.attr, + &queue_add_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, &queue_fua_entry.attr, diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 1dbbf72659d5..c6ef0546ffc9 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3485,7 +3485,6 @@ skip_create_disk: goto start_service_thread; /* Set device limits. */ - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); dma_set_max_seg_size(&dd->pdev->dev, 0x400000); /* Set the capacity of the device in 512 byte sectors. */ diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c062af329709..0a3838e45aff 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1716,14 +1716,6 @@ static int device_dax_write_cache_enabled(struct dm_target *ti, return false; } -static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return !blk_queue_add_random(q); -} - static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1876,16 +1868,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); - /* - * Determine whether or not this queue's I/O timings contribute - * to the entropy pool, Only request-based targets use this. - * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not - * have it set. - */ - if (blk_queue_add_random(q) && - dm_table_any_dev_attr(t, device_is_not_random, NULL)) - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); - /* * For a zoned target, setup the zones related queue attributes * and resources necessary for zone append emulation if necessary. diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index b4f62fa84586..da00904d4a3c 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -387,8 +387,6 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); blk_queue_rq_timeout(mq->queue, 60 * HZ); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); - dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index bf8369ce7ddf..47ead84407cd 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -374,9 +374,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) /* Create the request queue */ spin_lock_init(&new->queue_lock); INIT_LIST_HEAD(&new->rq_list); - - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); - gd->queue = new->rq; if (new->readonly) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 2e2309fa9a0b..3fcfe029db1b 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -439,7 +439,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) .logical_block_size = 1 << 12, }; unsigned int devindex; - struct request_queue *rq; int len, ret; lim.max_segments = min(scmdev->nr_max_block, @@ -474,9 +473,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) ret = PTR_ERR(bdev->gendisk); goto out_tag; } - rq = bdev->rq = bdev->gendisk->queue; - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq); - bdev->gendisk->private_data = scmdev; bdev->gendisk->fops = &scm_blk_devops; bdev->gendisk->major = scm_major; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ec39acc986d6..54f771ec8cfb 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -631,8 +631,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error, if (blk_update_request(req, error, bytes)) return true; - // XXX: - if (blk_queue_add_random(q)) + if (q->limits.features & BLK_FEAT_ADD_RANDOM) add_disk_randomness(req->q->disk); WARN_ON_ONCE(!blk_rq_is_passthrough(req) && diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a42c3c45e868..a27f1c7f1b61 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3301,7 +3301,6 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) static void sd_read_block_characteristics(struct scsi_disk *sdkp, struct queue_limits *lim) { - struct request_queue *q = sdkp->disk->queue; struct scsi_vpd *vpd; u16 rot; @@ -3317,10 +3316,8 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, sdkp->zoned = (vpd->data[8] >> 4) & 3; rcu_read_unlock(); - if (rot == 1) { - lim->features &= ~BLK_FEAT_ROTATIONAL; - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); - } + if (rot == 1) + lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (!sdkp->first_scan) return; @@ -3599,7 +3596,6 @@ static int sd_revalidate_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; - struct request_queue *q = sdkp->disk->queue; sector_t old_capacity = sdkp->capacity; struct queue_limits lim; unsigned char *buffer; @@ -3646,8 +3642,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * cause this to be updated correctly and any device which * doesn't support it should be treated as rotational. */ - lim.features |= BLK_FEAT_ROTATIONAL; - blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); + lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 988e3248cffe..cf1bbf566b2b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -292,6 +292,9 @@ enum { /* rotational device (hard drive or floppy) */ BLK_FEAT_ROTATIONAL = (1u << 2), + + /* contributes to the random number pool */ + BLK_FEAT_ADD_RANDOM = (1u << 3), }; /* @@ -557,7 +560,6 @@ struct request_queue { #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ @@ -591,7 +593,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) -#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -- cgit v1.2.3 From cdb2497918cc2929691408bac87b58433b45b6d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:43 +0200 Subject: block: move the io_stat flag setting to queue_limits Move the io_stat flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Simplify md and dm to set the flag unconditionally instead of avoiding setting a simple flag for cases where it already is set by other means, which is a bit pointless. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-17-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 6 +++++- block/blk-sysfs.c | 2 +- drivers/md/dm-table.c | 12 +++++++++--- drivers/md/dm.c | 13 +++---------- drivers/md/md.c | 5 ++--- drivers/nvme/host/multipath.c | 2 +- include/linux/blkdev.h | 9 +++++---- 8 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 6b7edb50bfd3..cbe99444ed1a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -84,7 +84,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(NOMERGES), QUEUE_FLAG_NAME(SAME_COMP), QUEUE_FLAG_NAME(FAIL_IO), - QUEUE_FLAG_NAME(IO_STAT), QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(SYNCHRONOUS), QUEUE_FLAG_NAME(SAME_FORCE), diff --git a/block/blk-mq.c b/block/blk-mq.c index 58b0d6c7cc34..cf67dc13f7dd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4116,7 +4116,11 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, struct request_queue *q; int ret; - q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node); + if (!lim) + lim = &default_lim; + lim->features |= BLK_FEAT_IO_STAT; + + q = blk_alloc_queue(lim, set->numa_node); if (IS_ERR(q)) return q; q->queuedata = queuedata; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9174aca3b855..6f58530fb3c0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -324,7 +324,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) -QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); +QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); #undef QUEUE_SYSFS_BIT_FNS diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 0a3838e45aff..5d5431e531ae 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -579,6 +579,12 @@ int dm_split_args(int *argc, char ***argvp, char *input) return 0; } +static void dm_set_stacking_limits(struct queue_limits *limits) +{ + blk_set_stacking_limits(limits); + limits->features |= BLK_FEAT_IO_STAT; +} + /* * Impose necessary and sufficient conditions on a devices's table such * that any incoming bio which respects its logical_block_size can be @@ -617,7 +623,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *t, for (i = 0; i < t->num_targets; i++) { ti = dm_table_get_target(t, i); - blk_set_stacking_limits(&ti_limits); + dm_set_stacking_limits(&ti_limits); /* combine all target devices' limits */ if (ti->type->iterate_devices) @@ -1591,7 +1597,7 @@ int dm_calculate_queue_limits(struct dm_table *t, unsigned int zone_sectors = 0; bool zoned = false; - blk_set_stacking_limits(limits); + dm_set_stacking_limits(limits); t->integrity_supported = true; for (unsigned int i = 0; i < t->num_targets; i++) { @@ -1604,7 +1610,7 @@ int dm_calculate_queue_limits(struct dm_table *t, for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); - blk_set_stacking_limits(&ti_limits); + dm_set_stacking_limits(&ti_limits); if (!ti->type->iterate_devices) { /* Set I/O hints portion of queue limits */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 13037d6a6f62..8a976cee448b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2386,22 +2386,15 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) struct table_device *td; int r; - switch (type) { - case DM_TYPE_REQUEST_BASED: + WARN_ON_ONCE(type == DM_TYPE_NONE); + + if (type == DM_TYPE_REQUEST_BASED) { md->disk->fops = &dm_rq_blk_dops; r = dm_mq_init_request_queue(md, t); if (r) { DMERR("Cannot initialize queue for request-based dm mapped device"); return r; } - break; - case DM_TYPE_BIO_BASED: - case DM_TYPE_DAX_BIO_BASED: - blk_queue_flag_set(QUEUE_FLAG_IO_STAT, md->queue); - break; - case DM_TYPE_NONE: - WARN_ON_ONCE(true); - break; } r = dm_calculate_queue_limits(t, &limits); diff --git a/drivers/md/md.c b/drivers/md/md.c index c23423c51fb7..8db0db8d5a27 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5787,7 +5787,8 @@ struct mddev *md_alloc(dev_t dev, char *name) int unit; int error; struct queue_limits lim = { - .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA, + .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | + BLK_FEAT_IO_STAT, }; /* @@ -6152,8 +6153,6 @@ int md_run(struct mddev *mddev) if (!mddev_is_dm(mddev)) { struct request_queue *q = mddev->gendisk->queue; - blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q); - /* Set the NOWAIT flags if all underlying devices support it */ if (nowait) blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 58c13304e558..eea727cfa9e6 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -538,6 +538,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_set_stacking_limits(&lim); lim.dma_alignment = 3; + lim.features |= BLK_FEAT_IO_STAT; if (head->ids.csi != NVME_CSI_ZNS) lim.max_zone_append_sectors = 0; @@ -550,7 +551,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ctrl->subsys->instance, head->instance); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); - blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue); /* * This assumes all controllers that refer to a namespace either * support poll queues or not. That is not a strict guarantee, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cf1bbf566b2b..5fafb2f95fd1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -295,6 +295,9 @@ enum { /* contributes to the random number pool */ BLK_FEAT_ADD_RANDOM = (1u << 3), + + /* do disk/partitions IO accounting */ + BLK_FEAT_IO_STAT = (1u << 4), }; /* @@ -558,7 +561,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ @@ -577,8 +579,7 @@ struct request_queue { #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ -#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ - (1UL << QUEUE_FLAG_SAME_COMP) | \ +#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_SAME_COMP) | \ (1UL << QUEUE_FLAG_NOWAIT)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); @@ -592,7 +593,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) -#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) +#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -- cgit v1.2.3 From 1a02f3a73f8c670eddeb44bf52a75ae7f67cfc11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:44 +0200 Subject: block: move the stable_writes flag to queue_limits Move the stable_writes flag into the queue_limits feature field so that it can be set atomically with the queue frozen. The flag is now inherited by blk_stack_limits, which greatly simplifies the code in dm, and fixed md which previously did not pass on the flag set on lower devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-18-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - block/blk-sysfs.c | 29 +---------------------------- drivers/block/drbd/drbd_main.c | 5 ++--- drivers/block/rbd.c | 9 +++------ drivers/block/zram/zram_drv.c | 2 +- drivers/md/dm-table.c | 19 ------------------- drivers/md/raid5.c | 6 ++++-- drivers/mmc/core/queue.c | 5 +++-- drivers/nvme/host/core.c | 9 +++++---- drivers/nvme/host/multipath.c | 4 ---- drivers/scsi/iscsi_tcp.c | 8 ++++---- include/linux/blkdev.h | 9 ++++++--- 12 files changed, 29 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index cbe99444ed1a..eb73f1d348e5 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -88,7 +88,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SYNCHRONOUS), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(INIT_DONE), - QUEUE_FLAG_NAME(STABLE_WRITES), QUEUE_FLAG_NAME(POLL), QUEUE_FLAG_NAME(DAX), QUEUE_FLAG_NAME(STATS), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6f58530fb3c0..cde525724831 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -296,37 +296,10 @@ static ssize_t queue_##_name##_store(struct request_queue *q, \ return queue_feature_store(q, page, count, _feature); \ } -#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \ -static ssize_t \ -queue_##name##_show(struct request_queue *q, char *page) \ -{ \ - int bit; \ - bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \ - return queue_var_show(neg ? !bit : bit, page); \ -} \ -static ssize_t \ -queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ -{ \ - unsigned long val; \ - ssize_t ret; \ - ret = queue_var_store(&val, page, count); \ - if (ret < 0) \ - return ret; \ - if (neg) \ - val = !val; \ - \ - if (val) \ - blk_queue_flag_set(QUEUE_FLAG_##flag, q); \ - else \ - blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \ - return ret; \ -} - QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) -QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); -#undef QUEUE_SYSFS_BIT_FNS +QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES); static ssize_t queue_zoned_show(struct request_queue *q, char *page) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2ef29a478075..f92673f05c7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2698,7 +2698,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig */ .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | - BLK_FEAT_ROTATIONAL, + BLK_FEAT_ROTATIONAL | + BLK_FEAT_STABLE_WRITES, }; device = minor_to_device(minor); @@ -2737,8 +2738,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); - device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) goto out_no_io_page; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ec1f1c7d4275..008e850555f4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4949,7 +4949,6 @@ static const struct blk_mq_ops rbd_mq_ops = { static int rbd_init_disk(struct rbd_device *rbd_dev) { struct gendisk *disk; - struct request_queue *q; unsigned int objset_bytes = rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; struct queue_limits lim = { @@ -4979,12 +4978,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT; } + if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) + lim.features |= BLK_FEAT_STABLE_WRITES; + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_tag_set; } - q = disk->queue; snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", rbd_dev->dev_id); @@ -4996,10 +4997,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->minors = RBD_MINORS_PER_MAJOR; disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - - if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); - rbd_dev->disk = disk; return 0; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index aad840fc7e18..f8f1b5b54795 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2208,6 +2208,7 @@ static int zram_add(void) #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE .max_write_zeroes_sectors = UINT_MAX, #endif + .features = BLK_FEAT_STABLE_WRITES, }; struct zram *zram; int ret, device_id; @@ -2246,7 +2247,6 @@ static int zram_add(void) /* Actual capacity set using sysfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) goto out_cleanup_disk; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5d5431e531ae..aaf379cb15d9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1819,13 +1819,6 @@ static bool dm_table_supports_secure_erase(struct dm_table *t) return true; } -static int device_requires_stable_pages(struct dm_target *ti, - struct dm_dev *dev, sector_t start, - sector_t len, void *data) -{ - return bdev_stable_writes(dev->bdev); -} - int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1862,18 +1855,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) dax_write_cache(t->md->dax_dev, true); - /* - * Some devices don't use blk_integrity but still want stable pages - * because they do their own checksumming. - * If any underlying device requires stable pages, a table must require - * them as well. Only targets that support iterate_devices are considered: - * don't want error, zero, etc to require stable pages. - */ - if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL)) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); - else - blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); - /* * For a zoned target, setup the zones related queue attributes * and resources necessary for zone append emulation if necessary. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 675c68fa6c64..e875763d6991 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7082,12 +7082,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) err = -ENODEV; else if (new != conf->skip_copy) { struct request_queue *q = mddev->gendisk->queue; + struct queue_limits lim = queue_limits_start_update(q); conf->skip_copy = new; if (new) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); + lim.features |= BLK_FEAT_STABLE_WRITES; else - blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); + lim.features &= ~BLK_FEAT_STABLE_WRITES; + err = queue_limits_commit_update(q, &lim); } mddev_unlock_and_resume(mddev); return err ?: len; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index da00904d4a3c..d0b3ca8a11f0 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -378,13 +378,14 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq, lim.max_segments = host->max_segs; } + if (mmc_host_is_spi(host) && host->use_spi_crc) + lim.features |= BLK_FEAT_STABLE_WRITES; + disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq); if (IS_ERR(disk)) return disk; mq->queue = disk->queue; - if (mmc_host_is_spi(host) && host->use_spi_crc) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); blk_queue_rq_timeout(mq->queue, 60 * HZ); dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d753fe71f35..5ecf762d7c88 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3724,6 +3724,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) { + struct queue_limits lim = { }; struct nvme_ns *ns; struct gendisk *disk; int node = ctrl->numa_node; @@ -3732,7 +3733,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) if (!ns) return; - disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns); + if (ctrl->opts && ctrl->opts->data_digest) + lim.features |= BLK_FEAT_STABLE_WRITES; + + disk = blk_mq_alloc_disk(ctrl->tagset, &lim, ns); if (IS_ERR(disk)) goto out_free_ns; disk->fops = &nvme_bdev_ops; @@ -3741,9 +3745,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) ns->disk = disk; ns->queue = disk->queue; - if (ctrl->opts && ctrl->opts->data_digest) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); - if (ctrl->ops->supports_pci_p2pdma && ctrl->ops->supports_pci_p2pdma(ctrl)) blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index eea727cfa9e6..173796f2ddea 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -868,10 +868,6 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) nvme_mpath_set_live(ns); } - if (test_bit(QUEUE_FLAG_STABLE_WRITES, &ns->queue->queue_flags) && - ns->head->disk) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, - ns->head->disk->queue); #ifdef CONFIG_BLK_DEV_ZONED if (blk_queue_is_zoned(ns->queue) && ns->head->disk) ns->head->disk->nr_zones = ns->disk->nr_zones; diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 60688f18fac6..c708e1059638 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -1057,15 +1057,15 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param) return 0; } -static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev) +static int iscsi_sw_tcp_device_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host); struct iscsi_session *session = tcp_sw_host->session; struct iscsi_conn *conn = session->leadconn; if (conn->datadgst_en) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, - sdev->request_queue); + lim->features |= BLK_FEAT_STABLE_WRITES; return 0; } @@ -1083,7 +1083,7 @@ static const struct scsi_host_template iscsi_sw_tcp_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .dma_boundary = PAGE_SIZE - 1, - .slave_configure = iscsi_sw_tcp_slave_configure, + .device_configure = iscsi_sw_tcp_device_configure, .proc_name = "iscsi_tcp", .this_id = -1, .track_queue_depth = 1, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5fafb2f95fd1..8936eb6ba609 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -298,13 +298,17 @@ enum { /* do disk/partitions IO accounting */ BLK_FEAT_IO_STAT = (1u << 4), + + /* don't modify data until writeback is done */ + BLK_FEAT_STABLE_WRITES = (1u << 5), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ + BLK_FEAT_STABLE_WRITES) /* internal flags in queue_limits.flags */ enum { @@ -565,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ @@ -1323,7 +1326,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev) if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; - return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); + return q->limits.features & BLK_FEAT_STABLE_WRITES; } static inline bool blk_queue_write_cache(struct request_queue *q) -- cgit v1.2.3 From aadd5c59c910427c0464c217d5ed588ff14e2502 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:45 +0200 Subject: block: move the synchronous flag to queue_limits Move the synchronous flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-19-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - drivers/block/brd.c | 2 +- drivers/block/zram/zram_drv.c | 4 ++-- drivers/nvdimm/btt.c | 3 +-- drivers/nvdimm/pmem.c | 4 ++-- include/linux/blkdev.h | 7 ++++--- 6 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index eb73f1d348e5..957774e40b1d 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -85,7 +85,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SAME_COMP), QUEUE_FLAG_NAME(FAIL_IO), QUEUE_FLAG_NAME(NOXMERGES), - QUEUE_FLAG_NAME(SYNCHRONOUS), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(INIT_DONE), QUEUE_FLAG_NAME(POLL), diff --git a/drivers/block/brd.c b/drivers/block/brd.c index b25dc463b5e3..d77deb571dbd 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -335,6 +335,7 @@ static int brd_alloc(int i) .max_hw_discard_sectors = UINT_MAX, .max_discard_segments = 1, .discard_granularity = PAGE_SIZE, + .features = BLK_FEAT_SYNCHRONOUS, }; list_for_each_entry(brd, &brd_devices, brd_list) @@ -366,7 +367,6 @@ static int brd_alloc(int i) strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); - blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); if (err) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f8f1b5b54795..efcb8d9d274c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2208,7 +2208,8 @@ static int zram_add(void) #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE .max_write_zeroes_sectors = UINT_MAX, #endif - .features = BLK_FEAT_STABLE_WRITES, + .features = BLK_FEAT_STABLE_WRITES | + BLK_FEAT_SYNCHRONOUS, }; struct zram *zram; int ret, device_id; @@ -2246,7 +2247,6 @@ static int zram_add(void) /* Actual capacity set using sysfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); - blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) goto out_cleanup_disk; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index e474afa8e9f6..e79c06d65bb7 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1501,6 +1501,7 @@ static int btt_blk_init(struct btt *btt) .logical_block_size = btt->sector_size, .max_hw_sectors = UINT_MAX, .max_integrity_segments = 1, + .features = BLK_FEAT_SYNCHRONOUS, }; int rc; @@ -1518,8 +1519,6 @@ static int btt_blk_init(struct btt *btt) btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; - blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue); - set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); if (rc) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 501cf226df01..b821dcf018f6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -455,7 +455,8 @@ static int pmem_attach_disk(struct device *dev, .logical_block_size = pmem_sector_size(ndns), .physical_block_size = PAGE_SIZE, .max_hw_sectors = UINT_MAX, - .features = BLK_FEAT_WRITE_CACHE, + .features = BLK_FEAT_WRITE_CACHE | + BLK_FEAT_SYNCHRONOUS, }; int nid = dev_to_node(dev), fua; struct resource *res = &nsio->res; @@ -546,7 +547,6 @@ static int pmem_attach_disk(struct device *dev, } pmem->virt_addr = addr; - blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q); if (pmem->pfn_flags & PFN_MAP) blk_queue_flag_set(QUEUE_FLAG_DAX, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8936eb6ba609..cee7b44a1425 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -301,6 +301,9 @@ enum { /* don't modify data until writeback is done */ BLK_FEAT_STABLE_WRITES = (1u << 5), + + /* always completes in submit context */ + BLK_FEAT_SYNCHRONOUS = (1u << 6), }; /* @@ -566,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -1315,8 +1317,7 @@ static inline bool bdev_nonrot(struct block_device *bdev) static inline bool bdev_synchronous(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_SYNCHRONOUS, - &bdev_get_queue(bdev)->queue_flags); + return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS; } static inline bool bdev_stable_writes(struct block_device *bdev) -- cgit v1.2.3 From f76af42f8bf13d2620084f305f01691de9238fc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:46 +0200 Subject: block: move the nowait flag to queue_limits Move the nowait flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Stacking drivers are simplified in that they now can simply set the flag, and blk_stack_limits will clear it when the features is not supported by any of the underlying devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-20-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 2 +- block/blk-settings.c | 9 +++++++++ drivers/block/brd.c | 4 ++-- drivers/md/dm-table.c | 18 +++--------------- drivers/md/md.c | 18 +----------------- drivers/nvme/host/multipath.c | 3 +-- include/linux/blkdev.h | 9 +++++---- 8 files changed, 22 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 957774e40b1d..62b132e9a9ce 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -96,7 +96,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), QUEUE_FLAG_NAME(HCTX_ACTIVE), - QUEUE_FLAG_NAME(NOWAIT), QUEUE_FLAG_NAME(SQ_SCHED), QUEUE_FLAG_NAME(SKIP_TAGSET_QUIESCE), }; diff --git a/block/blk-mq.c b/block/blk-mq.c index cf67dc13f7dd..43235acc8750 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4118,7 +4118,7 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, if (!lim) lim = &default_lim; - lim->features |= BLK_FEAT_IO_STAT; + lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; q = blk_alloc_queue(lim, set->numa_node); if (IS_ERR(q)) diff --git a/block/blk-settings.c b/block/blk-settings.c index 536ee202fcdc..bf4622c19b5c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -459,6 +459,15 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->features |= (b->features & BLK_FEAT_INHERIT_MASK); + /* + * BLK_FEAT_NOWAIT needs to be supported both by the stacking driver + * and all underlying devices. The stacking driver sets the flag + * before stacking the limits, and this will clear the flag if any + * of the underlying devices does not support it. + */ + if (!(b->features & BLK_FEAT_NOWAIT)) + t->features &= ~BLK_FEAT_NOWAIT; + t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_user_sectors = min_not_zero(t->max_user_sectors, b->max_user_sectors); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index d77deb571dbd..a300645cd9d4 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -335,7 +335,8 @@ static int brd_alloc(int i) .max_hw_discard_sectors = UINT_MAX, .max_discard_segments = 1, .discard_granularity = PAGE_SIZE, - .features = BLK_FEAT_SYNCHRONOUS, + .features = BLK_FEAT_SYNCHRONOUS | + BLK_FEAT_NOWAIT, }; list_for_each_entry(brd, &brd_devices, brd_list) @@ -367,7 +368,6 @@ static int brd_alloc(int i) strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); - blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); if (err) goto out_cleanup_disk; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index aaf379cb15d9..84d636712c72 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -582,7 +582,7 @@ int dm_split_args(int *argc, char ***argvp, char *input) static void dm_set_stacking_limits(struct queue_limits *limits) { blk_set_stacking_limits(limits); - limits->features |= BLK_FEAT_IO_STAT; + limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; } /* @@ -1746,12 +1746,6 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t) return true; } -static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - return !bdev_nowait(dev->bdev); -} - static bool dm_table_supports_nowait(struct dm_table *t) { for (unsigned int i = 0; i < t->num_targets; i++) { @@ -1759,10 +1753,6 @@ static bool dm_table_supports_nowait(struct dm_table *t) if (!dm_target_supports_nowait(ti->type)) return false; - - if (!ti->type->iterate_devices || - ti->type->iterate_devices(ti, device_not_nowait_capable, NULL)) - return false; } return true; @@ -1824,10 +1814,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, { int r; - if (dm_table_supports_nowait(t)) - blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); - else - blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q); + if (!dm_table_supports_nowait(t)) + limits->features &= ~BLK_FEAT_NOWAIT; if (!dm_table_supports_discards(t)) { limits->max_hw_discard_sectors = 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index 8db0db8d5a27..f1c7d4f281c5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5788,7 +5788,7 @@ struct mddev *md_alloc(dev_t dev, char *name) int error; struct queue_limits lim = { .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | - BLK_FEAT_IO_STAT, + BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT, }; /* @@ -6150,13 +6150,6 @@ int md_run(struct mddev *mddev) } } - if (!mddev_is_dm(mddev)) { - struct request_queue *q = mddev->gendisk->queue; - - /* Set the NOWAIT flags if all underlying devices support it */ - if (nowait) - blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); - } if (pers->sync_request) { if (mddev->kobj.sd && sysfs_create_group(&mddev->kobj, &md_redundancy_group)) @@ -7115,15 +7108,6 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); if (!mddev->thread) md_update_sb(mddev, 1); - /* - * If the new disk does not support REQ_NOWAIT, - * disable on the whole MD. - */ - if (!bdev_nowait(rdev->bdev)) { - pr_info("%s: Disabling nowait because %pg does not support nowait\n", - mdname(mddev), rdev->bdev); - blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue); - } /* * Kick recovery, maybe this spare has to be added to the * array immediately. diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 173796f2ddea..61a162c9cf4e 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -538,7 +538,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_set_stacking_limits(&lim); lim.dma_alignment = 3; - lim.features |= BLK_FEAT_IO_STAT; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; if (head->ids.csi != NVME_CSI_ZNS) lim.max_zone_append_sectors = 0; @@ -550,7 +550,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance); - blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); /* * This assumes all controllers that refer to a namespace either * support poll queues or not. That is not a strict guarantee, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cee7b44a1425..f3d4519d609d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -304,6 +304,9 @@ enum { /* always completes in submit context */ BLK_FEAT_SYNCHRONOUS = (1u << 6), + + /* supports REQ_NOWAIT */ + BLK_FEAT_NOWAIT = (1u << 7), }; /* @@ -580,12 +583,10 @@ struct request_queue { #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ -#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ -#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_SAME_COMP) | \ - (1UL << QUEUE_FLAG_NOWAIT)) +#define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); @@ -1348,7 +1349,7 @@ static inline bool bdev_fua(struct block_device *bdev) static inline bool bdev_nowait(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); + return bdev->bd_disk->queue->limits.features & BLK_FEAT_NOWAIT; } static inline bool bdev_is_zoned(struct block_device *bdev) -- cgit v1.2.3 From f467fee48da4500786e145489787b37adae317c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:47 +0200 Subject: block: move the dax flag to queue_limits Move the dax flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-21-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - drivers/md/dm-table.c | 4 ++-- drivers/nvdimm/pmem.c | 7 ++----- drivers/s390/block/dcssblk.c | 2 +- include/linux/blkdev.h | 6 ++++-- 5 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 62b132e9a9ce..f4fa820251ce 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -88,7 +88,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(INIT_DONE), QUEUE_FLAG_NAME(POLL), - QUEUE_FLAG_NAME(DAX), QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(QUIESCED), diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 84d636712c72..e44697037e86 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1834,11 +1834,11 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; if (dm_table_supports_dax(t, device_not_dax_capable)) { - blk_queue_flag_set(QUEUE_FLAG_DAX, q); + limits->features |= BLK_FEAT_DAX; if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) set_dax_synchronous(t->md->dax_dev); } else - blk_queue_flag_clear(QUEUE_FLAG_DAX, q); + limits->features &= ~BLK_FEAT_DAX; if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) dax_write_cache(t->md->dax_dev, true); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b821dcf018f6..1dd74c969d5a 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -465,7 +465,6 @@ static int pmem_attach_disk(struct device *dev, struct dax_device *dax_dev; struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; - struct request_queue *q; struct gendisk *disk; void *addr; int rc; @@ -499,6 +498,8 @@ static int pmem_attach_disk(struct device *dev, } if (fua) lim.features |= BLK_FEAT_FUA; + if (is_nd_pfn(dev)) + lim.features |= BLK_FEAT_DAX; if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { @@ -509,7 +510,6 @@ static int pmem_attach_disk(struct device *dev, disk = blk_alloc_disk(&lim, nid); if (IS_ERR(disk)) return PTR_ERR(disk); - q = disk->queue; pmem->disk = disk; pmem->pgmap.owner = pmem; @@ -547,9 +547,6 @@ static int pmem_attach_disk(struct device *dev, } pmem->virt_addr = addr; - if (pmem->pfn_flags & PFN_MAP) - blk_queue_flag_set(QUEUE_FLAG_DAX, q); - disk->fops = &pmem_fops; disk->private_data = pmem; nvdimm_namespace_disk_name(ndns, disk->disk_name); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6d1689a2717e..d5a5d11ae0dc 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -548,6 +548,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char { struct queue_limits lim = { .logical_block_size = 4096, + .features = BLK_FEAT_DAX, }; int rc, i, j, num_of_segments; struct dcssblk_dev_info *dev_info; @@ -643,7 +644,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->fops = &dcssblk_devops; dev_info->gd->private_data = dev_info; dev_info->gd->flags |= GENHD_FL_NO_PART; - blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue); seg_byte_size = (dev_info->end - dev_info->start + 1); set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3d4519d609d..7022e06a3dd9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -307,6 +307,9 @@ enum { /* supports REQ_NOWAIT */ BLK_FEAT_NOWAIT = (1u << 7), + + /* supports DAX */ + BLK_FEAT_DAX = (1u << 8), }; /* @@ -575,7 +578,6 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -#define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ @@ -602,7 +604,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) -#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) +#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3 From 8023e144f9d6e35f8786937e2f0c2fea0aba6dbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:48 +0200 Subject: block: move the poll flag to queue_limits Move the poll flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Stacking drivers are simplified in that they now can simply set the flag, and blk_stack_limits will clear it when the features is not supported by any of the underlying devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-22-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ++-- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 31 ++++++++++++++----------- block/blk-settings.c | 10 ++++---- block/blk-sysfs.c | 4 ++-- drivers/md/dm-table.c | 54 +++++++++++-------------------------------- drivers/nvme/host/multipath.c | 12 +--------- include/linux/blkdev.h | 4 +++- 8 files changed, 45 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 2b45a4df9a1a..8d9fbd353fc7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -791,7 +791,7 @@ void submit_bio_noacct(struct bio *bio) } } - if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + if (!(q->limits.features & BLK_FEAT_POLL)) bio_clear_polled(bio); switch (bio_op(bio)) { @@ -915,8 +915,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) return 0; q = bdev_get_queue(bdev); - if (cookie == BLK_QC_T_NONE || - !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL)) return 0; blk_flush_plug(current->plug, false); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f4fa820251ce..3a2152791384 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -87,7 +87,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(INIT_DONE), - QUEUE_FLAG_NAME(POLL), QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(QUIESCED), diff --git a/block/blk-mq.c b/block/blk-mq.c index 43235acc8750..e2b9710ddc5a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4109,6 +4109,12 @@ void blk_mq_release(struct request_queue *q) blk_mq_sysfs_deinit(q); } +static bool blk_mq_can_poll(struct blk_mq_tag_set *set) +{ + return set->nr_maps > HCTX_TYPE_POLL && + set->map[HCTX_TYPE_POLL].nr_queues; +} + struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata) { @@ -4119,6 +4125,8 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, if (!lim) lim = &default_lim; lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; + if (blk_mq_can_poll(set)) + lim->features |= BLK_FEAT_POLL; q = blk_alloc_queue(lim, set->numa_node); if (IS_ERR(q)) @@ -4273,17 +4281,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, mutex_unlock(&q->sysfs_lock); } -static void blk_mq_update_poll_flag(struct request_queue *q) -{ - struct blk_mq_tag_set *set = q->tag_set; - - if (set->nr_maps > HCTX_TYPE_POLL && - set->map[HCTX_TYPE_POLL].nr_queues) - blk_queue_flag_set(QUEUE_FLAG_POLL, q); - else - blk_queue_flag_clear(QUEUE_FLAG_POLL, q); -} - int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q) { @@ -4311,7 +4308,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->tag_set = set; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; - blk_mq_update_poll_flag(q); INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); INIT_LIST_HEAD(&q->flush_list); @@ -4798,8 +4794,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, fallback: blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { + struct queue_limits lim; + blk_mq_realloc_hw_ctxs(set, q); - blk_mq_update_poll_flag(q); + if (q->nr_hw_queues != set->nr_hw_queues) { int i = prev_nr_hw_queues; @@ -4811,6 +4809,13 @@ fallback: set->nr_hw_queues = prev_nr_hw_queues; goto fallback; } + lim = queue_limits_start_update(q); + if (blk_mq_can_poll(set)) + lim.features |= BLK_FEAT_POLL; + else + lim.features &= ~BLK_FEAT_POLL; + if (queue_limits_commit_update(q, &lim) < 0) + pr_warn("updating the poll flag failed\n"); blk_mq_map_swqueue(q); } diff --git a/block/blk-settings.c b/block/blk-settings.c index bf4622c19b5c..026ba68d8298 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -460,13 +460,15 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->features |= (b->features & BLK_FEAT_INHERIT_MASK); /* - * BLK_FEAT_NOWAIT needs to be supported both by the stacking driver - * and all underlying devices. The stacking driver sets the flag - * before stacking the limits, and this will clear the flag if any - * of the underlying devices does not support it. + * BLK_FEAT_NOWAIT and BLK_FEAT_POLL need to be supported both by the + * stacking driver and all underlying devices. The stacking driver sets + * the flags before stacking the limits, and this will clear the flags + * if any of the underlying devices does not support it. */ if (!(b->features & BLK_FEAT_NOWAIT)) t->features &= ~BLK_FEAT_NOWAIT; + if (!(b->features & BLK_FEAT_POLL)) + t->features &= ~BLK_FEAT_POLL; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_user_sectors = min_not_zero(t->max_user_sectors, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cde525724831..da4e96d686f9 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -394,13 +394,13 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, static ssize_t queue_poll_show(struct request_queue *q, char *page) { - return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page); + return queue_var_show(q->limits.features & BLK_FEAT_POLL, page); } static ssize_t queue_poll_store(struct request_queue *q, const char *page, size_t count) { - if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + if (!(q->limits.features & BLK_FEAT_POLL)) return -EINVAL; pr_info_ratelimited("writes to the poll attribute are ignored.\n"); pr_info_ratelimited("please use driver specific parameters instead.\n"); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e44697037e86..ca1f136575cf 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -582,7 +582,7 @@ int dm_split_args(int *argc, char ***argvp, char *input) static void dm_set_stacking_limits(struct queue_limits *limits) { blk_set_stacking_limits(limits); - limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; + limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL; } /* @@ -1024,14 +1024,13 @@ bool dm_table_request_based(struct dm_table *t) return __table_type_request_based(dm_table_get_type(t)); } -static bool dm_table_supports_poll(struct dm_table *t); - static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) { enum dm_queue_mode type = dm_table_get_type(t); unsigned int per_io_data_size = 0, front_pad, io_front_pad; unsigned int min_pool_size = 0, pool_size; struct dm_md_mempools *pools; + unsigned int bioset_flags = 0; if (unlikely(type == DM_TYPE_NONE)) { DMERR("no table type is set, can't allocate mempools"); @@ -1048,6 +1047,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * goto init_bs; } + if (md->queue->limits.features & BLK_FEAT_POLL) + bioset_flags |= BIOSET_PERCPU_CACHE; + for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1060,8 +1062,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * io_front_pad = roundup(per_io_data_size, __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET; - if (bioset_init(&pools->io_bs, pool_size, io_front_pad, - dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0)) + if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags)) goto out_free_pools; if (t->integrity_supported && bioset_integrity_create(&pools->io_bs, pool_size)) @@ -1404,14 +1405,6 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } -static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return !test_bit(QUEUE_FLAG_POLL, &q->queue_flags); -} - /* * type->iterate_devices() should be called when the sanity check needs to * iterate and check all underlying data devices. iterate_devices() will @@ -1459,19 +1452,6 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev, return 0; } -static bool dm_table_supports_poll(struct dm_table *t) -{ - for (unsigned int i = 0; i < t->num_targets; i++) { - struct dm_target *ti = dm_table_get_target(t, i); - - if (!ti->type->iterate_devices || - ti->type->iterate_devices(ti, device_not_poll_capable, NULL)) - return false; - } - - return true; -} - /* * Check whether a table has no data devices attached using each * target's iterate_devices method. @@ -1817,6 +1797,13 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_supports_nowait(t)) limits->features &= ~BLK_FEAT_NOWAIT; + /* + * The current polling impementation does not support request based + * stacking. + */ + if (!__table_type_bio_based(t->type)) + limits->features &= ~BLK_FEAT_POLL; + if (!dm_table_supports_discards(t)) { limits->max_hw_discard_sectors = 0; limits->discard_granularity = 0; @@ -1858,21 +1845,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, return r; dm_update_crypto_profile(q, t); - - /* - * Check for request-based device is left to - * dm_mq_init_request_queue()->blk_mq_init_allocated_queue(). - * - * For bio-based device, only set QUEUE_FLAG_POLL when all - * underlying devices supporting polling. - */ - if (__table_type_bio_based(t->type)) { - if (dm_table_supports_poll(t)) - blk_queue_flag_set(QUEUE_FLAG_POLL, q); - else - blk_queue_flag_clear(QUEUE_FLAG_POLL, q); - } - return 0; } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 61a162c9cf4e..4933194d00e5 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -538,7 +538,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_set_stacking_limits(&lim); lim.dma_alignment = 3; - lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL; if (head->ids.csi != NVME_CSI_ZNS) lim.max_zone_append_sectors = 0; @@ -549,16 +549,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) head->disk->private_data = head; sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance); - - /* - * This assumes all controllers that refer to a namespace either - * support poll queues or not. That is not a strict guarantee, - * but if the assumption is wrong the effect is only suboptimal - * performance but not correctness problem. - */ - if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL && - ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues) - blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7022e06a3dd9..cd27b66cbacc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -310,6 +310,9 @@ enum { /* supports DAX */ BLK_FEAT_DAX = (1u << 8), + + /* supports I/O polling */ + BLK_FEAT_POLL = (1u << 9), }; /* @@ -577,7 +580,6 @@ struct request_queue { #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -- cgit v1.2.3 From b1fc937a55f5735b98d9dceae5bb6ba262501f56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:49 +0200 Subject: block: move the zoned flag into the features field Move the zoned flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-23-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 5 ++--- drivers/block/null_blk/zoned.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 5 +++-- drivers/md/dm-table.c | 11 ++++++----- drivers/md/dm-zone.c | 2 +- drivers/md/dm-zoned-target.c | 2 +- drivers/nvme/host/zns.c | 2 +- drivers/scsi/sd_zbc.c | 2 +- include/linux/blkdev.h | 9 ++++++--- 10 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index 026ba68d8298..96e07f24bd9a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -68,7 +68,7 @@ static void blk_apply_bdi_limits(struct backing_dev_info *bdi, static int blk_validate_zoned_limits(struct queue_limits *lim) { - if (!lim->zoned) { + if (!(lim->features & BLK_FEAT_ZONED)) { if (WARN_ON_ONCE(lim->max_open_zones) || WARN_ON_ONCE(lim->max_active_zones) || WARN_ON_ONCE(lim->zone_write_granularity) || @@ -602,8 +602,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_secure_erase_sectors); t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); - t->zoned = max(t->zoned, b->zoned); - if (!t->zoned) { + if (!(t->features & BLK_FEAT_ZONED)) { t->zone_write_granularity = 0; t->max_zone_append_sectors = 0; } diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index f118d304f310..ca8e739e76b9 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -158,7 +158,7 @@ int null_init_zoned_dev(struct nullb_device *dev, sector += dev->zone_size_sects; } - lim->zoned = true; + lim->features |= BLK_FEAT_ZONED; lim->chunk_sectors = dev->zone_size_sects; lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 4fcde0999358..69c16018cbb1 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2196,7 +2196,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) return -EOPNOTSUPP; - lim.zoned = true; + lim.features |= BLK_FEAT_ZONED; lim.max_active_zones = p->max_active_zones; lim.max_open_zones = p->max_open_zones; lim.max_zone_append_sectors = p->max_zone_append_sectors; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 13a2f24f1766..cea45b296f8b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - lim->zoned = true; + lim->features |= BLK_FEAT_ZONED; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); @@ -1546,7 +1546,8 @@ static int virtblk_probe(struct virtio_device *vdev) * All steps that follow use the VQs therefore they need to be * placed after the virtio_device_ready() call above. */ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) { + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (lim.features & BLK_FEAT_ZONED)) { blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); err = blk_revalidate_disk_zones(vblk->disk); if (err) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ca1f136575cf..df6313c3fe6b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1605,12 +1605,12 @@ int dm_calculate_queue_limits(struct dm_table *t, ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits); - if (!zoned && ti_limits.zoned) { + if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) { /* * After stacking all limits, validate all devices * in table support this zoned model and zone sectors. */ - zoned = ti_limits.zoned; + zoned = (ti_limits.features & BLK_FEAT_ZONED); zone_sectors = ti_limits.chunk_sectors; } @@ -1658,12 +1658,12 @@ combine_limits: * zoned model on host-managed zoned block devices. * BUT... */ - if (limits->zoned) { + if (limits->features & BLK_FEAT_ZONED) { /* * ...IF the above limits stacking determined a zoned model * validate that all of the table's devices conform to it. */ - zoned = limits->zoned; + zoned = limits->features & BLK_FEAT_ZONED; zone_sectors = limits->chunk_sectors; } if (validate_hardware_zoned(t, zoned, zone_sectors)) @@ -1834,7 +1834,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * For a zoned target, setup the zones related queue attributes * and resources necessary for zone append emulation if necessary. */ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) { + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (limits->features & limits->features & BLK_FEAT_ZONED)) { r = dm_set_zones_restrictions(t, q, limits); if (r) return r; diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 5d66d916730e..88d313229b43 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -263,7 +263,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, if (nr_conv_zones >= ret) { lim->max_open_zones = 0; lim->max_active_zones = 0; - lim->zoned = false; + lim->features &= ~BLK_FEAT_ZONED; clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); disk->nr_zones = 0; return 0; diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 12236e6f46f3..cd0ee144973f 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1009,7 +1009,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->max_sectors = chunk_sectors; /* We are exposing a drive-managed zoned block device */ - limits->zoned = false; + limits->features &= ~BLK_FEAT_ZONED; } /* diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 77aa0f440a6d..06f2417aa50d 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -108,7 +108,7 @@ free_data: void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, struct nvme_zone_info *zi) { - lim->zoned = 1; + lim->features |= BLK_FEAT_ZONED; lim->max_open_zones = zi->max_open_zones; lim->max_active_zones = zi->max_active_zones; lim->max_zone_append_sectors = ns->ctrl->max_zone_append; diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 360ec9804995..d3f84665946e 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -601,7 +601,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, if (sdkp->device->type != TYPE_ZBC) return 0; - lim->zoned = true; + lim->features |= BLK_FEAT_ZONED; /* * Per ZBC and ZAC specifications, writes in sequential write required diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cd27b66cbacc..bdc30c1fb1b5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -313,6 +313,9 @@ enum { /* supports I/O polling */ BLK_FEAT_POLL = (1u << 9), + + /* is a zoned device */ + BLK_FEAT_ZONED = (1u << 10), }; /* @@ -320,7 +323,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) /* internal flags in queue_limits.flags */ enum { @@ -372,7 +375,6 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; - bool zoned; unsigned int max_open_zones; unsigned int max_active_zones; @@ -654,7 +656,8 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q) static inline bool blk_queue_is_zoned(struct request_queue *q) { - return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; + return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (q->limits.features & BLK_FEAT_ZONED); } #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From a52758a39768f441e468a41da6c15a59d6d6011a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:50 +0200 Subject: block: move the zone_resetall flag to queue_limits Move the zone_resetall flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-24-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - drivers/block/null_blk/zoned.c | 3 +-- drivers/block/ublk_drv.c | 4 +--- drivers/block/virtio_blk.c | 3 +-- drivers/nvme/host/zns.c | 3 +-- drivers/scsi/sd_zbc.c | 5 +---- include/linux/blkdev.h | 6 ++++-- 7 files changed, 9 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3a2152791384..f2fd72f4414a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -91,7 +91,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(QUIESCED), QUEUE_FLAG_NAME(PCI_P2PDMA), - QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(SQ_SCHED), diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index ca8e739e76b9..b42c00f13132 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -158,7 +158,7 @@ int null_init_zoned_dev(struct nullb_device *dev, sector += dev->zone_size_sects; } - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; lim->chunk_sectors = dev->zone_size_sects; lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; @@ -171,7 +171,6 @@ int null_register_zoned_dev(struct nullb *nullb) struct request_queue *q = nullb->q; struct gendisk *disk = nullb->disk; - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); disk->nr_zones = bdev_nr_zones(disk->part0); pr_info("%s: using %s zone append\n", diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 69c16018cbb1..4fdff13fc23b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -248,8 +248,6 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); - ub->ub_disk->nr_zones = ublk_get_nr_zones(ub); } @@ -2196,7 +2194,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) return -EOPNOTSUPP; - lim.features |= BLK_FEAT_ZONED; + lim.features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; lim.max_active_zones = p->max_active_zones; lim.max_open_zones = p->max_open_zones; lim.max_zone_append_sectors = p->max_zone_append_sectors; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index cea45b296f8b..6c64a67ab9c9 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); @@ -1548,7 +1548,6 @@ static int virtblk_probe(struct virtio_device *vdev) */ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (lim.features & BLK_FEAT_ZONED)) { - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); err = blk_revalidate_disk_zones(vblk->disk); if (err) goto out_cleanup_disk; diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 06f2417aa50d..99bb89c2495a 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -108,13 +108,12 @@ free_data: void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, struct nvme_zone_info *zi) { - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; lim->max_open_zones = zi->max_open_zones; lim->max_active_zones = zi->max_active_zones; lim->max_zone_append_sectors = ns->ctrl->max_zone_append; lim->chunk_sectors = ns->head->zsze = nvme_lba_to_sect(ns->head, zi->zone_size); - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); } static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index d3f84665946e..f7067afac79c 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -592,8 +592,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, u8 buf[SD_BUF_SIZE]) { - struct gendisk *disk = sdkp->disk; - struct request_queue *q = disk->queue; unsigned int nr_zones; u32 zone_blocks = 0; int ret; @@ -601,7 +599,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, if (sdkp->device->type != TYPE_ZBC) return 0; - lim->features |= BLK_FEAT_ZONED; + lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL; /* * Per ZBC and ZAC specifications, writes in sequential write required @@ -630,7 +628,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, sdkp->early_zone_info.zone_blocks = zone_blocks; /* The drive satisfies the kernel restrictions: set it up */ - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); if (sdkp->zones_max_open == U32_MAX) lim->max_open_zones = 0; else diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdc30c1fb1b5..1077cb8d8fd8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -316,6 +316,9 @@ enum { /* is a zoned device */ BLK_FEAT_ZONED = (1u << 10), + + /* supports Zone Reset All */ + BLK_FEAT_ZONE_RESETALL = (1u << 11), }; /* @@ -586,7 +589,6 @@ struct request_queue { #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ -#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ @@ -607,7 +609,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ - test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) + ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) -- cgit v1.2.3 From 9c1e42e3c876c66796eda23e79836a4d92613a61 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:51 +0200 Subject: block: move the pci_p2pdma flag to queue_limits Move the pci_p2pdma flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-25-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - drivers/nvme/host/core.c | 8 +++----- include/linux/blkdev.h | 7 ++++--- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f2fd72f4414a..8b5a68861c11 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -90,7 +90,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(QUIESCED), - QUEUE_FLAG_NAME(PCI_P2PDMA), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(SQ_SCHED), diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5ecf762d7c88..31e752e8d632 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3735,6 +3735,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) if (ctrl->opts && ctrl->opts->data_digest) lim.features |= BLK_FEAT_STABLE_WRITES; + if (ctrl->ops->supports_pci_p2pdma && + ctrl->ops->supports_pci_p2pdma(ctrl)) + lim.features |= BLK_FEAT_PCI_P2PDMA; disk = blk_mq_alloc_disk(ctrl->tagset, &lim, ns); if (IS_ERR(disk)) @@ -3744,11 +3747,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) ns->disk = disk; ns->queue = disk->queue; - - if (ctrl->ops->supports_pci_p2pdma && - ctrl->ops->supports_pci_p2pdma(ctrl)) - blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); - ns->ctrl = ctrl; kref_init(&ns->kref); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1077cb8d8fd8..ab0f7dfba556 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -319,6 +319,9 @@ enum { /* supports Zone Reset All */ BLK_FEAT_ZONE_RESETALL = (1u << 11), + + /* supports PCI(e) p2p requests */ + BLK_FEAT_PCI_P2PDMA = (1u << 12), }; /* @@ -588,7 +591,6 @@ struct request_queue { #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ @@ -611,8 +613,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_zone_resetall(q) \ ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) -#define blk_queue_pci_p2pdma(q) \ - test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) +#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME #define blk_queue_rq_alloc_time(q) \ test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags) -- cgit v1.2.3 From 8c8f5c85b20d0a7dc0ab9b2a17318130d69ceb5a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:52 +0200 Subject: block: move the skip_tagset_quiesce flag to queue_limits Move the skip_tagset_quiesce flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-26-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - drivers/nvme/host/core.c | 8 +++++--- include/linux/blkdev.h | 6 ++++-- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 8b5a68861c11..344f9e503bdb 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -93,7 +93,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(RQ_ALLOC_TIME), QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(SQ_SCHED), - QUEUE_FLAG_NAME(SKIP_TAGSET_QUIESCE), }; #undef QUEUE_FLAG_NAME diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 31e752e8d632..bf410d10b120 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4489,13 +4489,15 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, return ret; if (ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->connect_q = blk_mq_alloc_queue(set, NULL, NULL); + struct queue_limits lim = { + .features = BLK_FEAT_SKIP_TAGSET_QUIESCE, + }; + + ctrl->connect_q = blk_mq_alloc_queue(set, &lim, NULL); if (IS_ERR(ctrl->connect_q)) { ret = PTR_ERR(ctrl->connect_q); goto out_free_tag_set; } - blk_queue_flag_set(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, - ctrl->connect_q); } ctrl->tagset = set; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ab0f7dfba556..2c433ebf6f20 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -322,6 +322,9 @@ enum { /* supports PCI(e) p2p requests */ BLK_FEAT_PCI_P2PDMA = (1u << 12), + + /* skip this queue in blk_mq_(un)quiesce_tagset */ + BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), }; /* @@ -594,7 +597,6 @@ struct request_queue { #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ -#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ #define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) @@ -629,7 +631,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) #define blk_queue_skip_tagset_quiesce(q) \ - test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags) + ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -- cgit v1.2.3 From 339d3948c07b4aa2940aeb874294a7d6782cec16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:53 +0200 Subject: block: move the bounce flag into the features field Move the bounce flag into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-27-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 1 - block/blk.h | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 6 ++++-- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index 96e07f24bd9a..d0e9096f93ca 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -479,7 +479,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_write_zeroes_sectors); t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t), queue_limits_max_zone_append_sectors(b)); - t->bounce = max(t->bounce, b->bounce); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); diff --git a/block/blk.h b/block/blk.h index 79e8d5d4fe0c..fa32f7fad5d7 100644 --- a/block/blk.h +++ b/block/blk.h @@ -394,7 +394,7 @@ struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q); static inline bool blk_queue_may_bounce(struct request_queue *q) { return IS_ENABLED(CONFIG_BOUNCE) && - q->limits.bounce == BLK_BOUNCE_HIGH && + (q->limits.features & BLK_FEAT_BOUNCE_HIGH) && max_low_pfn >= max_pfn; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 54f771ec8cfb..e2f7bfb2b9e4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1986,7 +1986,7 @@ void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim) shost->dma_alignment, dma_get_cache_alignment() - 1); if (shost->no_highmem) - lim->bounce = BLK_BOUNCE_HIGH; + lim->features |= BLK_FEAT_BOUNCE_HIGH; dma_set_seg_boundary(dev, shost->dma_boundary); dma_set_max_seg_size(dev, shost->max_segment_size); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c433ebf6f20..e96ba7b97288 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -325,6 +325,9 @@ enum { /* skip this queue in blk_mq_(un)quiesce_tagset */ BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), + + /* bounce all highmem pages */ + BLK_FEAT_BOUNCE_HIGH = (1u << 14), }; /* @@ -332,7 +335,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH) /* internal flags in queue_limits.flags */ enum { @@ -352,7 +355,6 @@ enum blk_bounce { struct queue_limits { unsigned int features; unsigned int flags; - enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3