From 0f00b82e5413571ed225ddbccad6882d7ea60bc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Mar 2021 08:45:50 +0100 Subject: block: remove the revalidate_disk method No implementations left. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210308074550.422714-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc6bc8383b43..b4241f73f7a8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1870,7 +1870,6 @@ struct block_device_operations { unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); void (*unlock_native_capacity) (struct gendisk *); - int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); /* this callback is with swap_lock and sometimes page table lock held */ -- cgit v1.2.3 From ce288e0535688cc3475a3c3d4d96624514c3550c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 Mar 2021 09:29:59 +0200 Subject: block: remove BLK_BOUNCE_ISA support Remove the BLK_BOUNCE_ISA support now that all users are gone. Signed-off-by: Christoph Hellwig Acked-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20210331073001.46776-7-hch@lst.de Signed-off-by: Jens Axboe --- block/bio-integrity.c | 3 +- block/blk-map.c | 4 +- block/blk-settings.c | 11 ---- block/blk.h | 5 -- block/bounce.c | 124 ++++++++++------------------------------------ block/scsi_ioctl.c | 2 +- drivers/ata/libata-scsi.c | 3 +- include/linux/blkdev.h | 7 --- mm/Kconfig | 9 ++-- 9 files changed, 35 insertions(+), 133 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index dfa652122a2d..4b4eb8964a6f 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -204,7 +204,6 @@ bool bio_integrity_prep(struct bio *bio) { struct bio_integrity_payload *bip; struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); - struct request_queue *q = bio->bi_bdev->bd_disk->queue; void *buf; unsigned long start, end; unsigned int len, nr_pages; @@ -238,7 +237,7 @@ bool bio_integrity_prep(struct bio *bio) /* Allocate kernel buffer for protection data */ len = intervals * bi->tuple_size; - buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); + buf = kmalloc(len, GFP_NOIO); status = BLK_STS_RESOURCE; if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); diff --git a/block/blk-map.c b/block/blk-map.c index 1ffef782fcf2..b62b52dcb61d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -181,7 +181,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, i++; } else { - page = alloc_page(rq->q->bounce_gfp | gfp_mask); + page = alloc_page(GFP_NOIO | gfp_mask); if (!page) { ret = -ENOMEM; goto cleanup; @@ -486,7 +486,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | gfp_mask); + page = alloc_page(GFP_NOIO | gfp_mask); if (!page) goto cleanup; diff --git a/block/blk-settings.c b/block/blk-settings.c index b4aa2f37fab6..f9937dd2810e 100644 
--- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -103,28 +103,17 @@ EXPORT_SYMBOL(blk_set_stacking_limits); void blk_queue_bounce_limit(struct request_queue *q, u64 max_addr) { unsigned long b_pfn = max_addr >> PAGE_SHIFT; - int dma = 0; - q->bounce_gfp = GFP_NOIO; #if BITS_PER_LONG == 64 /* * Assume anything <= 4GB can be handled by IOMMU. Actually * some IOMMUs can handle everything, but I don't know of a * way to test this here. */ - if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) - dma = 1; q->limits.bounce_pfn = max(max_low_pfn, b_pfn); #else - if (b_pfn < blk_max_low_pfn) - dma = 1; q->limits.bounce_pfn = b_pfn; #endif - if (dma) { - init_emergency_isa_pool(); - q->bounce_gfp = GFP_NOIO | GFP_DMA; - q->limits.bounce_pfn = b_pfn; - } } EXPORT_SYMBOL(blk_queue_bounce_limit); diff --git a/block/blk.h b/block/blk.h index 3b53e44b967e..895c9f4a5182 100644 --- a/block/blk.h +++ b/block/blk.h @@ -312,13 +312,8 @@ static inline void blk_throtl_stat_add(struct request *rq, u64 time) { } #endif #ifdef CONFIG_BOUNCE -extern int init_emergency_isa_pool(void); extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); #else -static inline int init_emergency_isa_pool(void) -{ - return 0; -} static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) { } diff --git a/block/bounce.c b/block/bounce.c index 6c441f4f1cd4..debd5b0bd318 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -29,7 +29,7 @@ #define ISA_POOL_SIZE 16 static struct bio_set bounce_bio_set, bounce_bio_split; -static mempool_t page_pool, isa_page_pool; +static mempool_t page_pool; static void init_bounce_bioset(void) { @@ -89,41 +89,6 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) #endif /* CONFIG_HIGHMEM */ -/* - * allocate pages in the DMA region for the ISA pool - */ -static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) -{ - return mempool_alloc_pages(gfp_mask | GFP_DMA, data); -} - -static 
DEFINE_MUTEX(isa_mutex); - -/* - * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA - * as the max address, so check if the pool has already been created. - */ -int init_emergency_isa_pool(void) -{ - int ret; - - mutex_lock(&isa_mutex); - - if (mempool_initialized(&isa_page_pool)) { - mutex_unlock(&isa_mutex); - return 0; - } - - ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa, - mempool_free_pages, (void *) 0); - BUG_ON(ret); - - pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE); - init_bounce_bioset(); - mutex_unlock(&isa_mutex); - return 0; -} - /* * Simple bounce buffer support for highmem pages. Depending on the * queue gfp mask set, *to may or may not be a highmem page. kmap it @@ -159,7 +124,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from) } } -static void bounce_end_io(struct bio *bio, mempool_t *pool) +static void bounce_end_io(struct bio *bio) { struct bio *bio_orig = bio->bi_private; struct bio_vec *bvec, orig_vec; @@ -173,7 +138,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) orig_vec = bio_iter_iovec(bio_orig, orig_iter); if (bvec->bv_page != orig_vec.bv_page) { dec_zone_page_state(bvec->bv_page, NR_BOUNCE); - mempool_free(bvec->bv_page, pool); + mempool_free(bvec->bv_page, &page_pool); } bio_advance_iter(bio_orig, &orig_iter, orig_vec.bv_len); } @@ -185,33 +150,17 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) static void bounce_end_io_write(struct bio *bio) { - bounce_end_io(bio, &page_pool); -} - -static void bounce_end_io_write_isa(struct bio *bio) -{ - - bounce_end_io(bio, &isa_page_pool); + bounce_end_io(bio); } -static void __bounce_end_io_read(struct bio *bio, mempool_t *pool) +static void bounce_end_io_read(struct bio *bio) { struct bio *bio_orig = bio->bi_private; if (!bio->bi_status) copy_to_high_bio_irq(bio_orig, bio); - bounce_end_io(bio, pool); -} - -static void bounce_end_io_read(struct bio *bio) -{ - __bounce_end_io_read(bio, 
&page_pool); -} - -static void bounce_end_io_read_isa(struct bio *bio) -{ - __bounce_end_io_read(bio, &isa_page_pool); + bounce_end_io(bio); } static struct bio *bounce_clone_bio(struct bio *bio_src) @@ -287,8 +236,8 @@ err_put: return NULL; } -static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, - mempool_t *pool) + +void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) { struct bio *bio; int rw = bio_data_dir(*bio_orig); @@ -298,6 +247,20 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, bool bounce = false; int sectors = 0; + /* + * Data-less bio, nothing to bounce + */ + if (!bio_has_data(*bio_orig)) + return; + + /* + * Just check if the bounce pfn is equal to or bigger than the highest + * pfn in the system -- in that case, don't waste time iterating over + * bio segments + */ + if (q->limits.bounce_pfn >= blk_max_pfn) + return; + bio_for_each_segment(from, *bio_orig, iter) { if (i++ < BIO_MAX_VECS) sectors += from.bv_len >> 9; @@ -327,7 +290,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (page_to_pfn(page) <= q->limits.bounce_pfn) continue; - to->bv_page = mempool_alloc(pool, q->bounce_gfp); + to->bv_page = mempool_alloc(&page_pool, GFP_NOIO); inc_zone_page_state(to->bv_page, NR_BOUNCE); if (rw == WRITE) { @@ -346,46 +309,11 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, bio->bi_flags |= (1 << BIO_BOUNCED); - if (pool == &page_pool) { + if (rw == READ) + bio->bi_end_io = bounce_end_io_read; + else bio->bi_end_io = bounce_end_io_write; - if (rw == READ) - bio->bi_end_io = bounce_end_io_read; - } else { - bio->bi_end_io = bounce_end_io_write_isa; - if (rw == READ) - bio->bi_end_io = bounce_end_io_read_isa; - } bio->bi_private = *bio_orig; *bio_orig = bio; } - -void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) -{ - mempool_t *pool; - - /* - * Data-less bio, nothing to bounce - */ - if 
(!bio_has_data(*bio_orig)) - return; - - /* - * for non-isa bounce case, just check if the bounce pfn is equal - * to or bigger than the highest pfn in the system -- in that case, - * don't waste time iterating over bio segments - */ - if (!(q->bounce_gfp & GFP_DMA)) { - if (q->limits.bounce_pfn >= blk_max_pfn) - return; - pool = &page_pool; - } else { - BUG_ON(!mempool_initialized(&isa_page_pool)); - pool = &isa_page_pool; - } - - /* - * slow path - */ - __blk_queue_bounce(q, bio_orig, pool); -} diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 6599bac0a78c..1048b0925567 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -431,7 +431,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, bytes = max(in_len, out_len); if (bytes) { - buffer = kzalloc(bytes, q->bounce_gfp | GFP_USER| __GFP_NOWARN); + buffer = kzalloc(bytes, GFP_NOIO | GFP_USER | __GFP_NOWARN); if (!buffer) return -ENOMEM; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 48b8934970f3..fd8b6febbf70 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1043,8 +1043,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) blk_queue_max_segments(q, queue_max_segments(q) - 1); sdev->dma_drain_len = ATAPI_MAX_DRAIN; - sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len, - q->bounce_gfp | GFP_KERNEL); + sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len, GFP_NOIO); if (!sdev->dma_drain_buf) { ata_dev_err(dev, "drain buffer allocation failed\n"); return -ENOMEM; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc6bc8383b43..0dbb72ea3735 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -436,11 +436,6 @@ struct request_queue { */ int id; - /* - * queue needs bounce pages for pages above this limit - */ - gfp_t bounce_gfp; - spinlock_t queue_lock; /* @@ -847,7 +842,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; * * BLK_BOUNCE_HIGH : bounce all highmem pages * 
BLK_BOUNCE_ANY : don't bounce anything - * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary */ #if BITS_PER_LONG == 32 @@ -856,7 +850,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_BOUNCE_HIGH -1ULL #endif #define BLK_BOUNCE_ANY (-1ULL) -#define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) /* * default timeout for SG_IO if none specified diff --git a/mm/Kconfig b/mm/Kconfig index 24c045b24b95..d0808a23e54b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -283,12 +283,11 @@ config PHYS_ADDR_T_64BIT config BOUNCE bool "Enable bounce buffers" default y - depends on BLOCK && MMU && (ZONE_DMA || HIGHMEM) + depends on BLOCK && MMU && HIGHMEM help - Enable bounce buffers for devices that cannot access - the full range of memory available to the CPU. Enabled - by default when ZONE_DMA or HIGHMEM is selected, but you - may say n to override this. + Enable bounce buffers for devices that cannot access the full range of + memory available to the CPU. Enabled by default when HIGHMEM is + selected, but you may say n to override this. config VIRT_TO_BUS bool -- cgit v1.2.3 From 9bb33f24abbd0fa2fadad01ec75438d7cc239189 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 Mar 2021 09:30:00 +0200 Subject: block: refactor the bounce buffering code Get rid of all the PFN arithmetics and just use an enum for the two remaining options, and use PageHighMem for the actual bounce decision. Add a fast path to entirely avoid the call for the common case of a queue not using the legacy bouncing code. Signed-off-by: Christoph Hellwig Acked-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20210331073001.46776-8-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 6 ++---- block/blk-settings.c | 42 ++++++++---------------------------------- block/blk.h | 16 ++++++++++++---- block/bounce.c | 35 +++++------------------------------ include/linux/blkdev.h | 29 +++++++++++------------------ 5 files changed, 38 insertions(+), 90 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-core.c b/block/blk-core.c index fc60ff208497..9bcdae93f6d4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1161,10 +1161,8 @@ static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q, } /* - * queue's settings related to segment counting like q->bounce_pfn - * may differ from that of other stacking queues. - * Recalculate it to check the request correctly on this queue's - * limitation. + * The queue settings related to segment counting may differ from the + * original queue. */ rq->nr_phys_segments = blk_recalc_rq_segments(rq); if (rq->nr_phys_segments > queue_max_segments(q)) { diff --git a/block/blk-settings.c b/block/blk-settings.c index f9937dd2810e..9c009090c4b5 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -7,7 +7,6 @@ #include #include #include -#include /* for max_pfn/max_low_pfn */ #include #include #include @@ -17,11 +16,6 @@ #include "blk.h" #include "blk-wbt.h" -unsigned long blk_max_low_pfn; -EXPORT_SYMBOL(blk_max_low_pfn); - -unsigned long blk_max_pfn; - void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) { q->rq_timeout = timeout; @@ -55,7 +49,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->discard_alignment = 0; lim->discard_misaligned = 0; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; - lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); + lim->bounce = BLK_BOUNCE_NONE; lim->alignment_offset = 0; lim->io_opt = 0; lim->misaligned = 0; @@ -92,28 +86,16 
@@ EXPORT_SYMBOL(blk_set_stacking_limits); /** * blk_queue_bounce_limit - set bounce buffer limit for queue * @q: the request queue for the device - * @max_addr: the maximum address the device can handle + * @bounce: bounce limit to enforce * * Description: - * Different hardware can have different requirements as to what pages - * it can do I/O directly to. A low level driver can call - * blk_queue_bounce_limit to have lower memory pages allocated as bounce - * buffers for doing I/O to pages residing above @max_addr. + * Force bouncing for ISA DMA ranges or highmem. + * + * DEPRECATED, don't use in new code. **/ -void blk_queue_bounce_limit(struct request_queue *q, u64 max_addr) +void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce) { - unsigned long b_pfn = max_addr >> PAGE_SHIFT; - -#if BITS_PER_LONG == 64 - /* - * Assume anything <= 4GB can be handled by IOMMU. Actually - * some IOMMUs can handle everything, but I don't know of a - * way to test this here. - */ - q->limits.bounce_pfn = max(max_low_pfn, b_pfn); -#else - q->limits.bounce_pfn = b_pfn; -#endif + q->limits.bounce = bounce; } EXPORT_SYMBOL(blk_queue_bounce_limit); @@ -536,7 +518,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_write_zeroes_sectors); t->max_zone_append_sectors = min(t->max_zone_append_sectors, b->max_zone_append_sectors); - t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); + t->bounce = max(t->bounce, b->bounce); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); @@ -916,11 +898,3 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) } } EXPORT_SYMBOL_GPL(blk_queue_set_zoned); - -static int __init blk_settings_init(void) -{ - blk_max_low_pfn = max_low_pfn - 1; - blk_max_pfn = max_pfn - 1; - return 0; -} -subsys_initcall(blk_settings_init); diff --git a/block/blk.h b/block/blk.h index 895c9f4a5182..8f4337c5a9e6 100644 --- a/block/blk.h +++ b/block/blk.h @@ -6,6 +6,7 @@ 
#include #include #include +#include /* for max_pfn/max_low_pfn */ #include #include "blk-crypto-internal.h" #include "blk-mq.h" @@ -311,13 +312,20 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { } static inline void blk_throtl_stat_add(struct request *rq, u64 time) { } #endif -#ifdef CONFIG_BOUNCE -extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); -#else +void __blk_queue_bounce(struct request_queue *q, struct bio **bio); + +static inline bool blk_queue_may_bounce(struct request_queue *q) +{ + return IS_ENABLED(CONFIG_BOUNCE) && + q->limits.bounce == BLK_BOUNCE_HIGH && + max_low_pfn >= max_pfn; +} + static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) { + if (unlikely(blk_queue_may_bounce(q) && bio_has_data(*bio))) + __blk_queue_bounce(q, bio); } -#endif /* CONFIG_BOUNCE */ #ifdef CONFIG_BLK_CGROUP_IOLATENCY extern int blk_iolatency_init(struct request_queue *q); diff --git a/block/bounce.c b/block/bounce.c index debd5b0bd318..6bafc0d1f867 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -49,11 +48,11 @@ static void init_bounce_bioset(void) bounce_bs_setup = true; } -#if defined(CONFIG_HIGHMEM) static __init int init_emergency_pool(void) { int ret; -#if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG) + +#ifndef CONFIG_MEMORY_HOTPLUG if (max_pfn <= max_low_pfn) return 0; #endif @@ -67,9 +66,7 @@ static __init int init_emergency_pool(void) } __initcall(init_emergency_pool); -#endif -#ifdef CONFIG_HIGHMEM /* * highmem version, map in to vec */ @@ -82,13 +79,6 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) kunmap_atomic(vto); } -#else /* CONFIG_HIGHMEM */ - -#define bounce_copy_vec(to, vfrom) \ - memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) - -#endif /* CONFIG_HIGHMEM */ - /* * Simple bounce buffer support for highmem pages. 
Depending on the * queue gfp mask set, *to may or may not be a highmem page. kmap it @@ -236,8 +226,7 @@ err_put: return NULL; } - -void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) +void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) { struct bio *bio; int rw = bio_data_dir(*bio_orig); @@ -247,24 +236,10 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) bool bounce = false; int sectors = 0; - /* - * Data-less bio, nothing to bounce - */ - if (!bio_has_data(*bio_orig)) - return; - - /* - * Just check if the bounce pfn is equal to or bigger than the highest - * pfn in the system -- in that case, don't waste time iterating over - * bio segments - */ - if (q->limits.bounce_pfn >= blk_max_pfn) - return; - bio_for_each_segment(from, *bio_orig, iter) { if (i++ < BIO_MAX_VECS) sectors += from.bv_len >> 9; - if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn) + if (PageHighMem(from.bv_page)) bounce = true; } if (!bounce) @@ -287,7 +262,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) for (i = 0, to = bio->bi_io_vec; i < bio->bi_vcnt; to++, i++) { struct page *page = to->bv_page; - if (page_to_pfn(page) <= q->limits.bounce_pfn) + if (!PageHighMem(page)) continue; to->bv_page = mempool_alloc(&page_pool, GFP_NOIO); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0dbb72ea3735..55cc8b96c844 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -313,8 +313,17 @@ enum blk_zoned_model { BLK_ZONED_HM, /* Host-managed zoned block device */ }; +/* + * BLK_BOUNCE_NONE: never bounce (default) + * BLK_BOUNCE_HIGH: bounce all highmem pages + */ +enum blk_bounce { + BLK_BOUNCE_NONE, + BLK_BOUNCE_HIGH, +}; + struct queue_limits { - unsigned long bounce_pfn; + enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; @@ -835,22 +844,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) return q->nr_requests; } -extern 
unsigned long blk_max_low_pfn, blk_max_pfn; - -/* - * standard bounce addresses: - * - * BLK_BOUNCE_HIGH : bounce all highmem pages - * BLK_BOUNCE_ANY : don't bounce anything - */ - -#if BITS_PER_LONG == 32 -#define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) -#else -#define BLK_BOUNCE_HIGH -1ULL -#endif -#define BLK_BOUNCE_ANY (-1ULL) - /* * default timeout for SG_IO if none specified */ @@ -1134,7 +1127,7 @@ extern void blk_abort_request(struct request *); * Access functions for manipulating queue properties */ extern void blk_cleanup_queue(struct request_queue *); -extern void blk_queue_bounce_limit(struct request_queue *, u64); +void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); -- cgit v1.2.3 From 393bb12e00580aaa23356504eed38d8f5571153a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 Mar 2021 09:30:01 +0200 Subject: block: stop calling blk_queue_bounce for passthrough requests Instead of overloading the passthrough fast path with the deprecated block layer bounce buffering let the users that combine an old undermaintained driver with a highmem system pay the price by always falling back to copies in that case. Signed-off-by: Christoph Hellwig Acked-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20210331073001.46776-9-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-map.c | 116 ++++++++++--------------------------- block/bounce.c | 11 +--- drivers/nvme/host/lightnvm.c | 2 +- drivers/target/target_core_pscsi.c | 4 +- include/linux/blkdev.h | 2 +- 5 files changed, 36 insertions(+), 99 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-map.c b/block/blk-map.c index b62b52dcb61d..dac78376acc8 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -123,7 +123,6 @@ static int bio_uncopy_user(struct bio *bio) bio_free_pages(bio); } kfree(bmd); - bio_put(bio); return ret; } @@ -132,7 +131,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, { struct bio_map_data *bmd; struct page *page; - struct bio *bio, *bounce_bio; + struct bio *bio; int i = 0, ret; int nr_pages; unsigned int len = iter->count; @@ -218,16 +217,9 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, bio->bi_private = bmd; - bounce_bio = bio; - ret = blk_rq_append_bio(rq, &bounce_bio); + ret = blk_rq_append_bio(rq, bio); if (ret) goto cleanup; - - /* - * We link the bounce buffer in and could have to traverse it later, so - * we have to get a ref to prevent it from being freed - */ - bio_get(bounce_bio); return 0; cleanup: if (!map_data) @@ -242,7 +234,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, gfp_t gfp_mask) { unsigned int max_sectors = queue_max_hw_sectors(rq->q); - struct bio *bio, *bounce_bio; + struct bio *bio; int ret; int j; @@ -304,49 +296,17 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, break; } - /* - * Subtle: if we end up needing to bounce a bio, it would normally - * disappear when its bi_end_io is run. 
However, we need the original - * bio for the unmap, so grab an extra reference to it - */ - bio_get(bio); - - bounce_bio = bio; - ret = blk_rq_append_bio(rq, &bounce_bio); + ret = blk_rq_append_bio(rq, bio); if (ret) - goto out_put_orig; - - /* - * We link the bounce buffer in and could have to traverse it - * later, so we have to get a ref to prevent it from being freed - */ - bio_get(bounce_bio); + goto out_unmap; return 0; - out_put_orig: - bio_put(bio); out_unmap: bio_release_pages(bio, false); bio_put(bio); return ret; } -/** - * bio_unmap_user - unmap a bio - * @bio: the bio being unmapped - * - * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from - * process context. - * - * bio_unmap_user() may sleep. - */ -static void bio_unmap_user(struct bio *bio) -{ - bio_release_pages(bio, bio_data_dir(bio) == READ); - bio_put(bio); - bio_put(bio); -} - static void bio_invalidate_vmalloc_pages(struct bio *bio) { #ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE @@ -519,33 +479,27 @@ cleanup: * Append a bio to a passthrough request. Only works if the bio can be merged * into the request based on the driver constraints. 
*/ -int blk_rq_append_bio(struct request *rq, struct bio **bio) +int blk_rq_append_bio(struct request *rq, struct bio *bio) { - struct bio *orig_bio = *bio; struct bvec_iter iter; struct bio_vec bv; unsigned int nr_segs = 0; - blk_queue_bounce(rq->q, bio); + if (WARN_ON_ONCE(rq->q->limits.bounce != BLK_BOUNCE_NONE)) + return -EINVAL; - bio_for_each_bvec(bv, *bio, iter) + bio_for_each_bvec(bv, bio, iter) nr_segs++; if (!rq->bio) { - blk_rq_bio_prep(rq, *bio, nr_segs); + blk_rq_bio_prep(rq, bio, nr_segs); } else { - if (!ll_back_merge_fn(rq, *bio, nr_segs)) { - if (orig_bio != *bio) { - bio_put(*bio); - *bio = orig_bio; - } + if (!ll_back_merge_fn(rq, bio, nr_segs)) return -EINVAL; - } - - rq->biotail->bi_next = *bio; - rq->biotail = *bio; - rq->__data_len += (*bio)->bi_iter.bi_size; - bio_crypt_free_ctx(*bio); + rq->biotail->bi_next = bio; + rq->biotail = bio; + rq->__data_len += (bio)->bi_iter.bi_size; + bio_crypt_free_ctx(bio); } return 0; @@ -566,12 +520,6 @@ EXPORT_SYMBOL(blk_rq_append_bio); * * A matching blk_rq_unmap_user() must be issued at the end of I/O, while * still in process context. - * - * Note: The mapped bio may need to be bounced through blk_queue_bounce() - * before being submitted to the device, as pages mapped may be out of - * reach. It's the callers responsibility to make sure this happens. The - * original bio must be passed back in to blk_rq_unmap_user() for proper - * unmapping. 
*/ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct rq_map_data *map_data, @@ -588,6 +536,8 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, if (map_data) copy = true; + else if (blk_queue_may_bounce(q)) + copy = true; else if (iov_iter_alignment(iter) & align) copy = true; else if (queue_virt_boundary(q)) @@ -641,25 +591,21 @@ EXPORT_SYMBOL(blk_rq_map_user); */ int blk_rq_unmap_user(struct bio *bio) { - struct bio *mapped_bio; + struct bio *next_bio; int ret = 0, ret2; while (bio) { - mapped_bio = bio; - if (unlikely(bio_flagged(bio, BIO_BOUNCED))) - mapped_bio = bio->bi_private; - if (bio->bi_private) { - ret2 = bio_uncopy_user(mapped_bio); + ret2 = bio_uncopy_user(bio); if (ret2 && !ret) ret = ret2; } else { - bio_unmap_user(mapped_bio); + bio_release_pages(bio, bio_data_dir(bio) == READ); } - mapped_bio = bio; + next_bio = bio; bio = bio->bi_next; - bio_put(mapped_bio); + bio_put(next_bio); } return ret; @@ -684,7 +630,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, { int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; - struct bio *bio, *orig_bio; + struct bio *bio; int ret; if (len > (queue_max_hw_sectors(q) << 9)) @@ -692,7 +638,8 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (!len || !kbuf) return -EINVAL; - if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf)) + if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) || + blk_queue_may_bounce(q)) bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); else bio = bio_map_kern(q, kbuf, len, gfp_mask); @@ -703,14 +650,9 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= req_op(rq); - orig_bio = bio; - ret = blk_rq_append_bio(rq, &bio); - if (unlikely(ret)) { - /* request is too big */ - bio_put(orig_bio); - return ret; - } - - return 0; + ret = blk_rq_append_bio(rq, bio); + if 
(unlikely(ret)) + bio_put(bio); + return ret; } EXPORT_SYMBOL(blk_rq_map_kern); diff --git a/block/bounce.c b/block/bounce.c index 6bafc0d1f867..94081e013c58 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -180,12 +180,8 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) * asking for trouble and would force extra work on * __bio_clone_fast() anyways. */ - if (bio_is_passthrough(bio_src)) - bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, - bio_segments(bio_src)); - else - bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), - &bounce_bio_set); + bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), + &bounce_bio_set); bio->bi_bdev = bio_src->bi_bdev; if (bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); @@ -245,8 +241,7 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) if (!bounce) return; - if (!bio_is_passthrough(*bio_orig) && - sectors < bio_sectors(*bio_orig)) { + if (sectors < bio_sectors(*bio_orig)) { bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split); bio_chain(bio, *bio_orig); submit_bio_noacct(*bio_orig); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index b705988629f2..f6ca2fbb711e 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -660,7 +660,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q, rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; if (rqd->bio) - blk_rq_append_bio(rq, &rqd->bio); + blk_rq_append_bio(rq, rqd->bio); else rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 3cbc074992bc..7df4a9c9c7ff 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -911,7 +911,7 @@ new_bio: " %d i: %d bio: %p, allocating another" " bio\n", bio->bi_vcnt, i, bio); - rc = blk_rq_append_bio(req, &bio); + rc = blk_rq_append_bio(req, bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); 
goto fail; @@ -930,7 +930,7 @@ new_bio: } if (bio) { - rc = blk_rq_append_bio(req, &bio); + rc = blk_rq_append_bio(req, bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); goto fail; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 55cc8b96c844..d5d320da51f8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -909,7 +909,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern int blk_rq_append_bio(struct request *rq, struct bio **bio); +int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_queue_split(struct bio **); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, -- cgit v1.2.3 From 907d52310024fae6632aabfc7e833decaf185e5f Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Thu, 1 Apr 2021 10:19:25 +0800 Subject: block: add queue_to_disk() to get gendisk from request_queue Sometimes we need to get the corresponding gendisk from request_queue. It is preferred that block drivers store private data in gendisk->private_data rather than request_queue->queuedata, e.g. see: commit c4a59c4e5db3 ("dm: stop using ->queuedata"). So if only request_queue is given, we need to get its corresponding gendisk to get the private data stored in that gendisk. 
Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer Signed-off-by: Jeffle Xu Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ include/trace/events/kyber.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5d320da51f8..30d2090583ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -689,6 +689,8 @@ static inline bool blk_account_rq(struct request *rq) dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ (dir), (attrs)) +#define queue_to_disk(q) (dev_to_disk(kobj_to_dev((q)->kobj.parent))) + static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h index c0e7d24ca256..f9802562edf6 100644 --- a/include/trace/events/kyber.h +++ b/include/trace/events/kyber.h @@ -30,7 +30,7 @@ TRACE_EVENT(kyber_latency, ), TP_fast_assign( - __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + __entry->dev = disk_devt(queue_to_disk(q)); strlcpy(__entry->domain, domain, sizeof(__entry->domain)); strlcpy(__entry->type, type, sizeof(__entry->type)); __entry->percentile = percentile; @@ -59,7 +59,7 @@ TRACE_EVENT(kyber_adjust, ), TP_fast_assign( - __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + __entry->dev = disk_devt(queue_to_disk(q)); strlcpy(__entry->domain, domain, sizeof(__entry->domain)); __entry->depth = depth; ), @@ -81,7 +81,7 @@ TRACE_EVENT(kyber_throttled, ), TP_fast_assign( - __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + __entry->dev = disk_devt(queue_to_disk(q)); strlcpy(__entry->domain, domain, sizeof(__entry->domain)); ), -- cgit v1.2.3 From 8d663f34f8afcf5fc6a84c3cc4fa28cc84d58e39 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Thu, 15 Apr 2021 11:39:20 +0800 Subject: blk-mq: bypass IO scheduler's limit_depth for passthrough request Commit 
01e99aeca39796003 ("blk-mq: insert passthrough request into hctx->dispatch directly") gives high priority to passthrough requests and bypasses the underlying IO scheduler. But when we allocate a tag for such a request it still runs the io-scheduler's limit_depth callback, while what we really want is to give the full sbitmap-depth capability to such a request for acquiring an available tag. blktrace shows PC requests (dmraid -s -c -i) hit bfq's limit_depth: 8,0 2 0 0.000000000 39952 1,0 m N bfq [bfq_limit_depth] wr_busy 0 sync 0 depth 8 8,0 2 1 0.000008134 39952 D R 4 [dmraid] 8,0 2 2 0.000021538 24 C R [0] 8,0 2 0 0.000035442 39952 1,0 m N bfq [bfq_limit_depth] wr_busy 0 sync 0 depth 8 8,0 2 3 0.000038813 39952 D R 24 [dmraid] 8,0 2 4 0.000044356 24 C R [0] This patch introduces a new wrapper to make the code less ugly. Signed-off-by: Lin Feng Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210415033920.213963-1-linf@wangsu.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- include/linux/blkdev.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-mq.c b/block/blk-mq.c index d4d7c1caa439..927189a55575 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -361,11 +361,12 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data) if (e) { /* - * Flush requests are special and go directly to the + * Flush/passthrough requests are special and go directly to the * dispatch list. Don't include reserved tags in the * limiting, as it isn't useful. 
*/ if (!op_is_flush(data->cmd_flags) && + !blk_op_is_passthrough(data->cmd_flags) && e->type->ops.limit_depth && !(data->flags & BLK_MQ_REQ_RESERVED)) e->type->ops.limit_depth(data->cmd_flags, data); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 30d2090583ad..f2e77ba97550 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -274,6 +274,12 @@ static inline bool bio_is_passthrough(struct bio *bio) return blk_op_is_scsi(op) || blk_op_is_private(op); } +static inline bool blk_op_is_passthrough(unsigned int op) +{ + return (blk_op_is_scsi(op & REQ_OP_MASK) || + blk_op_is_private(op & REQ_OP_MASK)); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; -- cgit v1.2.3 From cd2c7545ae1beac3b6aae033c7f31193b3255946 Mon Sep 17 00:00:00 2001 From: Changheun Lee Date: Mon, 3 May 2021 18:52:03 +0900 Subject: bio: limit bio max size bio size can grow up to 4GB when multi-page bvec is enabled. But sometimes it would lead to inefficient behaviors. In case of large chunk direct I/O, - 32MB chunk read in user space - all pages for 32MB would be merged to a bio structure if the pages' physical addresses are contiguous. This delays the submit until the merge completes. bio max size should be limited to a proper size. When 32MB chunk read with direct I/O option is coming from userspace, kernel behavior is below now in do_direct_IO() loop. it's timeline. | bio merge for 32MB. total 8,192 pages are merged. | total elapsed time is over 2ms. |------------------ ... ----------------------->| | 8,192 pages merged a bio. | at this time, first bio submit is done. | 1 bio is split to 32 read request and issue. |---------------> |---------------> |---------------> ...... |---------------> |--------------->| total 19ms elapsed to complete 32MB read done from device. | If bio max size is limited with 1MB, behavior is changed below. | bio merge for 1MB. 256 pages are merged for each bio. | total 32 bio will be made. 
| total elapsed time is over 2ms. it's same. | but, first bio submit timing is fast. about 100us. |--->|--->|--->|---> ... -->|--->|--->|--->|--->| | 256 pages merged a bio. | at this time, first bio submit is done. | and 1 read request is issued for 1 bio. |---------------> |---------------> |---------------> ...... |---------------> |--------------->| total 17ms elapsed to complete 32MB read done from device. | As a result, read request issue timing is faster if bio max size is limited. With the current kernel behavior for multipage bvec, a super large bio can be created, and that delays the issue of the first I/O request. Signed-off-by: Changheun Lee Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20210503095203.29076-1-nanich.lee@samsung.com Signed-off-by: Jens Axboe --- block/bio.c | 13 +++++++++++-- block/blk-settings.c | 5 +++++ include/linux/bio.h | 4 +++- include/linux/blkdev.h | 2 ++ 4 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bio.c b/block/bio.c index 44205dfb6b60..221dc56ba22f 100644 --- a/block/bio.c +++ b/block/bio.c @@ -255,6 +255,13 @@ void bio_init(struct bio *bio, struct bio_vec *table, } EXPORT_SYMBOL(bio_init); +unsigned int bio_max_size(struct bio *bio) +{ + struct block_device *bdev = bio->bi_bdev; + + return bdev ? 
bdev->bd_disk->queue->limits.bio_max_bytes : UINT_MAX; +} + /** * bio_reset - reinitialize a bio * @bio: bio to reset @@ -866,7 +873,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (page_is_mergeable(bv, page, len, off, same_page)) { - if (bio->bi_iter.bi_size > UINT_MAX - len) { + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) { *same_page = false; return false; } @@ -995,6 +1002,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; + unsigned int bytes_left = bio_max_size(bio) - bio->bi_iter.bi_size; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; bool same_page = false; @@ -1010,7 +1018,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); - size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); + size = iov_iter_get_pages(iter, pages, bytes_left, nr_pages, + &offset); if (unlikely(size <= 0)) return size ? 
size : -EFAULT; diff --git a/block/blk-settings.c b/block/blk-settings.c index 9c009090c4b5..c646503e55d2 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); */ void blk_set_default_limits(struct queue_limits *lim) { + lim->bio_max_bytes = UINT_MAX; lim->max_segments = BLK_MAX_SEGMENTS; lim->max_discard_segments = 1; lim->max_integrity_segments = 0; @@ -139,6 +140,10 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto limits->logical_block_size >> SECTOR_SHIFT); limits->max_sectors = max_sectors; + if (check_shl_overflow(max_sectors, SECTOR_SHIFT, + &limits->bio_max_bytes)) + limits->bio_max_bytes = UINT_MAX; + q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); diff --git a/include/linux/bio.h b/include/linux/bio.h index a0b4cfdf62a4..f1a99f0a240c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio) return NULL; } +extern unsigned int bio_max_size(struct bio *bio); + /** * bio_full - check if the bio is full * @bio: bio to check @@ -119,7 +121,7 @@ static inline bool bio_full(struct bio *bio, unsigned len) if (bio->bi_vcnt >= bio->bi_max_vecs) return true; - if (bio->bi_iter.bi_size > UINT_MAX - len) + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) return true; return false; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b91ba6207365..40c7c4d87aa1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -327,6 +327,8 @@ enum blk_bounce { }; struct queue_limits { + unsigned int bio_max_bytes; + enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3 From 4ee60ec156d91c315d1f62dfc1bc5799dcc6b473 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 May 2021 18:02:27 -0700 Subject: include: remove pagemap.h from blkdev.h My UEK-derived config 
has 1030 files depending on pagemap.h before this change. Afterwards, just 326 files need to be rebuilt when I touch pagemap.h. I think blkdev.h is probably included too widely, but untangling that dependency is harder and this solves my problem. x86 allmodconfig builds, but there may be implicit include problems on other architectures. Link: https://lkml.kernel.org/r/20210309195747.283796-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Dan Williams [nvdimm] Acked-by: Jens Axboe [block] Reviewed-by: Christoph Hellwig Acked-by: Coly Li [bcache] Acked-by: Martin K. Petersen [scsi] Reviewed-by: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/blk-settings.c | 1 + drivers/block/brd.c | 1 + drivers/block/loop.c | 1 + drivers/md/bcache/super.c | 1 + drivers/nvdimm/btt.c | 1 + drivers/nvdimm/pmem.c | 1 + drivers/scsi/scsicam.c | 1 + include/linux/blkdev.h | 1 - include/linux/swap.h | 1 + 9 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index 9c009090c4b5..902c40d67120 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 6e622c1327ee..7562cf30b14e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a370cde3ddd4..d58d68f3c7cd 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 2b6d6e9cd680..bea8c4429ae8 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -16,6 +16,7 @@ #include "features.h" #include +#include #include #include #include diff 
--git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 41aa1f01fc07..18a267d5073f 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 7daac795db39..ed10a8b66068 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index f1553a453616..0ffdb8f2995f 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b91ba6207365..1255823b2bc0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/swap.h b/include/linux/swap.h index f69e0f67651d..144727041e78 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 35c820e71565d1fa835b82499359218b219828ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 8 May 2021 21:49:48 -0600 Subject: Revert "bio: limit bio max size" This reverts commit cd2c7545ae1beac3b6aae033c7f31193b3255946. Alex reports that the commit causes corruption with LUKS on ext4. Revert it for now so that this can be investigated properly. 
Link: https://lore.kernel.org/linux-block/1620493841.bxdq8r5haw.none@localhost/ Reported-by: Alex Xu (Hello71) Signed-off-by: Jens Axboe --- block/bio.c | 13 ++----------- block/blk-settings.c | 5 ----- include/linux/bio.h | 4 +--- include/linux/blkdev.h | 2 -- 4 files changed, 3 insertions(+), 21 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bio.c b/block/bio.c index 221dc56ba22f..44205dfb6b60 100644 --- a/block/bio.c +++ b/block/bio.c @@ -255,13 +255,6 @@ void bio_init(struct bio *bio, struct bio_vec *table, } EXPORT_SYMBOL(bio_init); -unsigned int bio_max_size(struct bio *bio) -{ - struct block_device *bdev = bio->bi_bdev; - - return bdev ? bdev->bd_disk->queue->limits.bio_max_bytes : UINT_MAX; -} - /** * bio_reset - reinitialize a bio * @bio: bio to reset @@ -873,7 +866,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (page_is_mergeable(bv, page, len, off, same_page)) { - if (bio->bi_iter.bi_size > bio_max_size(bio) - len) { + if (bio->bi_iter.bi_size > UINT_MAX - len) { *same_page = false; return false; } @@ -1002,7 +995,6 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; - unsigned int bytes_left = bio_max_size(bio) - bio->bi_iter.bi_size; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; bool same_page = false; @@ -1018,8 +1010,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); - size = iov_iter_get_pages(iter, pages, bytes_left, nr_pages, - &offset); + size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); if (unlikely(size <= 0)) return size ? 
size : -EFAULT; diff --git a/block/blk-settings.c b/block/blk-settings.c index c646503e55d2..9c009090c4b5 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -31,7 +31,6 @@ EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); */ void blk_set_default_limits(struct queue_limits *lim) { - lim->bio_max_bytes = UINT_MAX; lim->max_segments = BLK_MAX_SEGMENTS; lim->max_discard_segments = 1; lim->max_integrity_segments = 0; @@ -140,10 +139,6 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto limits->logical_block_size >> SECTOR_SHIFT); limits->max_sectors = max_sectors; - if (check_shl_overflow(max_sectors, SECTOR_SHIFT, - &limits->bio_max_bytes)) - limits->bio_max_bytes = UINT_MAX; - q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); diff --git a/include/linux/bio.h b/include/linux/bio.h index f1a99f0a240c..a0b4cfdf62a4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -106,8 +106,6 @@ static inline void *bio_data(struct bio *bio) return NULL; } -extern unsigned int bio_max_size(struct bio *bio); - /** * bio_full - check if the bio is full * @bio: bio to check @@ -121,7 +119,7 @@ static inline bool bio_full(struct bio *bio, unsigned len) if (bio->bi_vcnt >= bio->bi_max_vecs) return true; - if (bio->bi_iter.bi_size > bio_max_size(bio) - len) + if (bio->bi_iter.bi_size > UINT_MAX - len) return true; return false; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 40c7c4d87aa1..b91ba6207365 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -327,8 +327,6 @@ enum blk_bounce { }; struct queue_limits { - unsigned int bio_max_bytes; - enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3