diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 6 | ||||
-rw-r--r-- | block/bio.c | 1 | ||||
-rw-r--r-- | block/blk-cgroup.c | 78 | ||||
-rw-r--r-- | block/blk-cgroup.h | 15 | ||||
-rw-r--r-- | block/blk-core.c | 19 | ||||
-rw-r--r-- | block/blk-map.c | 7 | ||||
-rw-r--r-- | block/blk-mq.c | 16 | ||||
-rw-r--r-- | block/blk-mq.h | 5 | ||||
-rw-r--r-- | block/bsg.c | 2 | ||||
-rw-r--r-- | block/genhd.c | 28 |
10 files changed, 85 insertions, 92 deletions
diff --git a/block/Kconfig b/block/Kconfig index 0b7c13f9a089..86122e459fe0 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -41,6 +41,9 @@ config BLK_RQ_ALLOC_TIME config BLK_CGROUP_RWSTAT bool +config BLK_CGROUP_PUNT_BIO + bool + config BLK_DEV_BSG_COMMON tristate @@ -204,9 +207,6 @@ config BLK_INLINE_ENCRYPTION_FALLBACK source "block/partitions/Kconfig" -config BLOCK_COMPAT - def_bool COMPAT - config BLK_MQ_PCI def_bool PCI diff --git a/block/bio.c b/block/bio.c index fd11614bba4d..043944fd46eb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1159,6 +1159,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len, return false; return bio_add_page(bio, &folio->page, len, off) > 0; } +EXPORT_SYMBOL(bio_add_folio); void __bio_release_pages(struct bio *bio, bool mark_dirty) { diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0ecb4cce8af2..0ce64dd73cfe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -55,7 +55,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; -static struct workqueue_struct *blkcg_punt_bio_wq; #define BLKG_DESTROY_BATCH_SIZE 64 @@ -165,7 +164,9 @@ static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); +#ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); +#endif /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -187,6 +188,9 @@ static void blkg_release(struct percpu_ref *ref) call_rcu(&blkg->rcu_head, __blkg_release); } +#ifdef CONFIG_BLK_CGROUP_PUNT_BIO +static struct workqueue_struct *blkcg_punt_bio_wq; + static void blkg_async_bio_workfn(struct work_struct *work) { struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, @@ -197,10 +201,10 @@ static void blkg_async_bio_workfn(struct work_struct *work) bool need_plug = false; /* as long as there are pending bios, @blkg can't go away */ - spin_lock_bh(&blkg->async_bio_lock); + spin_lock(&blkg->async_bio_lock); bio_list_merge(&bios, &blkg->async_bios); bio_list_init(&blkg->async_bios); - spin_unlock_bh(&blkg->async_bio_lock); + spin_unlock(&blkg->async_bio_lock); /* start plug only when bio_list contains at least 2 bios */ if (bios.head && bios.head->bi_next) { @@ -213,6 +217,40 @@ static void blkg_async_bio_workfn(struct work_struct *work) blk_finish_plug(&plug); } +/* + * When a shared kthread issues a bio for a cgroup, doing so synchronously can + * lead to priority inversions as the kthread can be trapped waiting for that + * cgroup. Use this helper instead of submit_bio to punt the actual issuing to + * a dedicated per-blkcg work item to avoid such priority inversions. + */ +void blkcg_punt_bio_submit(struct bio *bio) +{ + struct blkcg_gq *blkg = bio->bi_blkg; + + if (blkg->parent) { + spin_lock(&blkg->async_bio_lock); + bio_list_add(&blkg->async_bios, bio); + spin_unlock(&blkg->async_bio_lock); + queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work); + } else { + /* never bounce for the root cgroup */ + submit_bio(bio); + } +} +EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit); + +static int __init blkcg_punt_bio_init(void) +{ + blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", + WQ_MEM_RECLAIM | WQ_FREEZABLE | + WQ_UNBOUND | WQ_SYSFS, 0); + if (!blkcg_punt_bio_wq) + return -ENOMEM; + return 0; +} +subsys_initcall(blkcg_punt_bio_init); +#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */ + /** * bio_blkcg_css - return the blkcg CSS associated with a bio * @bio: target bio @@ -268,10 +306,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, blkg->q = disk->queue; INIT_LIST_HEAD(&blkg->q_node); + blkg->blkcg = blkcg; +#ifdef CONFIG_BLK_CGROUP_PUNT_BIO spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); - blkg->blkcg = blkcg; +#endif u64_stats_init(&blkg->iostat.sync); for_each_possible_cpu(cpu) { @@ -1682,25 +1722,6 @@ out_unlock: } EXPORT_SYMBOL_GPL(blkcg_policy_unregister); -bool __blkcg_punt_bio_submit(struct bio *bio) -{ - struct blkcg_gq *blkg = bio->bi_blkg; - - /* consume the flag first */ - bio->bi_opf &= ~REQ_CGROUP_PUNT; - - /* never bounce for the root cgroup */ - if (!blkg->parent) - return false; - - spin_lock_bh(&blkg->async_bio_lock); - bio_list_add(&blkg->async_bios, bio); - spin_unlock_bh(&blkg->async_bio_lock); - - queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work); - return true; -} - /* * Scale the accumulated delay based on how long it has been since we updated * the delay. We only call this when we are adding delay, in case it's been a @@ -2079,16 +2100,5 @@ bool blk_cgroup_congested(void) return ret; } -static int __init blkcg_init(void) -{ - blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", - WQ_MEM_RECLAIM | WQ_FREEZABLE | - WQ_UNBOUND | WQ_SYSFS, 0); - if (!blkcg_punt_bio_wq) - return -ENOMEM; - return 0; -} -subsys_initcall(blkcg_init); - module_param(blkcg_debug_stats, bool, 0644); MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not"); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index d6ad3abc6eca..624c03c8fe64 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -72,9 +72,10 @@ struct blkcg_gq { struct blkg_iostat_set iostat; struct blkg_policy_data *pd[BLKCG_MAX_POLS]; - +#ifdef CONFIG_BLK_CGROUP_PUNT_BIO spinlock_t async_bio_lock; struct bio_list async_bios; +#endif union { struct work_struct async_bio_work; struct work_struct free_work; @@ -375,16 +376,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q))) -bool __blkcg_punt_bio_submit(struct bio *bio); - -static inline bool blkcg_punt_bio_submit(struct bio *bio) -{ - if (bio->bi_opf & REQ_CGROUP_PUNT) - return __blkcg_punt_bio_submit(bio); - else - return false; -} - static inline void blkcg_bio_issue_init(struct bio *bio) { bio_issue_init(&bio->bi_issue, bio_sectors(bio)); @@ -506,8 +497,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } - -static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline void blk_cgroup_bio_start(struct bio *bio) { } static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } diff --git a/block/blk-core.c b/block/blk-core.c index 06bdb352568c..00c74330fa92 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -824,9 +824,6 @@ EXPORT_SYMBOL(submit_bio_noacct); */ void submit_bio(struct bio *bio) { - if (blkcg_punt_bio_submit(bio)) - return; - if (bio_op(bio) == REQ_OP_READ) { task_io_account_read(bio->bi_iter.bi_size); count_vm_events(PGPGIN, bio_sectors(bio)); @@ -953,16 +950,11 @@ again: } } -unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, enum req_op op, +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time) { - const int sgrp = op_stat_group(op); - part_stat_lock(); update_io_ticks(bdev, start_time, false); - part_stat_inc(bdev, ios[sgrp]); - part_stat_add(bdev, sectors[sgrp], sectors); part_stat_local_inc(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -978,13 +970,12 @@ EXPORT_SYMBOL(bdev_start_io_acct); */ unsigned long bio_start_io_acct(struct bio *bio) { - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), - bio_op(bio), jiffies); + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); void bdev_end_io_acct(struct block_device *bdev, enum req_op op, - unsigned long start_time) + unsigned int sectors, unsigned long start_time) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); @@ -992,6 +983,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op, part_stat_lock(); update_io_ticks(bdev, now, true); + part_stat_inc(bdev, ios[sgrp]); + part_stat_add(bdev, sectors[sgrp], sectors); part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); part_stat_local_dec(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -1001,7 +994,7 @@ EXPORT_SYMBOL(bdev_end_io_acct); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev) { - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time); } EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); diff --git a/block/blk-map.c b/block/blk-map.c index 9137d16cecdc..04c55f1c492e 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -29,10 +29,11 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask); if (!bmd) return NULL; - memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); bmd->iter = *data; - if (iter_is_iovec(data)) - bmd->iter.iov = bmd->iov; + if (iter_is_iovec(data)) { + memcpy(bmd->iov, iter_iov(data), sizeof(struct iovec) * data->nr_segs); + bmd->iter.__iov = bmd->iov; + } return bmd; } diff --git a/block/blk-mq.c b/block/blk-mq.c index c2d297efe229..f6dad0886a2f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1333,8 +1333,6 @@ bool blk_rq_is_poll(struct request *rq) return false; if (rq->mq_hctx->type != HCTX_TYPE_POLL) return false; - if (WARN_ON_ONCE(!rq->bio)) - return false; return true; } EXPORT_SYMBOL_GPL(blk_rq_is_poll); @@ -1342,7 +1340,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll); static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { do { - bio_poll(rq->bio, NULL, 0); + blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0); cond_resched(); } while (!completion_done(wait)); } @@ -2711,6 +2709,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) struct blk_mq_hw_ctx *this_hctx = NULL; struct blk_mq_ctx *this_ctx = NULL; struct request *requeue_list = NULL; + struct request **requeue_lastp = &requeue_list; unsigned int depth = 0; LIST_HEAD(list); @@ -2721,10 +2720,10 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) this_hctx = rq->mq_hctx; this_ctx = rq->mq_ctx; } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { - rq_list_add(&requeue_list, rq); + rq_list_add_tail(&requeue_lastp, rq); continue; } - list_add_tail(&rq->queuelist, &list); + list_add(&rq->queuelist, &list); depth++; } while (!rq_list_empty(plug->mq_list)); @@ -2875,16 +2874,15 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, if (!plug) return NULL; + rq = rq_list_peek(&plug->cached_rq); + if (!rq || rq->q != q) + return NULL; if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) { *bio = NULL; return NULL; } - rq = rq_list_peek(&plug->cached_rq); - if (!rq || rq->q != q) - return NULL; - type = blk_mq_get_hctx_type((*bio)->bi_opf); hctx_type = rq->mq_hctx->type; if (type != hctx_type && diff --git a/block/blk-mq.h b/block/blk-mq.h index f882677ff106..e876584d3516 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -433,12 +433,13 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \ do { \ if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \ + struct blk_mq_tag_set *__tag_set = (q)->tag_set; \ int srcu_idx; \ \ might_sleep_if(check_sleep); \ - srcu_idx = srcu_read_lock((q)->tag_set->srcu); \ + srcu_idx = srcu_read_lock(__tag_set->srcu); \ (dispatch_ops); \ - srcu_read_unlock((q)->tag_set->srcu, srcu_idx); \ + srcu_read_unlock(__tag_set->srcu, srcu_idx); \ } else { \ rcu_read_lock(); \ (dispatch_ops); \ diff --git a/block/bsg.c b/block/bsg.c index 30fcc865ef4f..7eca43f33d7f 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -245,7 +245,7 @@ static int __init bsg_init(void) dev_t devid; int ret; - bsg_class = class_create(THIS_MODULE, "bsg"); + bsg_class = class_create("bsg"); if (IS_ERR(bsg_class)) return PTR_ERR(bsg_class); bsg_class->devnode = bsg_devnode; diff --git a/block/genhd.c b/block/genhd.c index 9fa4a7cd978c..1cb489b927d5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -363,7 +363,6 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) if (disk->open_partitions) return -EBUSY; - set_bit(GD_NEED_PART_SCAN, &disk->state); /* * If the device is opened exclusively by current thread already, it's * safe to scan partitons, otherwise, use bd_prepare_to_claim() to @@ -376,12 +375,19 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) return ret; } + set_bit(GD_NEED_PART_SCAN, &disk->state); bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL); if (IS_ERR(bdev)) ret = PTR_ERR(bdev); else blkdev_put(bdev, mode & ~FMODE_EXCL); + /* + * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, + * and this will cause that re-assemble partitioned raid device will + * creat partition for underlying disk. + */ + clear_bit(GD_NEED_PART_SCAN, &disk->state); if (!(mode & FMODE_EXCL)) bd_abort_claiming(disk->part0, disk_scan_partitions); return ret; @@ -464,12 +470,10 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, if (ret) goto out_device_del; - if (!sysfs_deprecated) { - ret = sysfs_create_link(block_depr, &ddev->kobj, - kobject_name(&ddev->kobj)); - if (ret) - goto out_device_del; - } + ret = sysfs_create_link(block_depr, &ddev->kobj, + kobject_name(&ddev->kobj)); + if (ret) + goto out_device_del; /* * avoid probable deadlock caused by allocating memory with @@ -546,8 +550,7 @@ out_put_slave_dir: out_put_holder_dir: kobject_put(disk->part0->bd_holder_dir); out_del_block_link: - if (!sysfs_deprecated) - sysfs_remove_link(block_depr, dev_name(ddev)); + sysfs_remove_link(block_depr, dev_name(ddev)); out_device_del: device_del(ddev); out_free_ext_minor: @@ -648,8 +651,7 @@ void del_gendisk(struct gendisk *disk) part_stat_set_all(disk->part0, 0); disk->part0->bd_stamp = 0; - if (!sysfs_deprecated) - sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); @@ -894,7 +896,6 @@ static int __init genhd_device_init(void) { int error; - block_class.dev_kobj = sysfs_dev_block_kobj; error = class_register(&block_class); if (unlikely(error)) return error; @@ -903,8 +904,7 @@ static int __init genhd_device_init(void) register_blkdev(BLOCK_EXT_MAJOR, "blkext"); /* create top-level block dir */ - if (!sysfs_deprecated) - block_depr = kobject_create_and_add("block", NULL); + block_depr = kobject_create_and_add("block", NULL); return 0; } |