diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 22:12:44 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 22:12:44 +0300 |
commit | 3ad11d7ac8872b1c8da54494721fad8907ee41f7 (patch) | |
tree | 439d7cb75466978be936250c65a27ff05e82d9bc /block/blk-mq.c | |
parent | 857d64485e7c920364688a8a6dd0ffe5774327b6 (diff) | |
parent | 8858e8d98d5457ba23bcd0d99ce23e272b8b09a1 (diff) | |
download | linux-3ad11d7ac8872b1c8da54494721fad8907ee41f7.tar.xz |
Merge tag 'block-5.10-2020-10-12' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
- Series of merge handling cleanups (Baolin, Christoph)
- Series of blk-throttle fixes and cleanups (Baolin)
- Series cleaning up BDI, seperating the block device from the
backing_dev_info (Christoph)
- Removal of bdget() as a generic API (Christoph)
- Removal of blkdev_get() as a generic API (Christoph)
- Cleanup of is-partition checks (Christoph)
- Series reworking disk revalidation (Christoph)
- Series cleaning up bio flags (Christoph)
- bio crypt fixes (Eric)
- IO stats inflight tweak (Gabriel)
- blk-mq tags fixes (Hannes)
- Buffer invalidation fixes (Jan)
- Allow soft limits for zone append (Johannes)
- Shared tag set improvements (John, Kashyap)
- Allow IOPRIO_CLASS_RT for CAP_SYS_NICE (Khazhismel)
- DM no-wait support (Mike, Konstantin)
- Request allocation improvements (Ming)
- Allow md/dm/bcache to use IO stat helpers (Song)
- Series improving blk-iocost (Tejun)
- Various cleanups (Geert, Damien, Danny, Julia, Tetsuo, Tian, Wang,
Xianting, Yang, Yufen, yangerkun)
* tag 'block-5.10-2020-10-12' of git://git.kernel.dk/linux-block: (191 commits)
block: fix uapi blkzoned.h comments
blk-mq: move cancel of hctx->run_work to the front of blk_exit_queue
blk-mq: get rid of the dead flush handle code path
block: get rid of unnecessary local variable
block: fix comment and add lockdep assert
blk-mq: use helper function to test hw stopped
block: use helper function to test queue register
block: remove redundant mq check
block: invoke blk_mq_exit_sched no matter whether have .exit_sched
percpu_ref: don't refer to ref->data if it isn't allocated
block: ratelimit handle_bad_sector() message
blk-throttle: Re-use the throtl_set_slice_end()
blk-throttle: Open code __throtl_de/enqueue_tg()
blk-throttle: Move service tree validation out of the throtl_rb_first()
blk-throttle: Move the list operation after list validation
blk-throttle: Fix IO hang for a corner case
blk-throttle: Avoid tracking latency if low limit is invalid
blk-throttle: Avoid getting the current time if tg->last_finish_time is 0
blk-throttle: Remove a meaningless parameter for throtl_downgrade_state()
block: Remove redundant 'return' statement
...
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 101 |
1 files changed, 65 insertions, 36 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index cdced4aca2e8..deca157032c2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -105,7 +105,7 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - if (rq->part == mi->part) + if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; return true; @@ -519,7 +519,7 @@ void blk_mq_free_request(struct request *rq) ctx->rq_completed[rq_is_sync(rq)]++; if (rq->rq_flags & RQF_MQ_INFLIGHT) - atomic_dec(&hctx->nr_active); + __blk_mq_dec_active_requests(hctx); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->backing_dev_info); @@ -1096,19 +1096,20 @@ static inline unsigned int queued_to_index(unsigned int queued) static bool __blk_mq_get_driver_tag(struct request *rq) { - struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; + struct sbitmap_queue *bt = rq->mq_hctx->tags->bitmap_tags; unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; int tag; blk_mq_tag_busy(rq->mq_hctx); if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { - bt = &rq->mq_hctx->tags->breserved_tags; + bt = rq->mq_hctx->tags->breserved_tags; tag_offset = 0; + } else { + if (!hctx_may_queue(rq->mq_hctx, bt)) + return false; } - if (!hctx_may_queue(rq->mq_hctx, bt)) - return false; tag = __sbitmap_queue_get(bt); if (tag == BLK_MQ_NO_TAG) return false; @@ -1124,10 +1125,10 @@ static bool blk_mq_get_driver_tag(struct request *rq) if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) return false; - if ((hctx->flags & BLK_MQ_F_TAG_SHARED) && + if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && !(rq->rq_flags & RQF_MQ_INFLIGHT)) { rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&hctx->nr_active); + __blk_mq_inc_active_requests(hctx); } hctx->tags->rqs[rq->tag] = rq; return true; @@ -1145,7 +1146,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, struct sbitmap_queue *sbq; list_del_init(&wait->entry); - sbq = &hctx->tags->bitmap_tags; + sbq = hctx->tags->bitmap_tags; atomic_dec(&sbq->ws_active); } spin_unlock(&hctx->dispatch_wait_lock); @@ -1163,12 +1164,12 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { - struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; + struct sbitmap_queue *sbq = hctx->tags->bitmap_tags; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { blk_mq_sched_mark_restart_hctx(hctx); /* @@ -1425,7 +1426,7 @@ out: bool needs_restart; /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && - (hctx->flags & BLK_MQ_F_TAG_SHARED); + (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; blk_mq_release_budgets(q, nr_budgets); @@ -1803,7 +1804,7 @@ static void blk_mq_run_work_fn(struct work_struct *work) /* * If we are stopped, don't run the queue. */ - if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) + if (blk_mq_hctx_stopped(hctx)) return; __blk_mq_run_hw_queue(hctx); @@ -1936,13 +1937,18 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, unsigned int nr_segs) { + int err; + if (bio->bi_opf & REQ_RAHEAD) rq->cmd_flags |= REQ_FAILFAST_MASK; rq->__sector = bio->bi_iter.bi_sector; rq->write_hint = bio->bi_write_hint; blk_rq_bio_prep(rq, bio, nr_segs); - blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO); + + /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */ + err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO); + WARN_ON_ONCE(err); blk_account_io_start(rq); } @@ -2296,20 +2302,21 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, } } -void blk_mq_free_rq_map(struct blk_mq_tags *tags) +void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags) { kfree(tags->rqs); tags->rqs = NULL; kfree(tags->static_rqs); tags->static_rqs = NULL; - blk_mq_free_tags(tags); + blk_mq_free_tags(tags, flags); } struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int hctx_idx, unsigned int nr_tags, - unsigned int reserved_tags) + unsigned int reserved_tags, + unsigned int flags) { struct blk_mq_tags *tags; int node; @@ -2318,8 +2325,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, if (node == NUMA_NO_NODE) node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, node, - BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); + tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags); if (!tags) return NULL; @@ -2327,7 +2333,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!tags->rqs) { - blk_mq_free_tags(tags); + blk_mq_free_tags(tags, flags); return NULL; } @@ -2336,7 +2342,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, node); if (!tags->static_rqs) { kfree(tags->rqs); - blk_mq_free_tags(tags); + blk_mq_free_tags(tags, flags); return NULL; } @@ -2660,6 +2666,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, goto free_hctx; atomic_set(&hctx->nr_active, 0); + atomic_set(&hctx->elevator_queued, 0); if (node == NUMA_NO_NODE) node = set->numa_node; hctx->numa_node = node; @@ -2668,7 +2675,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; - hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; + hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; INIT_LIST_HEAD(&hctx->hctx_list); @@ -2745,10 +2752,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, int hctx_idx) { + unsigned int flags = set->flags; int ret = 0; set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx, - set->queue_depth, set->reserved_tags); + set->queue_depth, set->reserved_tags, flags); if (!set->tags[hctx_idx]) return false; @@ -2757,7 +2765,7 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, if (!ret) return true; - blk_mq_free_rq_map(set->tags[hctx_idx]); + blk_mq_free_rq_map(set->tags[hctx_idx], flags); set->tags[hctx_idx] = NULL; return false; } @@ -2765,9 +2773,11 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, unsigned int hctx_idx) { + unsigned int flags = set->flags; + if (set->tags && set->tags[hctx_idx]) { blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx); - blk_mq_free_rq_map(set->tags[hctx_idx]); + blk_mq_free_rq_map(set->tags[hctx_idx], flags); set->tags[hctx_idx] = NULL; } } @@ -2885,14 +2895,14 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) queue_for_each_hw_ctx(q, hctx, i) { if (shared) - hctx->flags |= BLK_MQ_F_TAG_SHARED; + hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; else - hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, - bool shared) +static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, + bool shared) { struct request_queue *q; @@ -2913,9 +2923,9 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) list_del(&q->tag_set_list); if (list_is_singular(&set->tag_list)) { /* just transitioned to unshared */ - set->flags &= ~BLK_MQ_F_TAG_SHARED; + set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, false); + blk_mq_update_tag_set_shared(set, false); } mutex_unlock(&set->tag_list_lock); INIT_LIST_HEAD(&q->tag_set_list); @@ -2930,12 +2940,12 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, * Check to see if we're transitioning to shared (from 1 to 2 queues). */ if (!list_empty(&set->tag_list) && - !(set->flags & BLK_MQ_F_TAG_SHARED)) { - set->flags |= BLK_MQ_F_TAG_SHARED; + !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { + set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, true); + blk_mq_update_tag_set_shared(set, true); } - if (set->flags & BLK_MQ_F_TAG_SHARED) + if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); list_add_tail(&q->tag_set_list, &set->tag_list); @@ -3256,9 +3266,11 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) { int i; - for (i = 0; i < set->nr_hw_queues; i++) + for (i = 0; i < set->nr_hw_queues; i++) { if (!__blk_mq_alloc_map_and_request(set, i)) goto out_unwind; + cond_resched(); + } return 0; @@ -3438,11 +3450,23 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (ret) goto out_free_mq_map; + if (blk_mq_is_sbitmap_shared(set->flags)) { + atomic_set(&set->active_queues_shared_sbitmap, 0); + + if (blk_mq_init_shared_sbitmap(set, set->flags)) { + ret = -ENOMEM; + goto out_free_mq_rq_maps; + } + } + mutex_init(&set->tag_list_lock); INIT_LIST_HEAD(&set->tag_list); return 0; +out_free_mq_rq_maps: + for (i = 0; i < set->nr_hw_queues; i++) + blk_mq_free_map_and_requests(set, i); out_free_mq_map: for (i = 0; i < set->nr_maps; i++) { kfree(set->map[i].mq_map); @@ -3461,6 +3485,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) blk_mq_free_map_and_requests(set, i); + if (blk_mq_is_sbitmap_shared(set->flags)) + blk_mq_exit_shared_sbitmap(set); + for (j = 0; j < set->nr_maps; j++) { kfree(set->map[j].mq_map); set->map[j].mq_map = NULL; @@ -3497,6 +3524,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) if (!hctx->sched_tags) { ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false); + if (!ret && blk_mq_is_sbitmap_shared(set->flags)) + blk_mq_tag_resize_shared_sbitmap(set, nr); } else { ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, nr, true); |