Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--	block/blk-mq.c	398
1 file changed, 224 insertions, 174 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 23968c02be0d..32d11305d51b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -89,7 +89,7 @@ struct mq_inflight {
 	unsigned int inflight[2];
 };
 
-static bool blk_mq_check_inflight(struct request *rq, void *priv)
+static bool blk_mq_check_in_driver(struct request *rq, void *priv)
 {
 	struct mq_inflight *mi = priv;
 
@@ -101,24 +101,14 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv)
 	return true;
 }
 
-unsigned int blk_mq_in_flight(struct request_queue *q,
-		struct block_device *part)
+void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2])
 {
 	struct mq_inflight mi = { .part = part };
 
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
-
-	return mi.inflight[0] + mi.inflight[1];
-}
-
-void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
-		unsigned int inflight[2])
-{
-	struct mq_inflight mi = { .part = part };
-
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
-	inflight[0] = mi.inflight[0];
-	inflight[1] = mi.inflight[1];
+	blk_mq_queue_tag_busy_iter(bdev_get_queue(part), blk_mq_check_in_driver,
+			&mi);
+	inflight[READ] = mi.inflight[READ];
+	inflight[WRITE] = mi.inflight[WRITE];
 }
 
 #ifdef CONFIG_LOCKDEP
@@ -131,6 +121,10 @@ static bool blk_freeze_set_owner(struct request_queue *q,
 	if (!q->mq_freeze_depth) {
 		q->mq_freeze_owner = owner;
 		q->mq_freeze_owner_depth = 1;
+		q->mq_freeze_disk_dead = !q->disk ||
+			test_bit(GD_DEAD, &q->disk->state) ||
+			!blk_queue_registered(q);
+		q->mq_freeze_queue_dying = blk_queue_dying(q);
 		return true;
 	}
 
@@ -142,8 +136,6 @@ static bool blk_freeze_set_owner(struct request_queue *q,
 /* verify the last unfreeze in owner context */
 static bool blk_unfreeze_check_owner(struct request_queue *q)
 {
-	if (!q->mq_freeze_owner)
-		return false;
 	if (q->mq_freeze_owner != current)
 		return false;
 	if (--q->mq_freeze_owner_depth == 0) {
@@ -189,7 +181,7 @@ bool __blk_freeze_queue_start(struct request_queue *q,
 void blk_freeze_queue_start(struct request_queue *q)
 {
 	if (__blk_freeze_queue_start(q, current))
-		blk_freeze_acquire_lock(q, false, false);
+		blk_freeze_acquire_lock(q);
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
 
@@ -208,12 +200,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
 
-void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue_nomemsave(struct request_queue *q)
 {
 	blk_freeze_queue_start(q);
 	blk_mq_freeze_queue_wait(q);
 }
-EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave);
 
 bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 {
@@ -234,12 +226,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 	return unfreeze;
 }
 
-void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q)
 {
 	if (__blk_mq_unfreeze_queue(q, false))
-		blk_unfreeze_release_lock(q, false, false);
+		blk_unfreeze_release_lock(q);
 }
-EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore);
 
 /*
  * non_owner variant of blk_freeze_queue_start
@@ -506,7 +498,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 
 retry:
 	data->ctx = blk_mq_get_ctx(q);
-	data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
+	data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
 
 	if (q->elevator) {
 		/*
@@ -582,9 +574,13 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
 	struct blk_mq_alloc_data data = {
 		.q		= q,
 		.flags		= flags,
+		.shallow_depth	= 0,
 		.cmd_flags	= opf,
+		.rq_flags	= 0,
 		.nr_tags	= plug->nr_ios,
 		.cached_rqs	= &plug->cached_rqs,
+		.ctx		= NULL,
+		.hctx		= NULL
 	};
 	struct request *rq;
 
@@ -644,8 +640,13 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
 	struct blk_mq_alloc_data data = {
 		.q		= q,
 		.flags		= flags,
+		.shallow_depth	= 0,
 		.cmd_flags	= opf,
+		.rq_flags	= 0,
 		.nr_tags	= 1,
+		.cached_rqs	= NULL,
+		.ctx		= NULL,
+		.hctx		= NULL
 	};
 	int ret;
 
@@ -673,8 +674,13 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 	struct blk_mq_alloc_data data = {
 		.q		= q,
 		.flags		= flags,
+		.shallow_depth	= 0,
 		.cmd_flags	= opf,
+		.rq_flags	= 0,
 		.nr_tags	= 1,
+		.cached_rqs	= NULL,
+		.ctx		= NULL,
+		.hctx		= NULL
 	};
 	u64 alloc_time_ns = 0;
 	struct request *rq;
@@ -2078,7 +2084,7 @@ static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued,
  * Returns true if we did some work AND can potentially do more.
  */
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
-			     unsigned int nr_budgets)
+			     bool get_budget)
 {
 	enum prep_dispatch prep;
 	struct request_queue *q = hctx->queue;
@@ -2100,7 +2106,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 		rq = list_first_entry(list, struct request, queuelist);
 
 		WARN_ON_ONCE(hctx != rq->mq_hctx);
-		prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
+		prep = blk_mq_prep_dispatch_rq(rq, get_budget);
 		if (prep != PREP_DISPATCH_OK)
 			break;
 
@@ -2109,12 +2115,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 		bd.rq = rq;
 		bd.last = list_empty(list);
 
-		/*
-		 * once the request is queued to lld, no need to cover the
-		 * budget any more
-		 */
-		if (nr_budgets)
-			nr_budgets--;
 		ret = q->mq_ops->queue_rq(hctx, &bd);
 		switch (ret) {
 		case BLK_STS_OK:
@@ -2148,7 +2148,11 @@ out:
 			((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
 			blk_mq_is_shared_tags(hctx->flags));
 
-		if (nr_budgets)
+		/*
+		 * If the caller allocated budgets, free the budgets of the
+		 * requests that have not yet been passed to the block driver.
+		 */
+		if (!get_budget)
 			blk_mq_release_budgets(q, list);
 
 		spin_lock(&hctx->lock);
@@ -2656,8 +2660,10 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
 	if (bio->bi_opf & REQ_RAHEAD)
 		rq->cmd_flags |= REQ_FAILFAST_MASK;
 
+	rq->bio = rq->biotail = bio;
 	rq->__sector = bio->bi_iter.bi_sector;
-	blk_rq_bio_prep(rq, bio, nr_segs);
+	rq->__data_len = bio->bi_iter.bi_size;
+	rq->nr_phys_segments = nr_segs;
 	if (bio_integrity(bio))
 		rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
 								      bio);
@@ -2774,15 +2780,15 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
 	return __blk_mq_issue_directly(hctx, rq, last);
 }
 
-static void blk_mq_plug_issue_direct(struct blk_plug *plug)
+static void blk_mq_issue_direct(struct rq_list *rqs)
 {
 	struct blk_mq_hw_ctx *hctx = NULL;
 	struct request *rq;
 	int queued = 0;
 	blk_status_t ret = BLK_STS_OK;
 
-	while ((rq = rq_list_pop(&plug->mq_list))) {
-		bool last = rq_list_empty(&plug->mq_list);
+	while ((rq = rq_list_pop(rqs))) {
+		bool last = rq_list_empty(rqs);
 
 		if (hctx != rq->mq_hctx) {
 			if (hctx) {
@@ -2813,15 +2819,64 @@ out:
 		blk_mq_commit_rqs(hctx, queued, false);
 }
 
-static void __blk_mq_flush_plug_list(struct request_queue *q,
-				     struct blk_plug *plug)
+static void __blk_mq_flush_list(struct request_queue *q, struct rq_list *rqs)
 {
 	if (blk_queue_quiesced(q))
 		return;
-	q->mq_ops->queue_rqs(&plug->mq_list);
+	q->mq_ops->queue_rqs(rqs);
+}
+
+static unsigned blk_mq_extract_queue_requests(struct rq_list *rqs,
+					      struct rq_list *queue_rqs)
+{
+	struct request *rq = rq_list_pop(rqs);
+	struct request_queue *this_q = rq->q;
+	struct request **prev = &rqs->head;
+	struct rq_list matched_rqs = {};
+	struct request *last = NULL;
+	unsigned depth = 1;
+
+	rq_list_add_tail(&matched_rqs, rq);
+	while ((rq = *prev)) {
+		if (rq->q == this_q) {
+			/* move rq from rqs to matched_rqs */
+			*prev = rq->rq_next;
+			rq_list_add_tail(&matched_rqs, rq);
+			depth++;
+		} else {
+			/* leave rq in rqs */
+			prev = &rq->rq_next;
+			last = rq;
+		}
+	}
+
+	rqs->tail = last;
+	*queue_rqs = matched_rqs;
+	return depth;
 }
 
-static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
+static void blk_mq_dispatch_queue_requests(struct rq_list *rqs, unsigned depth)
+{
+	struct request_queue *q = rq_list_peek(rqs)->q;
+
+	trace_block_unplug(q, depth, true);
+
+	/*
+	 * Peek first request and see if we have a ->queue_rqs() hook.
+	 * If we do, we can dispatch the whole list in one go.
+	 * We already know at this point that all requests belong to the
+	 * same queue, caller must ensure that's the case.
+	 */
+	if (q->mq_ops->queue_rqs) {
+		blk_mq_run_dispatch_ops(q, __blk_mq_flush_list(q, rqs));
+		if (rq_list_empty(rqs))
+			return;
+	}
+
+	blk_mq_run_dispatch_ops(q, blk_mq_issue_direct(rqs));
+}
+
+static void blk_mq_dispatch_list(struct rq_list *rqs, bool from_sched)
 {
 	struct blk_mq_hw_ctx *this_hctx = NULL;
 	struct blk_mq_ctx *this_ctx = NULL;
@@ -2831,7 +2886,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
 	LIST_HEAD(list);
 
 	do {
-		struct request *rq = rq_list_pop(&plug->mq_list);
+		struct request *rq = rq_list_pop(rqs);
 
 		if (!this_hctx) {
 			this_hctx = rq->mq_hctx;
@@ -2844,9 +2899,9 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
 		}
 		list_add_tail(&rq->queuelist, &list);
 		depth++;
-	} while (!rq_list_empty(&plug->mq_list));
+	} while (!rq_list_empty(rqs));
 
-	plug->mq_list = requeue_list;
+	*rqs = requeue_list;
 	trace_block_unplug(this_hctx->queue, depth, !from_sched);
 
 	percpu_ref_get(&this_hctx->queue->q_usage_counter);
@@ -2866,9 +2921,21 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
 	percpu_ref_put(&this_hctx->queue->q_usage_counter);
 }
 
+static void blk_mq_dispatch_multiple_queue_requests(struct rq_list *rqs)
+{
+	do {
+		struct rq_list queue_rqs;
+		unsigned depth;
+
+		depth = blk_mq_extract_queue_requests(rqs, &queue_rqs);
+		blk_mq_dispatch_queue_requests(&queue_rqs, depth);
+		while (!rq_list_empty(&queue_rqs))
+			blk_mq_dispatch_list(&queue_rqs, false);
+	} while (!rq_list_empty(rqs));
+}
+
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
-	struct request *rq;
 	unsigned int depth;
 
 	/*
@@ -2883,34 +2950,19 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	depth = plug->rq_count;
 	plug->rq_count = 0;
 
-	if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
-		struct request_queue *q;
-
-		rq = rq_list_peek(&plug->mq_list);
-		q = rq->q;
-		trace_block_unplug(q, depth, true);
-
-		/*
-		 * Peek first request and see if we have a ->queue_rqs() hook.
-		 * If we do, we can dispatch the whole plug list in one go. We
-		 * already know at this point that all requests belong to the
-		 * same queue, caller must ensure that's the case.
-		 */
-		if (q->mq_ops->queue_rqs) {
-			blk_mq_run_dispatch_ops(q,
-				__blk_mq_flush_plug_list(q, plug));
-			if (rq_list_empty(&plug->mq_list))
-				return;
+	if (!plug->has_elevator && !from_schedule) {
+		if (plug->multiple_queues) {
+			blk_mq_dispatch_multiple_queue_requests(&plug->mq_list);
+			return;
 		}
-
-		blk_mq_run_dispatch_ops(q,
-				blk_mq_plug_issue_direct(plug));
+		blk_mq_dispatch_queue_requests(&plug->mq_list, depth);
 		if (rq_list_empty(&plug->mq_list))
 			return;
 	}
 
 	do {
-		blk_mq_dispatch_plug_list(plug, from_schedule);
+		blk_mq_dispatch_list(&plug->mq_list, from_schedule);
 	} while (!rq_list_empty(&plug->mq_list));
 }
 
@@ -2961,13 +3013,18 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
 
 static struct request *blk_mq_get_new_requests(struct request_queue *q,
 					       struct blk_plug *plug,
-					       struct bio *bio,
-					       unsigned int nsegs)
+					       struct bio *bio)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
-		.nr_tags	= 1,
+		.flags		= 0,
+		.shallow_depth	= 0,
 		.cmd_flags	= bio->bi_opf,
+		.rq_flags	= 0,
+		.nr_tags	= 1,
+		.cached_rqs	= NULL,
+		.ctx		= NULL,
+		.hctx		= NULL
 	};
 	struct request *rq;
 
@@ -2980,12 +3037,9 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	}
 
 	rq = __blk_mq_alloc_requests(&data);
-	if (rq)
-		return rq;
-	rq_qos_cleanup(q, bio);
-	if (bio->bi_opf & REQ_NOWAIT)
-		bio_wouldblock_error(bio);
-	return NULL;
+	if (unlikely(!rq))
+		rq_qos_cleanup(q, bio);
+	return rq;
 }
 
 /*
@@ -3080,8 +3134,6 @@ void blk_mq_submit_bio(struct bio *bio)
 		goto new_request;
 	}
 
-	bio = blk_queue_bounce(bio, q);
-
 	/*
 	 * The cached request already holds a q_usage_counter reference and we
 	 * don't have to acquire a new one if we use it.
@@ -3117,16 +3169,21 @@ void blk_mq_submit_bio(struct bio *bio)
 	if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
 		goto queue_exit;
 
-	if (blk_queue_is_zoned(q) && blk_zone_plug_bio(bio, nr_segs))
-		goto queue_exit;
+	if (bio_needs_zone_write_plugging(bio)) {
+		if (blk_zone_plug_bio(bio, nr_segs))
+			goto queue_exit;
+	}
 
 new_request:
-	if (!rq) {
-		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-		if (unlikely(!rq))
-			goto queue_exit;
-	} else {
+	if (rq) {
 		blk_mq_use_cached_rq(rq, plug, bio);
+	} else {
+		rq = blk_mq_get_new_requests(q, plug, bio);
+		if (unlikely(!rq)) {
+			if (bio->bi_opf & REQ_NOWAIT)
+				bio_wouldblock_error(bio);
+			goto queue_exit;
+		}
 	}
 
 	trace_block_getrq(bio);
@@ -3310,6 +3367,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		rq->special_vec = rq_src->special_vec;
 	}
 	rq->nr_phys_segments = rq_src->nr_phys_segments;
+	rq->nr_integrity_segments = rq_src->nr_integrity_segments;
 
 	if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
 		goto free_and_out;
@@ -3479,8 +3537,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 	if (node == NUMA_NO_NODE)
 		node = set->numa_node;
 
-	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
-				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
+	tags = blk_mq_init_tags(nr_tags, reserved_tags, set->flags, node);
 	if (!tags)
 		return NULL;
 
@@ -4220,13 +4277,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
 					 bool shared)
 {
 	struct request_queue *q;
+	unsigned int memflags;
 
 	lockdep_assert_held(&set->tag_list_lock);
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_freeze_queue(q);
+		memflags = blk_mq_freeze_queue(q);
 		queue_set_hctx_shared(q, shared);
-		blk_mq_unfreeze_queue(q);
+		blk_mq_unfreeze_queue(q, memflags);
 	}
 }
 
@@ -4456,14 +4514,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 	return NULL;
 }
 
-static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
-				   struct request_queue *q)
+static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+				     struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i, j;
 
-	/* protect against switching io scheduler */
-	mutex_lock(&q->sysfs_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int old_node;
 		int node = blk_mq_get_hctx_node(set, i);
@@ -4496,7 +4552,12 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
 	xa_for_each_start(&q->hctx_table, j, hctx, j)
 		blk_mq_exit_hctx(q, set, hctx, j);
-	mutex_unlock(&q->sysfs_lock);
+}
+
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+				   struct request_queue *q)
+{
+	__blk_mq_realloc_hw_ctxs(set, q);
 
 	/* unregister cpuhp callbacks for exited hctxs */
 	blk_mq_remove_hw_queues_cpuhp(q);
@@ -4545,8 +4606,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	q->nr_requests = set->queue_depth;
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
-	blk_mq_add_queue_tag_set(set, q);
 	blk_mq_map_swqueue(q);
+	blk_mq_add_queue_tag_set(set, q);
 	return 0;
 
 err_hctxs:
@@ -4766,6 +4827,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 			goto out_free_srcu;
 	}
 
+	init_rwsem(&set->update_nr_hwq_lock);
+
 	ret = -ENOMEM;
 	set->tags = kcalloc_node(set->nr_hw_queues,
 				 sizeof(struct blk_mq_tags *), GFP_KERNEL,
@@ -4906,90 +4969,67 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 }
 
 /*
- * request_queue and elevator_type pair.
- * It is just used by __blk_mq_update_nr_hw_queues to cache
- * the elevator_type associated with a request_queue.
+ * Switch back to the elevator type stored in the xarray.
  */
-struct blk_mq_qe_pair {
-	struct list_head node;
-	struct request_queue *q;
-	struct elevator_type *type;
-};
-
-/*
- * Cache the elevator_type in qe pair list and switch the
- * io scheduler to 'none'
- */
-static bool blk_mq_elv_switch_none(struct list_head *head,
-		struct request_queue *q)
+static void blk_mq_elv_switch_back(struct request_queue *q,
+		struct xarray *elv_tbl)
 {
-	struct blk_mq_qe_pair *qe;
+	struct elevator_type *e = xa_load(elv_tbl, q->id);
 
-	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
-	if (!qe)
-		return false;
-
-	/* q->elevator needs protection from ->sysfs_lock */
-	mutex_lock(&q->sysfs_lock);
-
-	/* the check has to be done with holding sysfs_lock */
-	if (!q->elevator) {
-		kfree(qe);
-		goto unlock;
-	}
+	/* The elv_update_nr_hw_queues unfreezes the queue. */
+	elv_update_nr_hw_queues(q, e);
 
-	INIT_LIST_HEAD(&qe->node);
-	qe->q = q;
-	qe->type = q->elevator->type;
-	/* keep a reference to the elevator module as we'll switch back */
-	__elevator_get(qe->type);
-	list_add(&qe->node, head);
-	elevator_disable(q);
-unlock:
-	mutex_unlock(&q->sysfs_lock);
-
-	return true;
+	/* Drop the reference acquired in blk_mq_elv_switch_none. */
+	if (e)
+		elevator_put(e);
 }
 
-static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head,
-		struct request_queue *q)
+/*
+ * Stores elevator type in xarray and set current elevator to none. It uses
+ * q->id as an index to store the elevator type into the xarray.
+ */
+static int blk_mq_elv_switch_none(struct request_queue *q,
+		struct xarray *elv_tbl)
 {
-	struct blk_mq_qe_pair *qe;
+	int ret = 0;
 
-	list_for_each_entry(qe, head, node)
-		if (qe->q == q)
-			return qe;
+	lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock);
 
-	return NULL;
-}
+	/*
+	 * Accessing q->elevator without holding q->elevator_lock is safe here
+	 * because we're called from nr_hw_queue update which is protected by
+	 * set->update_nr_hwq_lock in the writer context. So, scheduler update/
+	 * switch code (which acquires the same lock in the reader context)
+	 * can't run concurrently.
	 */
+	if (q->elevator) {
 
-static void blk_mq_elv_switch_back(struct list_head *head,
-		struct request_queue *q)
-{
-	struct blk_mq_qe_pair *qe;
-	struct elevator_type *t;
+		ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL);
+		if (WARN_ON_ONCE(ret))
+			return ret;
 
-	qe = blk_lookup_qe_pair(head, q);
-	if (!qe)
-		return;
-	t = qe->type;
-	list_del(&qe->node);
-	kfree(qe);
+		/*
+		 * Before we switch elevator to 'none', take a reference to
+		 * the elevator module so that while nr_hw_queue update is
+		 * running, no one can remove elevator module. We'd put the
+		 * reference to elevator module later when we switch back
+		 * elevator.
+		 */
+		__elevator_get(q->elevator->type);
 
-	mutex_lock(&q->sysfs_lock);
-	elevator_switch(q, t);
-	/* drop the reference acquired in blk_mq_elv_switch_none */
-	elevator_put(t);
-	mutex_unlock(&q->sysfs_lock);
+		elevator_set_none(q);
+	}
+	return ret;
 }
 
 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 						int nr_hw_queues)
 {
 	struct request_queue *q;
-	LIST_HEAD(head);
 	int prev_nr_hw_queues = set->nr_hw_queues;
+	unsigned int memflags;
 	int i;
+	struct xarray elv_tbl;
 
 	lockdep_assert_held(&set->tag_list_lock);
 
@@ -5000,29 +5040,34 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
 		return;
 
+	memflags = memalloc_noio_save();
+
+	xa_init(&elv_tbl);
+
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		blk_mq_debugfs_unregister_hctxs(q);
+		blk_mq_sysfs_unregister_hctxs(q);
+	}
+
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue(q);
+		blk_mq_freeze_queue_nomemsave(q);
+
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
 	 * updating the new sw to hw queue mappings.
 	 */
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		if (!blk_mq_elv_switch_none(&head, q))
+		if (blk_mq_elv_switch_none(q, &elv_tbl))
 			goto switch_back;
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_debugfs_unregister_hctxs(q);
-		blk_mq_sysfs_unregister_hctxs(q);
-	}
-
 	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
-		goto reregister;
+		goto switch_back;
 
 fallback:
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_realloc_hw_ctxs(set, q);
+		__blk_mq_realloc_hw_ctxs(set, q);
 		if (q->nr_hw_queues != set->nr_hw_queues) {
 			int i = prev_nr_hw_queues;
 
@@ -5037,19 +5082,22 @@ fallback:
 		}
 		blk_mq_map_swqueue(q);
 	}
+switch_back:
+	/* The blk_mq_elv_switch_back unfreezes queue for us. */
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_elv_switch_back(q, &elv_tbl);
 
-reregister:
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_sysfs_register_hctxs(q);
 		blk_mq_debugfs_register_hctxs(q);
+
+		blk_mq_remove_hw_queues_cpuhp(q);
+		blk_mq_add_hw_queues_cpuhp(q);
 	}
 
-switch_back:
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(&head, q);
+	xa_destroy(&elv_tbl);
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_unfreeze_queue(q);
+	memalloc_noio_restore(memflags);
 
 	/* Free the excess tags when nr_hw_queues shrink. */
 	for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
@@ -5058,9 +5106,11 @@ switch_back:
 
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 {
+	down_write(&set->update_nr_hwq_lock);
 	mutex_lock(&set->tag_list_lock);
 	__blk_mq_update_nr_hw_queues(set, nr_hw_queues);
 	mutex_unlock(&set->tag_list_lock);
+	up_write(&set->update_nr_hwq_lock);
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
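A note on the freeze API split visible above: blk_mq_freeze_queue() now saves the task's memalloc_noio state and returns it, blk_mq_unfreeze_queue() takes that value back, and the _nomemsave/_nomemrestore variants keep the old prototypes for callers such as __blk_mq_update_nr_hw_queues() that call memalloc_noio_save()/restore() themselves. A minimal caller-side sketch, assuming only a valid struct request_queue *q; the helper name example_reconfigure() is made up for illustration:

#include <linux/blk-mq.h>

/*
 * Sketch: freeze a queue around a configuration change. The flags returned
 * by blk_mq_freeze_queue() must be handed back to blk_mq_unfreeze_queue()
 * so the memalloc_noio state saved at freeze time is restored correctly.
 */
static void example_reconfigure(struct request_queue *q)
{
	unsigned int memflags;

	memflags = blk_mq_freeze_queue(q);	/* waits for in-flight requests */
	/* ... queue is frozen: update limits, tags or other settings ... */
	blk_mq_unfreeze_queue(q, memflags);	/* resumes I/O, restores flags */
}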
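The blk_mq_extract_queue_requests() hunk splices every request that belongs to the first entry's queue out of the plug list with a pointer-to-pointer walk, so both lists keep their relative order and nothing is copied. The same splice-by-key idea in a self-contained user-space sketch (plain C; struct node and extract_matching() are hypothetical stand-ins, not the kernel rq_list API):

#include <stdio.h>

struct node {
	int key;
	struct node *next;
};

/*
 * Move every node whose key matches the first node's key out of *list into
 * a separate list, preserving the order of both lists. Mirrors the
 * pointer-to-pointer walk used by blk_mq_extract_queue_requests().
 */
static struct node *extract_matching(struct node **list, unsigned *depth)
{
	struct node *head = *list;		/* the first node defines the key */
	struct node *matched = head;
	struct node **mtail = &head->next;	/* tail link of the matched list */
	struct node **prev;
	struct node *cur;
	int key = head->key;

	*list = head->next;			/* pop the head off the input list */
	head->next = NULL;
	*depth = 1;

	for (prev = list; (cur = *prev); ) {
		if (cur->key == key) {
			*prev = cur->next;	/* unlink from the input list */
			cur->next = NULL;
			*mtail = cur;		/* append to the matched list */
			mtail = &cur->next;
			(*depth)++;
		} else {
			prev = &cur->next;	/* leave it in the input list */
		}
	}
	return matched;
}

int main(void)
{
	struct node d = { 2, NULL }, c = { 1, &d }, b = { 2, &c }, a = { 1, &b };
	struct node *list = &a, *m;
	unsigned depth;

	m = extract_matching(&list, &depth);	/* pulls both key==1 nodes */
	printf("matched %u nodes, first key %d\n", depth, m->key);
	printf("remaining head key %d\n", list ? list->key : -1);
	return 0;
}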
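The nr_hw_queues rework replaces the blk_mq_qe_pair list with an on-stack xarray keyed by q->id: blk_mq_elv_switch_none() stores the current elevator_type with xa_insert() before switching to 'none', and blk_mq_elv_switch_back() retrieves it with xa_load(). The bare stash-and-restore pattern looks like the sketch below; save_cfg(), restore_cfg() and the void *cfg payload are hypothetical stand-ins, not blk-mq code:

#include <linux/xarray.h>

/*
 * Sketch: remember a per-object pointer across a reconfiguration phase,
 * keyed by a stable integer id (blk-mq uses q->id). The caller brackets
 * this with xa_init()/xa_destroy() on an on-stack struct xarray, as
 * __blk_mq_update_nr_hw_queues() does with elv_tbl.
 */
static int save_cfg(struct xarray *tbl, unsigned long id, void *cfg)
{
	/* xa_insert() returns -EBUSY if an entry already exists at @id */
	return xa_insert(tbl, id, cfg, GFP_KERNEL);
}

static void *restore_cfg(struct xarray *tbl, unsigned long id)
{
	/* xa_load() returns NULL when nothing was stored for @id */
	return xa_load(tbl, id);
}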