Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--  block/blk-mq.c | 398
 1 file changed, 224 insertions(+), 174 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 23968c02be0d..32d11305d51b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -89,7 +89,7 @@ struct mq_inflight {
unsigned int inflight[2];
};
-static bool blk_mq_check_inflight(struct request *rq, void *priv)
+static bool blk_mq_check_in_driver(struct request *rq, void *priv)
{
struct mq_inflight *mi = priv;
@@ -101,24 +101,14 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv)
return true;
}
-unsigned int blk_mq_in_flight(struct request_queue *q,
- struct block_device *part)
+void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2])
{
struct mq_inflight mi = { .part = part };
- blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
-
- return mi.inflight[0] + mi.inflight[1];
-}
-
-void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
- unsigned int inflight[2])
-{
- struct mq_inflight mi = { .part = part };
-
- blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
- inflight[0] = mi.inflight[0];
- inflight[1] = mi.inflight[1];
+ blk_mq_queue_tag_busy_iter(bdev_get_queue(part), blk_mq_check_in_driver,
+ &mi);
+ inflight[READ] = mi.inflight[READ];
+ inflight[WRITE] = mi.inflight[WRITE];
}
#ifdef CONFIG_LOCKDEP
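
For reference, the per-direction accounting above is a filter-and-count pass over all tagged requests: the iterator visits every request and the callback accumulates into inflight[READ]/inflight[WRITE] for the matching partition. A minimal user-space model of the same pattern (illustrative only, not kernel API):

#include <stdbool.h>
#include <stdio.h>

enum { READ = 0, WRITE = 1 };

struct request { int dir; const char *part; bool in_driver; };

struct mq_inflight_model {
	const char *part;
	unsigned int inflight[2];
};

/* Modelled on blk_mq_check_in_driver(): count only requests that
 * belong to the partition of interest and are owned by the driver. */
static bool check_in_driver(struct request *rq, void *priv)
{
	struct mq_inflight_model *mi = priv;

	if (rq->part == mi->part && rq->in_driver)
		mi->inflight[rq->dir]++;
	return true;	/* true means: keep iterating */
}

int main(void)
{
	const char *part = "sda1";
	struct request rqs[] = {
		{ READ,  part,  true  },
		{ WRITE, part,  true  },
		{ WRITE, part,  false },	/* allocated, not yet issued */
		{ READ,  "sdb", true  },	/* other partition */
	};
	struct mq_inflight_model mi = { .part = part };

	for (unsigned int i = 0; i < sizeof(rqs) / sizeof(rqs[0]); i++)
		if (!check_in_driver(&rqs[i], &mi))
			break;
	printf("read %u write %u\n", mi.inflight[READ], mi.inflight[WRITE]);
	return 0;	/* prints: read 1 write 1 */
}
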
@@ -131,6 +121,10 @@ static bool blk_freeze_set_owner(struct request_queue *q,
if (!q->mq_freeze_depth) {
q->mq_freeze_owner = owner;
q->mq_freeze_owner_depth = 1;
+ q->mq_freeze_disk_dead = !q->disk ||
+ test_bit(GD_DEAD, &q->disk->state) ||
+ !blk_queue_registered(q);
+ q->mq_freeze_queue_dying = blk_queue_dying(q);
return true;
}
@@ -142,8 +136,6 @@ static bool blk_freeze_set_owner(struct request_queue *q,
/* verify the last unfreeze in owner context */
static bool blk_unfreeze_check_owner(struct request_queue *q)
{
- if (!q->mq_freeze_owner)
- return false;
if (q->mq_freeze_owner != current)
return false;
if (--q->mq_freeze_owner_depth == 0) {
@@ -189,7 +181,7 @@ bool __blk_freeze_queue_start(struct request_queue *q,
void blk_freeze_queue_start(struct request_queue *q)
{
if (__blk_freeze_queue_start(q, current))
- blk_freeze_acquire_lock(q, false, false);
+ blk_freeze_acquire_lock(q);
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
@@ -208,12 +200,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
-void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue_nomemsave(struct request_queue *q)
{
blk_freeze_queue_start(q);
blk_mq_freeze_queue_wait(q);
}
-EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave);
bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
{
@@ -234,12 +226,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
return unfreeze;
}
-void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q)
{
if (__blk_mq_unfreeze_queue(q, false))
- blk_unfreeze_release_lock(q, false, false);
+ blk_unfreeze_release_lock(q);
}
-EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore);
/*
* non_owner variant of blk_freeze_queue_start
@@ -506,7 +498,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
retry:
data->ctx = blk_mq_get_ctx(q);
- data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
+ data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
if (q->elevator) {
/*
@@ -582,9 +574,13 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
+ .shallow_depth = 0,
.cmd_flags = opf,
+ .rq_flags = 0,
.nr_tags = plug->nr_ios,
.cached_rqs = &plug->cached_rqs,
+ .ctx = NULL,
+ .hctx = NULL
};
struct request *rq;
@@ -644,8 +640,13 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
+ .shallow_depth = 0,
.cmd_flags = opf,
+ .rq_flags = 0,
.nr_tags = 1,
+ .cached_rqs = NULL,
+ .ctx = NULL,
+ .hctx = NULL
};
int ret;
@@ -673,8 +674,13 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
+ .shallow_depth = 0,
.cmd_flags = opf,
+ .rq_flags = 0,
.nr_tags = 1,
+ .cached_rqs = NULL,
+ .ctx = NULL,
+ .hctx = NULL
};
u64 alloc_time_ns = 0;
struct request *rq;
@@ -2078,7 +2084,7 @@ static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued,
* Returns true if we did some work AND can potentially do more.
*/
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
- unsigned int nr_budgets)
+ bool get_budget)
{
enum prep_dispatch prep;
struct request_queue *q = hctx->queue;
@@ -2100,7 +2106,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
rq = list_first_entry(list, struct request, queuelist);
WARN_ON_ONCE(hctx != rq->mq_hctx);
- prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
+ prep = blk_mq_prep_dispatch_rq(rq, get_budget);
if (prep != PREP_DISPATCH_OK)
break;
@@ -2109,12 +2115,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
bd.rq = rq;
bd.last = list_empty(list);
- /*
- * once the request is queued to lld, no need to cover the
- * budget any more
- */
- if (nr_budgets)
- nr_budgets--;
ret = q->mq_ops->queue_rq(hctx, &bd);
switch (ret) {
case BLK_STS_OK:
@@ -2148,7 +2148,11 @@ out:
((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
blk_mq_is_shared_tags(hctx->flags));
- if (nr_budgets)
+ /*
+ * If the caller allocated budgets, free the budgets of the
+ * requests that have not yet been passed to the block driver.
+ */
+ if (!get_budget)
blk_mq_release_budgets(q, list);
spin_lock(&hctx->lock);
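
The budget contract implied by this hunk, spelled out (an illustrative summary, not new API):

/*
 * get_budget == true:  blk_mq_dispatch_rq_list() acquires a budget per
 *                      request as it prepares it, so a request that never
 *                      reaches ->queue_rq() never holds a budget here.
 * get_budget == false: the caller acquired budgets for the whole list up
 *                      front, so blk_mq_release_budgets() must free the
 *                      budgets of the requests still left on 'list' when
 *                      dispatch stops early.
 */
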
@@ -2656,8 +2660,10 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
if (bio->bi_opf & REQ_RAHEAD)
rq->cmd_flags |= REQ_FAILFAST_MASK;
+ rq->bio = rq->biotail = bio;
rq->__sector = bio->bi_iter.bi_sector;
- blk_rq_bio_prep(rq, bio, nr_segs);
+ rq->__data_len = bio->bi_iter.bi_size;
+ rq->nr_phys_segments = nr_segs;
if (bio_integrity(bio))
rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
bio);
@@ -2774,15 +2780,15 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
return __blk_mq_issue_directly(hctx, rq, last);
}
-static void blk_mq_plug_issue_direct(struct blk_plug *plug)
+static void blk_mq_issue_direct(struct rq_list *rqs)
{
struct blk_mq_hw_ctx *hctx = NULL;
struct request *rq;
int queued = 0;
blk_status_t ret = BLK_STS_OK;
- while ((rq = rq_list_pop(&plug->mq_list))) {
- bool last = rq_list_empty(&plug->mq_list);
+ while ((rq = rq_list_pop(rqs))) {
+ bool last = rq_list_empty(rqs);
if (hctx != rq->mq_hctx) {
if (hctx) {
@@ -2813,15 +2819,64 @@ out:
blk_mq_commit_rqs(hctx, queued, false);
}
-static void __blk_mq_flush_plug_list(struct request_queue *q,
- struct blk_plug *plug)
+static void __blk_mq_flush_list(struct request_queue *q, struct rq_list *rqs)
{
if (blk_queue_quiesced(q))
return;
- q->mq_ops->queue_rqs(&plug->mq_list);
+ q->mq_ops->queue_rqs(rqs);
+}
+
+static unsigned blk_mq_extract_queue_requests(struct rq_list *rqs,
+ struct rq_list *queue_rqs)
+{
+ struct request *rq = rq_list_pop(rqs);
+ struct request_queue *this_q = rq->q;
+ struct request **prev = &rqs->head;
+ struct rq_list matched_rqs = {};
+ struct request *last = NULL;
+ unsigned depth = 1;
+
+ rq_list_add_tail(&matched_rqs, rq);
+ while ((rq = *prev)) {
+ if (rq->q == this_q) {
+ /* move rq from rqs to matched_rqs */
+ *prev = rq->rq_next;
+ rq_list_add_tail(&matched_rqs, rq);
+ depth++;
+ } else {
+ /* leave rq in rqs */
+ prev = &rq->rq_next;
+ last = rq;
+ }
+ }
+
+ rqs->tail = last;
+ *queue_rqs = matched_rqs;
+ return depth;
}
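
blk_mq_extract_queue_requests() is a pointer-to-pointer partition of a singly linked list: matching nodes are unlinked and appended to a result list while everything else stays in place, in order. A self-contained model of the technique (plain C, not kernel code):

#include <stdio.h>

struct node { int q; struct node *next; };

/* Move every node with n->q == key from *head into *out, preserving
 * the relative order of both lists; returns how many moved. */
static unsigned int partition(struct node **head, int key, struct node **out)
{
	struct node **prev = head, **out_tail = out, *n;
	unsigned int depth = 0;

	while ((n = *prev)) {
		if (n->q == key) {
			*prev = n->next;	/* unlink from the source */
			n->next = NULL;
			*out_tail = n;		/* append to the result */
			out_tail = &n->next;
			depth++;
		} else {
			prev = &n->next;	/* keep in the source */
		}
	}
	return depth;
}

int main(void)
{
	struct node c = { 2, NULL }, b = { 1, &c }, a = { 2, &b };
	struct node *head = &a, *matched = NULL;

	/* Extracts a and c (both q == 2) in order; head keeps only b. */
	printf("moved %u\n", partition(&head, 2, &matched));
	return 0;	/* prints: moved 2 */
}

The kernel version additionally fixes up rqs->tail, since struct rq_list tracks its tail for O(1) appends; the model above omits that bookkeeping.
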
-static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
+static void blk_mq_dispatch_queue_requests(struct rq_list *rqs, unsigned depth)
+{
+ struct request_queue *q = rq_list_peek(rqs)->q;
+
+ trace_block_unplug(q, depth, true);
+
+ /*
+ * Peek first request and see if we have a ->queue_rqs() hook.
+ * If we do, we can dispatch the whole list in one go.
+ * We already know at this point that all requests belong to the
+ * same queue, caller must ensure that's the case.
+ */
+ if (q->mq_ops->queue_rqs) {
+ blk_mq_run_dispatch_ops(q, __blk_mq_flush_list(q, rqs));
+ if (rq_list_empty(rqs))
+ return;
+ }
+
+ blk_mq_run_dispatch_ops(q, blk_mq_issue_direct(rqs));
+}
+
+static void blk_mq_dispatch_list(struct rq_list *rqs, bool from_sched)
{
struct blk_mq_hw_ctx *this_hctx = NULL;
struct blk_mq_ctx *this_ctx = NULL;
@@ -2831,7 +2886,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
LIST_HEAD(list);
do {
- struct request *rq = rq_list_pop(&plug->mq_list);
+ struct request *rq = rq_list_pop(rqs);
if (!this_hctx) {
this_hctx = rq->mq_hctx;
@@ -2844,9 +2899,9 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
}
list_add_tail(&rq->queuelist, &list);
depth++;
- } while (!rq_list_empty(&plug->mq_list));
+ } while (!rq_list_empty(rqs));
- plug->mq_list = requeue_list;
+ *rqs = requeue_list;
trace_block_unplug(this_hctx->queue, depth, !from_sched);
percpu_ref_get(&this_hctx->queue->q_usage_counter);
@@ -2866,9 +2921,21 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
percpu_ref_put(&this_hctx->queue->q_usage_counter);
}
+static void blk_mq_dispatch_multiple_queue_requests(struct rq_list *rqs)
+{
+ do {
+ struct rq_list queue_rqs;
+ unsigned depth;
+
+ depth = blk_mq_extract_queue_requests(rqs, &queue_rqs);
+ blk_mq_dispatch_queue_requests(&queue_rqs, depth);
+ while (!rq_list_empty(&queue_rqs))
+ blk_mq_dispatch_list(&queue_rqs, false);
+ } while (!rq_list_empty(rqs));
+}
+
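
Worked through on a small example (illustrative): a plug list r1(qA) -> r2(qB) -> r3(qA) takes two passes here. Pass one extracts {r1, r3} as a depth-2 batch for queue A and dispatches it, preferring a single ->queue_rqs() call when the driver provides one; pass two extracts {r2} for queue B; the outer loop exits once rqs is empty.
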
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
- struct request *rq;
unsigned int depth;
/*
@@ -2883,34 +2950,19 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
depth = plug->rq_count;
plug->rq_count = 0;
- if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
- struct request_queue *q;
-
- rq = rq_list_peek(&plug->mq_list);
- q = rq->q;
- trace_block_unplug(q, depth, true);
-
- /*
- * Peek first request and see if we have a ->queue_rqs() hook.
- * If we do, we can dispatch the whole plug list in one go. We
- * already know at this point that all requests belong to the
- * same queue, caller must ensure that's the case.
- */
- if (q->mq_ops->queue_rqs) {
- blk_mq_run_dispatch_ops(q,
- __blk_mq_flush_plug_list(q, plug));
- if (rq_list_empty(&plug->mq_list))
- return;
+ if (!plug->has_elevator && !from_schedule) {
+ if (plug->multiple_queues) {
+ blk_mq_dispatch_multiple_queue_requests(&plug->mq_list);
+ return;
}
- blk_mq_run_dispatch_ops(q,
- blk_mq_plug_issue_direct(plug));
+ blk_mq_dispatch_queue_requests(&plug->mq_list, depth);
if (rq_list_empty(&plug->mq_list))
return;
}
do {
- blk_mq_dispatch_plug_list(plug, from_schedule);
+ blk_mq_dispatch_list(&plug->mq_list, from_schedule);
} while (!rq_list_empty(&plug->mq_list));
}
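
The resulting flush policy, summarized (a restatement of the branches above, not new behavior):

/*
 * blk_mq_flush_plug_list() after this change:
 *
 *   has_elevator || from_schedule -> batch into per-hctx lists via
 *                                    blk_mq_dispatch_list();
 *   plug->multiple_queues         -> blk_mq_dispatch_multiple_queue_requests(),
 *                                    which handles everything and returns;
 *   otherwise                     -> blk_mq_dispatch_queue_requests() for the
 *                                    single queue, then blk_mq_dispatch_list()
 *                                    for whatever it could not issue directly.
 */
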
@@ -2961,13 +3013,18 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
static struct request *blk_mq_get_new_requests(struct request_queue *q,
struct blk_plug *plug,
- struct bio *bio,
- unsigned int nsegs)
+ struct bio *bio)
{
struct blk_mq_alloc_data data = {
.q = q,
- .nr_tags = 1,
+ .flags = 0,
+ .shallow_depth = 0,
.cmd_flags = bio->bi_opf,
+ .rq_flags = 0,
+ .nr_tags = 1,
+ .cached_rqs = NULL,
+ .ctx = NULL,
+ .hctx = NULL
};
struct request *rq;
@@ -2980,12 +3037,9 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
}
rq = __blk_mq_alloc_requests(&data);
- if (rq)
- return rq;
- rq_qos_cleanup(q, bio);
- if (bio->bi_opf & REQ_NOWAIT)
- bio_wouldblock_error(bio);
- return NULL;
+ if (unlikely(!rq))
+ rq_qos_cleanup(q, bio);
+ return rq;
}
/*
@@ -3080,8 +3134,6 @@ void blk_mq_submit_bio(struct bio *bio)
goto new_request;
}
- bio = blk_queue_bounce(bio, q);
-
/*
* The cached request already holds a q_usage_counter reference and we
* don't have to acquire a new one if we use it.
@@ -3117,16 +3169,21 @@ void blk_mq_submit_bio(struct bio *bio)
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
goto queue_exit;
- if (blk_queue_is_zoned(q) && blk_zone_plug_bio(bio, nr_segs))
- goto queue_exit;
+ if (bio_needs_zone_write_plugging(bio)) {
+ if (blk_zone_plug_bio(bio, nr_segs))
+ goto queue_exit;
+ }
new_request:
- if (!rq) {
- rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq))
- goto queue_exit;
- } else {
+ if (rq) {
blk_mq_use_cached_rq(rq, plug, bio);
+ } else {
+ rq = blk_mq_get_new_requests(q, plug, bio);
+ if (unlikely(!rq)) {
+ if (bio->bi_opf & REQ_NOWAIT)
+ bio_wouldblock_error(bio);
+ goto queue_exit;
+ }
}
trace_block_getrq(bio);
@@ -3310,6 +3367,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
rq->special_vec = rq_src->special_vec;
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
+ rq->nr_integrity_segments = rq_src->nr_integrity_segments;
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
goto free_and_out;
@@ -3479,8 +3537,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
if (node == NUMA_NO_NODE)
node = set->numa_node;
- tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
- BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
+ tags = blk_mq_init_tags(nr_tags, reserved_tags, set->flags, node);
if (!tags)
return NULL;
@@ -4220,13 +4277,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
bool shared)
{
struct request_queue *q;
+ unsigned int memflags;
lockdep_assert_held(&set->tag_list_lock);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
queue_set_hctx_shared(q, shared);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
}
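
Judging by the _nomemsave/_nomemrestore names and the memalloc_noio_save()/restore() calls later in this patch, blk_mq_freeze_queue() now also enters a noio allocation scope and returns the saved flags, which the paired unfreeze restores. A minimal caller sketch under that assumption (the caller name is hypothetical):

/* Hypothetical caller showing the new freeze/unfreeze pairing. */
static void update_queue_attribute(struct request_queue *q)
{
	unsigned int memflags;

	memflags = blk_mq_freeze_queue(q);	/* freeze + memalloc_noio_save() */
	/* ... change state that must not race with in-flight I/O ... */
	blk_mq_unfreeze_queue(q, memflags);	/* thaw + restore the flags */
}
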
@@ -4456,14 +4514,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
return NULL;
}
-static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
- struct request_queue *q)
+static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+ struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
unsigned long i, j;
- /* protect against switching io scheduler */
- mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
int old_node;
int node = blk_mq_get_hctx_node(set, i);
@@ -4496,7 +4552,12 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j);
- mutex_unlock(&q->sysfs_lock);
+}
+
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+ struct request_queue *q)
+{
+ __blk_mq_realloc_hw_ctxs(set, q);
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
@@ -4545,8 +4606,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->nr_requests = set->queue_depth;
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
- blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q);
+ blk_mq_add_queue_tag_set(set, q);
return 0;
err_hctxs:
@@ -4766,6 +4827,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
goto out_free_srcu;
}
+ init_rwsem(&set->update_nr_hwq_lock);
+
ret = -ENOMEM;
set->tags = kcalloc_node(set->nr_hw_queues,
sizeof(struct blk_mq_tags *), GFP_KERNEL,
@@ -4906,90 +4969,67 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
}
/*
- * request_queue and elevator_type pair.
- * It is just used by __blk_mq_update_nr_hw_queues to cache
- * the elevator_type associated with a request_queue.
+ * Switch back to the elevator type stored in the xarray.
*/
-struct blk_mq_qe_pair {
- struct list_head node;
- struct request_queue *q;
- struct elevator_type *type;
-};
-
-/*
- * Cache the elevator_type in qe pair list and switch the
- * io scheduler to 'none'
- */
-static bool blk_mq_elv_switch_none(struct list_head *head,
- struct request_queue *q)
+static void blk_mq_elv_switch_back(struct request_queue *q,
+ struct xarray *elv_tbl)
{
- struct blk_mq_qe_pair *qe;
+ struct elevator_type *e = xa_load(elv_tbl, q->id);
- qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
- if (!qe)
- return false;
-
- /* q->elevator needs protection from ->sysfs_lock */
- mutex_lock(&q->sysfs_lock);
-
- /* the check has to be done with holding sysfs_lock */
- if (!q->elevator) {
- kfree(qe);
- goto unlock;
- }
+ /* elv_update_nr_hw_queues() unfreezes the queue. */
+ elv_update_nr_hw_queues(q, e);
- INIT_LIST_HEAD(&qe->node);
- qe->q = q;
- qe->type = q->elevator->type;
- /* keep a reference to the elevator module as we'll switch back */
- __elevator_get(qe->type);
- list_add(&qe->node, head);
- elevator_disable(q);
-unlock:
- mutex_unlock(&q->sysfs_lock);
-
- return true;
+ /* Drop the reference acquired in blk_mq_elv_switch_none. */
+ if (e)
+ elevator_put(e);
}
-static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head,
- struct request_queue *q)
+/*
+ * Stores the elevator type in the xarray and sets the current elevator to
+ * 'none', using q->id as the index under which the type is stored.
+ */
+static int blk_mq_elv_switch_none(struct request_queue *q,
+ struct xarray *elv_tbl)
{
- struct blk_mq_qe_pair *qe;
+ int ret = 0;
- list_for_each_entry(qe, head, node)
- if (qe->q == q)
- return qe;
+ lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock);
- return NULL;
-}
+ /*
+ * Accessing q->elevator without holding q->elevator_lock is safe here
+ * because we're called from the nr_hw_queues update, which is protected by
+ * set->update_nr_hwq_lock in the writer context. So, scheduler update/
+ * switch code (which acquires the same lock in the reader context)
+ * can't run concurrently.
+ */
+ if (q->elevator) {
-static void blk_mq_elv_switch_back(struct list_head *head,
- struct request_queue *q)
-{
- struct blk_mq_qe_pair *qe;
- struct elevator_type *t;
+ ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL);
+ if (WARN_ON_ONCE(ret))
+ return ret;
- qe = blk_lookup_qe_pair(head, q);
- if (!qe)
- return;
- t = qe->type;
- list_del(&qe->node);
- kfree(qe);
+ /*
+ * Before we switch the elevator to 'none', take a reference to the
+ * elevator module so that it can't be removed while the nr_hw_queues
+ * update is running. The reference is dropped again when we switch
+ * the elevator back.
+ */
+ __elevator_get(q->elevator->type);
- mutex_lock(&q->sysfs_lock);
- elevator_switch(q, t);
- /* drop the reference acquired in blk_mq_elv_switch_none */
- elevator_put(t);
- mutex_unlock(&q->sysfs_lock);
+ elevator_set_none(q);
+ }
+ return ret;
}
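
The xarray here is just an id -> pointer stash: each queue's elevator type is inserted under q->id before the update and looked up again on the way back. The core calls, in isolation (a fragment, with error handling trimmed):

struct xarray elv_tbl;

xa_init(&elv_tbl);
/* stash: fails if q->id is already present or allocation fails */
ret = xa_insert(&elv_tbl, q->id, q->elevator->type, GFP_KERNEL);
/* ... reallocate hardware contexts ... */
e = xa_load(&elv_tbl, q->id);	/* NULL for queues that had no elevator */
xa_destroy(&elv_tbl);
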
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
int nr_hw_queues)
{
struct request_queue *q;
- LIST_HEAD(head);
int prev_nr_hw_queues = set->nr_hw_queues;
+ unsigned int memflags;
int i;
+ struct xarray elv_tbl;
lockdep_assert_held(&set->tag_list_lock);
@@ -5000,29 +5040,34 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
return;
+ memflags = memalloc_noio_save();
+
+ xa_init(&elv_tbl);
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ blk_mq_debugfs_unregister_hctxs(q);
+ blk_mq_sysfs_unregister_hctxs(q);
+ }
+
list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_freeze_queue(q);
+ blk_mq_freeze_queue_nomemsave(q);
+
/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
* updating the new sw to hw queue mappings.
*/
list_for_each_entry(q, &set->tag_list, tag_set_list)
- if (!blk_mq_elv_switch_none(&head, q))
+ if (blk_mq_elv_switch_none(q, &elv_tbl))
goto switch_back;
- list_for_each_entry(q, &set->tag_list, tag_set_list) {
- blk_mq_debugfs_unregister_hctxs(q);
- blk_mq_sysfs_unregister_hctxs(q);
- }
-
if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
- goto reregister;
+ goto switch_back;
fallback:
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
- blk_mq_realloc_hw_ctxs(set, q);
+ __blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
int i = prev_nr_hw_queues;
@@ -5037,19 +5082,22 @@ fallback:
}
blk_mq_map_swqueue(q);
}
+switch_back:
+ /* blk_mq_elv_switch_back() unfreezes the queue for us. */
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_elv_switch_back(q, &elv_tbl);
-reregister:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register_hctxs(q);
blk_mq_debugfs_register_hctxs(q);
+
+ blk_mq_remove_hw_queues_cpuhp(q);
+ blk_mq_add_hw_queues_cpuhp(q);
}
-switch_back:
- list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_elv_switch_back(&head, q);
+ xa_destroy(&elv_tbl);
- list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_unfreeze_queue(q);
+ memalloc_noio_restore(memflags);
/* Free the excess tags when nr_hw_queues shrink. */
for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
@@ -5058,9 +5106,11 @@ switch_back:
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
{
+ down_write(&set->update_nr_hwq_lock);
mutex_lock(&set->tag_list_lock);
__blk_mq_update_nr_hw_queues(set, nr_hw_queues);
mutex_unlock(&set->tag_list_lock);
+ up_write(&set->update_nr_hwq_lock);
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
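
Locking summary for the new rwsem (from the comments above): the nr_hw_queues update is the writer, while elevator switch/update paths are expected to be readers, so the two cannot run concurrently:

down_write(&set->update_nr_hwq_lock);	/* nr_hw_queues update (this path) */
down_read(&set->update_nr_hwq_lock);	/* elevator switch paths (readers) */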