diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bio.c | 12 | ||||
-rw-r--r-- | block/blk-core.c | 41 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 181 | ||||
-rw-r--r-- | block/blk-mq-sched.h | 25 | ||||
-rw-r--r-- | block/blk-mq-sysfs.c | 40 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 3 | ||||
-rw-r--r-- | block/blk-mq.c | 140 | ||||
-rw-r--r-- | block/blk-mq.h | 4 | ||||
-rw-r--r-- | block/blk-stat.c | 4 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/elevator.c | 114 | ||||
-rw-r--r-- | block/genhd.c | 37 | ||||
-rw-r--r-- | block/sed-opal.c | 10 |
13 files changed, 366 insertions, 247 deletions
diff --git a/block/bio.c b/block/bio.c index 5eec5e08417f..e75878f8b14a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) bio_list_init(&punt); bio_list_init(&nopunt); - while ((bio = bio_list_pop(current->bio_list))) + while ((bio = bio_list_pop(¤t->bio_list[0]))) bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[0] = nopunt; - *current->bio_list = nopunt; + bio_list_init(&nopunt); + while ((bio = bio_list_pop(¤t->bio_list[1]))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[1] = nopunt; spin_lock(&bs->rescue_lock); bio_list_merge(&bs->rescue_list, &punt); @@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * we retry with the original gfp_flags. */ - if (current->bio_list && !bio_list_empty(current->bio_list)) + if (current->bio_list && + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); diff --git a/block/blk-core.c b/block/blk-core.c index 1086dac8724c..d772c221cc17 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -578,8 +578,6 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - put_disk_devt(q->disk_devt); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } @@ -1975,7 +1973,14 @@ end_io: */ blk_qc_t generic_make_request(struct bio *bio) { - struct bio_list bio_list_on_stack; + /* + * bio_list_on_stack[0] contains bios submitted by the current + * make_request_fn. + * bio_list_on_stack[1] contains bios that were submitted before + * the current make_request_fn, but that haven't been processed + * yet. + */ + struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) @@ -1992,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio) * should be added at the tail */ if (current->bio_list) { - bio_list_add(current->bio_list, bio); + bio_list_add(¤t->bio_list[0], bio); goto out; } @@ -2011,23 +2016,39 @@ blk_qc_t generic_make_request(struct bio *bio) * bio_list, and call into ->make_request() again. */ BUG_ON(bio->bi_next); - bio_list_init(&bio_list_on_stack); - current->bio_list = &bio_list_on_stack; + bio_list_init(&bio_list_on_stack[0]); + current->bio_list = bio_list_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { + struct bio_list lower, same; + + /* Create a fresh bio_list for all subordinate requests */ + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); blk_queue_exit(q); - bio = bio_list_pop(current->bio_list); + /* sort new bios into those for a lower level + * and those for the same level + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) + if (q == bdev_get_queue(bio->bi_bdev)) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + /* now assemble so we handle the lowest level first */ + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } else { - struct bio *bio_next = bio_list_pop(current->bio_list); - bio_io_error(bio); - bio = bio_next; } + bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); current->bio_list = NULL; /* deactivate */ diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 09af8ff18719..c974a1bbf4cb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -171,7 +171,8 @@ void blk_mq_sched_put_request(struct request *rq) void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { - struct elevator_queue *e = hctx->queue->elevator; + struct request_queue *q = hctx->queue; + struct elevator_queue *e = q->elevator; const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; bool did_work = false; LIST_HEAD(rq_list); @@ -203,10 +204,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) */ if (!list_empty(&rq_list)) { blk_mq_sched_mark_restart_hctx(hctx); - did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); + did_work = blk_mq_dispatch_rq_list(q, &rq_list); } else if (!has_sched_dispatch) { blk_mq_flush_busy_ctxs(hctx, &rq_list); - blk_mq_dispatch_rq_list(hctx, &rq_list); + blk_mq_dispatch_rq_list(q, &rq_list); } /* @@ -222,7 +223,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) if (!rq) break; list_add(&rq->queuelist, &rq_list); - } while (blk_mq_dispatch_rq_list(hctx, &rq_list)); + } while (blk_mq_dispatch_rq_list(q, &rq_list)); } } @@ -317,25 +318,68 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return true; } -static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) + if (blk_mq_hctx_has_pending(hctx)) { blk_mq_run_hw_queue(hctx, true); + return true; + } } + return false; } -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - unsigned int i; +/** + * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list + * @pos: loop cursor. + * @skip: the list element that will not be examined. Iteration starts at + * @skip->next. + * @head: head of the list to examine. This list must have at least one + * element, namely @skip. + * @member: name of the list_head structure within typeof(*pos). + */ +#define list_for_each_entry_rcu_rr(pos, skip, head, member) \ + for ((pos) = (skip); \ + (pos = (pos)->member.next != (head) ? list_entry_rcu( \ + (pos)->member.next, typeof(*pos), member) : \ + list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \ + (pos) != (skip); ) - if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_restart_hctx(hctx); +/* + * Called after a driver tag has been freed to check whether a hctx needs to + * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware + * queues in a round-robin fashion if the tag set of @hctx is shared with other + * hardware queues. + */ +void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) +{ + struct blk_mq_tags *const tags = hctx->tags; + struct blk_mq_tag_set *const set = hctx->queue->tag_set; + struct request_queue *const queue = hctx->queue, *q; + struct blk_mq_hw_ctx *hctx2; + unsigned int i, j; + + if (set->flags & BLK_MQ_F_TAG_SHARED) { + rcu_read_lock(); + list_for_each_entry_rcu_rr(q, queue, &set->tag_list, + tag_set_list) { + queue_for_each_hw_ctx(q, hctx2, i) + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + goto done; + } + j = hctx->queue_num + 1; + for (i = 0; i < queue->nr_hw_queues; i++, j++) { + if (j == queue->nr_hw_queues) + j = 0; + hctx2 = queue->queue_hw_ctx[j]; + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + break; } +done: + rcu_read_unlock(); } else { blk_mq_sched_restart_hctx(hctx); } @@ -431,11 +475,67 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, } } -int blk_mq_sched_setup(struct request_queue *q) +static int blk_mq_sched_alloc_tags(struct request_queue *q, + struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct blk_mq_tag_set *set = q->tag_set; + int ret; + + hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests, + set->reserved_tags); + if (!hctx->sched_tags) + return -ENOMEM; + + ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); + if (ret) + blk_mq_sched_free_tags(set, hctx, hctx_idx); + + return ret; +} + +static void blk_mq_sched_tags_teardown(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; - int ret, i; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_free_tags(set, hctx, i); +} + +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return 0; + + return blk_mq_sched_alloc_tags(q, hctx, hctx_idx); +} + +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return; + + blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx); +} + +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + int ret; + + if (!e) { + q->elevator = NULL; + return 0; + } /* * Default to 256, since we don't split into sync/async like the @@ -443,49 +543,30 @@ int blk_mq_sched_setup(struct request_queue *q) */ q->nr_requests = 2 * BLKDEV_MAX_RQ; - /* - * We're switching to using an IO scheduler, so setup the hctx - * scheduler tags and switch the request map from the regular - * tags to scheduler tags. First allocate what we need, so we - * can safely fail and fallback, if needed. - */ - ret = 0; queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags = blk_mq_alloc_rq_map(set, i, - q->nr_requests, set->reserved_tags); - if (!hctx->sched_tags) { - ret = -ENOMEM; - break; - } - ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests); + ret = blk_mq_sched_alloc_tags(q, hctx, i); if (ret) - break; + goto err; } - /* - * If we failed, free what we did allocate - */ - if (ret) { - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->sched_tags) - continue; - blk_mq_sched_free_tags(set, hctx, i); - } - - return ret; - } + ret = e->ops.mq.init_sched(q, e); + if (ret) + goto err; return 0; + +err: + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; + return ret; } -void blk_mq_sched_teardown(struct request_queue *q) +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) { - struct blk_mq_tag_set *set = q->tag_set; - struct blk_mq_hw_ctx *hctx; - int i; - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_free_tags(set, hctx, i); + if (e->type->ops.mq.exit_sched) + e->type->ops.mq.exit_sched(e); + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; } int blk_mq_sched_init(struct request_queue *q) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index a75b16b123f7..3a9e6e40558b 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -19,7 +19,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx); +void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async, bool can_block); @@ -32,8 +32,13 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, struct list_head *rq_list, struct request *(*get_rq)(struct blk_mq_hw_ctx *)); -int blk_mq_sched_setup(struct request_queue *q); -void blk_mq_sched_teardown(struct request_queue *q); +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); + +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); int blk_mq_sched_init(struct request_queue *q); @@ -131,20 +136,6 @@ static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } -/* - * Mark a hardware queue and the request queue it belongs to as needing a - * restart. - */ -static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); -} - static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 295e69670c39..d745ab81033a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -17,6 +17,15 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { } +static void blk_mq_hw_sysfs_release(struct kobject *kobj) +{ + struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, + kobj); + free_cpumask_var(hctx->cpumask); + kfree(hctx->ctxs); + kfree(hctx); +} + struct blk_mq_ctx_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_mq_ctx *, char *); @@ -200,7 +209,7 @@ static struct kobj_type blk_mq_ctx_ktype = { static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_attrs = default_hw_ctx_attrs, - .release = blk_mq_sysfs_release, + .release = blk_mq_hw_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -242,24 +251,15 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; - int i, j; + int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - hctx_for_each_ctx(hctx, ctx, j) - kobject_put(&ctx->kobj); - - kobject_put(&hctx->kobj); - } - blk_mq_debugfs_unregister_hctxs(q); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); - kobject_put(&q->mq_kobj); - kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; @@ -277,7 +277,19 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } -static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_deinit(struct request_queue *q) +{ + struct blk_mq_ctx *ctx; + int cpu; + + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(q->queue_ctx, cpu); + kobject_put(&ctx->kobj); + } + kobject_put(&q->mq_kobj); +} + +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -297,8 +309,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) blk_mq_disable_hotplug(); - blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index e48bc2c72615..9d97bfc4d465 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { struct blk_mq_tags *tags = set->tags[i]; + if (!tags) + continue; + for (j = 0; j < tags->nr_tags; j++) { if (!tags->static_rqs[j]) continue; diff --git a/block/blk-mq.c b/block/blk-mq.c index b2fd175e84d7..572966f49596 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -321,7 +321,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); - blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); if (!rq) @@ -349,7 +348,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) blk_mq_sched_completed_request(hctx, rq); - blk_mq_sched_restart_queues(hctx); + blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -697,17 +696,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, { struct blk_mq_timeout_data *data = priv; - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { - /* - * If a request wasn't started before the queue was - * marked dying, kill it here or it'll go unnoticed. - */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - } + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; - } if (time_after_eq(jiffies, rq->deadline)) { if (!blk_mark_rq_complete(rq)) @@ -855,12 +845,8 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT, }; - if (rq->tag != -1) { -done: - if (hctx) - *hctx = data.hctx; - return true; - } + if (rq->tag != -1) + goto done; if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) data.flags |= BLK_MQ_REQ_RESERVED; @@ -872,10 +858,12 @@ done: atomic_inc(&data.hctx->nr_active); } data.hctx->tags->rqs[rq->tag] = rq; - goto done; } - return false; +done: + if (hctx) + *hctx = data.hctx; + return rq->tag != -1; } static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, @@ -972,13 +960,16 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) return true; } -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) +bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) { - struct request_queue *q = hctx->queue; + struct blk_mq_hw_ctx *hctx; struct request *rq; LIST_HEAD(driver_list); struct list_head *dptr; - int queued, ret = BLK_MQ_RQ_QUEUE_OK; + int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; + + if (list_empty(list)) + return false; /* * Start off with dptr being NULL, so we start the first request @@ -989,8 +980,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) /* * Now process all the entries, sending them to the driver. */ - queued = 0; - while (!list_empty(list)) { + errors = queued = 0; + do { struct blk_mq_queue_data bd; rq = list_first_entry(list, struct request, queuelist); @@ -1046,6 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) default: pr_err("blk-mq: bad return on queue: %d\n", ret); case BLK_MQ_RQ_QUEUE_ERROR: + errors++; rq->errors = -EIO; blk_mq_end_request(rq, rq->errors); break; @@ -1060,7 +1052,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) */ if (!dptr && list->next != list->prev) dptr = &driver_list; - } + } while (!list_empty(list)); hctx->dispatched[queued_to_index(queued)]++; @@ -1097,7 +1089,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) blk_mq_run_hw_queue(hctx, true); } - return queued != 0; + return (queued + errors) != 0; } static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) @@ -1143,7 +1135,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) return hctx->next_cpu; } -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, + unsigned long msecs) { if (unlikely(blk_mq_hctx_stopped(hctx) || !blk_mq_hw_queue_mapped(hctx))) @@ -1160,7 +1153,24 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) put_cpu(); } - kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); + if (msecs == 0) + kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->run_work); + else + kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->delayed_run_work, + msecs_to_jiffies(msecs)); +} + +void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) +{ + __blk_mq_delay_run_hw_queue(hctx, true, msecs); +} +EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); + +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +{ + __blk_mq_delay_run_hw_queue(hctx, async, 0); } void blk_mq_run_hw_queues(struct request_queue *q, bool async) @@ -1263,6 +1273,15 @@ static void blk_mq_run_work_fn(struct work_struct *work) __blk_mq_run_hw_queue(hctx); } +static void blk_mq_delayed_run_work_fn(struct work_struct *work) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work); + + __blk_mq_run_hw_queue(hctx); +} + static void blk_mq_delay_work_fn(struct work_struct *work) { struct blk_mq_hw_ctx *hctx; @@ -1434,7 +1453,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) +static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, + bool may_sleep) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1475,7 +1495,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) } insert: - blk_mq_sched_insert_request(rq, false, true, true, false); + blk_mq_sched_insert_request(rq, false, true, false, may_sleep); } /* @@ -1569,11 +1589,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, false); rcu_read_unlock(); } else { srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, true); srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx); } goto done; @@ -1931,6 +1951,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, hctx->fq->flush_rq, hctx_idx, flush_start_tag + hctx_idx); + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); + if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -1955,16 +1977,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, } } -static void blk_mq_free_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - queue_for_each_hw_ctx(q, hctx, i) - free_cpumask_var(hctx->cpumask); -} - static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) @@ -1977,6 +1989,7 @@ static int blk_mq_init_hctx(struct request_queue *q, node = hctx->numa_node = set->numa_node; INIT_WORK(&hctx->run_work, blk_mq_run_work_fn); + INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn); INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); @@ -2007,9 +2020,12 @@ static int blk_mq_init_hctx(struct request_queue *q, set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; + if (blk_mq_sched_init_hctx(q, hctx, hctx_idx)) + goto exit_hctx; + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); if (!hctx->fq) - goto exit_hctx; + goto sched_exit_hctx; if (set->ops->init_request && set->ops->init_request(set->driver_data, @@ -2024,6 +2040,8 @@ static int blk_mq_init_hctx(struct request_queue *q, free_fq: kfree(hctx->fq); + sched_exit_hctx: + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -2045,7 +2063,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -2251,21 +2268,23 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned int i; - blk_mq_sched_teardown(q); - /* hctx kobj stays in hctx */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) continue; - kfree(hctx->ctxs); - kfree(hctx); + kobject_put(&hctx->kobj); } q->mq_map = NULL; kfree(q->queue_hw_ctx); - /* ctx kobj stays in queue_ctx */ + /* + * release .mq_kobj and sw queue's kobject now because + * both share lifetime with request queue. + */ + blk_mq_sysfs_deinit(q); + free_percpu(q->queue_ctx); } @@ -2330,10 +2349,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, if (hctx->tags) blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); - free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); - kfree(hctx->ctxs); - kfree(hctx); hctxs[j] = NULL; } @@ -2352,6 +2368,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) @@ -2442,7 +2461,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); - blk_mq_free_hw_queues(q, set); } /* Basically redo blk_mq_init_queue with queue frozen */ @@ -2580,6 +2598,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) return 0; } +static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) +{ + if (set->ops->map_queues) + return set->ops->map_queues(set); + else + return blk_mq_map_queues(set); +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -2634,10 +2660,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (!set->mq_map) goto out_free_tags; - if (set->ops->map_queues) - ret = set->ops->map_queues(set); - else - ret = blk_mq_map_queues(set); + ret = blk_mq_update_queue_map(set); if (ret) goto out_free_mq_map; @@ -2729,6 +2752,7 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) blk_mq_freeze_queue(q); set->nr_hw_queues = nr_hw_queues; + blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); diff --git a/block/blk-mq.h b/block/blk-mq.h index 088ced003c13..660a17e1d033 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -31,7 +31,7 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); +bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, @@ -77,6 +77,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); +extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); diff --git a/block/blk-stat.c b/block/blk-stat.c index 9b43efb8933f..186fcb981e9b 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -30,11 +30,11 @@ static void blk_stat_flush_batch(struct blk_rq_stat *stat) static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { + blk_stat_flush_batch(src); + if (!src->nr_samples) return; - blk_stat_flush_batch(src); - dst->min = min(dst->min, src->min); dst->max = max(dst->max, src->max); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c44b321335f3..37f0b3ad635e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -816,7 +816,7 @@ static void blk_release_queue(struct kobject *kobj) if (q->elevator) { ioc_clear_queue(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); } blk_exit_rl(&q->root_rl); diff --git a/block/elevator.c b/block/elevator.c index 01139f549b5b..dbeecf7be719 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -242,26 +242,21 @@ int elevator_init(struct request_queue *q, char *name) } } - if (e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = e->ops.mq.init_sched(q, e); - } else + if (e->uses_mq) + err = blk_mq_init_sched(q, e); + else err = e->ops.sq.elevator_init_fn(q, e); - if (err) { - if (e->uses_mq) - blk_mq_sched_teardown(q); + if (err) elevator_put(e); - } return err; } EXPORT_SYMBOL(elevator_init); -void elevator_exit(struct elevator_queue *e) +void elevator_exit(struct request_queue *q, struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); if (e->uses_mq && e->type->ops.mq.exit_sched) - e->type->ops.mq.exit_sched(e); + blk_mq_exit_sched(q, e); else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) e->type->ops.sq.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); @@ -946,6 +941,45 @@ void elv_unregister(struct elevator_type *e) } EXPORT_SYMBOL_GPL(elv_unregister); +static int elevator_switch_mq(struct request_queue *q, + struct elevator_type *new_e) +{ + int ret; + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + if (q->elevator) { + if (q->elevator->registered) + elv_unregister_queue(q); + ioc_clear_queue(q); + elevator_exit(q, q->elevator); + } + + ret = blk_mq_init_sched(q, new_e); + if (ret) + goto out; + + if (new_e) { + ret = elv_register_queue(q); + if (ret) { + elevator_exit(q, q->elevator); + goto out; + } + } + + if (new_e) + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + else + blk_add_trace_msg(q, "elv switch: none"); + +out: + blk_mq_unfreeze_queue(q); + blk_mq_start_stopped_hw_queues(q, true); + return ret; + +} + /* * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we @@ -958,10 +992,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) bool old_registered = false; int err; - if (q->mq_ops) { - blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); - } + if (q->mq_ops) + return elevator_switch_mq(q, new_e); /* * Turn on BYPASS and drain all requests w/ elevator private data. @@ -973,11 +1005,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (old) { old_registered = old->registered; - if (old->uses_mq) - blk_mq_sched_teardown(q); - - if (!q->mq_ops) - blk_queue_bypass_start(q); + blk_queue_bypass_start(q); /* unregister and clear all auxiliary data of the old elevator */ if (old_registered) @@ -987,56 +1015,32 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) } /* allocate, init and register new elevator */ - if (new_e) { - if (new_e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = new_e->ops.mq.init_sched(q, new_e); - } else - err = new_e->ops.sq.elevator_init_fn(q, new_e); - if (err) - goto fail_init; + err = new_e->ops.sq.elevator_init_fn(q, new_e); + if (err) + goto fail_init; - err = elv_register_queue(q); - if (err) - goto fail_register; - } else - q->elevator = NULL; + err = elv_register_queue(q); + if (err) + goto fail_register; /* done, kill the old one and finish */ if (old) { - elevator_exit(old); - if (!q->mq_ops) - blk_queue_bypass_end(q); + elevator_exit(q, old); + blk_queue_bypass_end(q); } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); - } - - if (new_e) - blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); - else - blk_add_trace_msg(q, "elv switch: none"); + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); return 0; fail_register: - if (q->mq_ops) - blk_mq_sched_teardown(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); fail_init: /* switch failed, restore and re-register old elevator */ if (old) { q->elevator = old; elv_register_queue(q); - if (!q->mq_ops) - blk_queue_bypass_end(q); - } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); + blk_queue_bypass_end(q); } return err; diff --git a/block/genhd.c b/block/genhd.c index b26a5ea115d0..a9c516a8b37d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -572,20 +572,6 @@ exit: disk_part_iter_exit(&piter); } -void put_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt && atomic_dec_and_test(&disk_devt->count)) - disk_devt->release(disk_devt); -} -EXPORT_SYMBOL(put_disk_devt); - -void get_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt) - atomic_inc(&disk_devt->count); -} -EXPORT_SYMBOL(get_disk_devt); - /** * device_add_disk - add partitioning information to kernel list * @parent: parent device for the disk @@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_alloc_events(disk); - /* - * Take a reference on the devt and assign it to queue since it - * must not be reallocated while the bdi is registered - */ - disk->queue->disk_devt = disk->disk_devt; - get_disk_devt(disk->disk_devt); - /* Register BDI before referencing it from bdev */ bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); @@ -681,12 +660,16 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - /* - * Unregister bdi before releasing device numbers (as they can get - * reused and we'd get clashes in sysfs). - */ - bdi_unregister(disk->queue->backing_dev_info); - blk_unregister_queue(disk); + if (disk->queue) { + /* + * Unregister bdi before releasing device numbers (as they can + * get reused and we'd get clashes in sysfs). + */ + bdi_unregister(disk->queue->backing_dev_info); + blk_unregister_queue(disk); + } else { + WARN_ON(1); + } blk_unregister_region(disk_devt(disk), disk->minors); part_stat_set_all(&disk->part0, 0); diff --git a/block/sed-opal.c b/block/sed-opal.c index 1e18dca360fc..14035f826b5e 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont) static int gen_key(struct opal_dev *dev, void *data) { - const u8 *method; u8 uid[OPAL_UID_LENGTH]; int err = 0; @@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data) set_comid(dev, dev->comid); memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); - method = opalmethod[OPAL_GENKEY]; kfree(dev->prev_data); dev->prev_data = NULL; @@ -1669,7 +1667,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data) static int lock_unlock_locking_range(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; - const u8 *method; struct opal_lock_unlock *lkul = data; u8 read_locked = 1, write_locked = 1; int err = 0; @@ -1677,7 +1674,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data) clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1733,14 +1729,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 read_locked = 1, write_locked = 1; - const u8 *method; struct opal_lock_unlock *lkul = data; int ret; clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -2133,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev, pr_err("Locking state was not RO or RW\n"); return -EINVAL; } - if (lk_unlk->session.who < OPAL_USER1 && + if (lk_unlk->session.who < OPAL_USER1 || lk_unlk->session.who > OPAL_USER9) { pr_err("Authority was not within the range of users: %d\n", lk_unlk->session.who); @@ -2316,7 +2310,7 @@ static int opal_activate_user(struct opal_dev *dev, int ret; /* We can't activate Admin1 it's active as manufactured */ - if (opal_session->who < OPAL_USER1 && + if (opal_session->who < OPAL_USER1 || opal_session->who > OPAL_USER9) { pr_err("Who was not a valid user: %d\n", opal_session->who); return -EINVAL; |