Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 247
1 file changed, 123 insertions, 124 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ecfc47fad236..a6d4780580fc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -127,8 +127,7 @@ struct mq_inflight {
 	unsigned int inflight[2];
 };
 
-static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
-				  struct request *rq, void *priv,
+static bool blk_mq_check_inflight(struct request *rq, void *priv,
 				  bool reserved)
 {
 	struct mq_inflight *mi = priv;
@@ -260,17 +259,9 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
  */
 void blk_mq_wait_quiesce_done(struct request_queue *q)
 {
-	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
-	bool rcu = false;
-
-	queue_for_each_hw_ctx(q, hctx, i) {
-		if (hctx->flags & BLK_MQ_F_BLOCKING)
-			synchronize_srcu(hctx->srcu);
-		else
-			rcu = true;
-	}
-	if (rcu)
+	if (blk_queue_has_srcu(q))
+		synchronize_srcu(q->srcu);
+	else
 		synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);
@@ -394,7 +385,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	INIT_LIST_HEAD(&rq->queuelist);
 	/* tag was already set */
 	WRITE_ONCE(rq->deadline, 0);
-	refcount_set(&rq->ref, 1);
+	req_ref_set(rq, 1);
 
 	if (rq->rq_flags & RQF_ELV) {
 		struct elevator_queue *e = data->q->elevator;
@@ -642,7 +633,7 @@ void blk_mq_free_request(struct request *rq)
 	rq_qos_done(q, rq);
 
 	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
-	if (refcount_dec_and_test(&rq->ref))
+	if (req_ref_put_and_test(rq))
 		__blk_mq_free_request(rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
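The req_ref_set() and req_ref_put_and_test() calls introduced above replace direct refcount_t operations on rq->ref; the helpers themselves are defined outside this file (block/blk.h in this series) and are not part of any hunk here. A minimal sketch of what they look like, assuming rq->ref has been converted from refcount_t to atomic_t so the batched completion path can manage references more cheaply:

/*
 * Sketch of the request reference helpers assumed by the hunks above
 * (approximate, not the verbatim block/blk.h definitions). rq->ref is
 * assumed to be an atomic_t rather than a refcount_t, trading the
 * refcount_t saturation semantics for an explicit sanity check.
 */
static inline void req_ref_set(struct request *req, int value)
{
	atomic_set(&req->ref, value);
}

static inline bool req_ref_put_and_test(struct request *req)
{
	WARN_ON_ONCE(atomic_read(&req->ref) <= 0);
	return atomic_dec_and_test(&req->ref);
}

static inline int req_ref_read(struct request *req)
{
	return atomic_read(&req->ref);
}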
@@ -718,6 +709,47 @@ static void blk_print_req_error(struct request *req, blk_status_t status)
 		IOPRIO_PRIO_CLASS(req->ioprio));
 }
 
+/*
+ * Fully end IO on a request. Does not support partial completions, or
+ * errors.
+ */
+static void blk_complete_request(struct request *req)
+{
+	const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0;
+	int total_bytes = blk_rq_bytes(req);
+	struct bio *bio = req->bio;
+
+	trace_block_rq_complete(req, BLK_STS_OK, total_bytes);
+
+	if (!bio)
+		return;
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ)
+		req->q->integrity.profile->complete_fn(req, total_bytes);
+#endif
+
+	blk_account_io_completion(req, total_bytes);
+
+	do {
+		struct bio *next = bio->bi_next;
+
+		/* Completion has already been traced */
+		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+		if (!is_flush)
+			bio_endio(bio);
+		bio = next;
+	} while (bio);
+
+	/*
+	 * Reset counters so that the request stacking driver
+	 * can find how many bytes remain in the request
+	 * later.
+	 */
+	req->bio = NULL;
+	req->__data_len = 0;
+}
+
 /**
  * blk_update_request - Complete multiple bytes without completing the request
  * @req: the request being processed
@@ -931,14 +963,14 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
 		prefetch(rq->bio);
 		prefetch(rq->rq_next);
 
-		blk_update_request(rq, BLK_STS_OK, blk_rq_bytes(rq));
+		blk_complete_request(rq);
 		if (iob->need_ts)
 			__blk_mq_end_request_acct(rq, now);
 
 		rq_qos_done(rq->q, rq);
 
 		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
-		if (!refcount_dec_and_test(&rq->ref))
+		if (!req_ref_put_and_test(rq))
 			continue;
 
 		blk_crypto_free_request(rq);
@@ -1071,26 +1103,6 @@ void blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
-static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
-	__releases(hctx->srcu)
-{
-	if (!(hctx->flags & BLK_MQ_F_BLOCKING))
-		rcu_read_unlock();
-	else
-		srcu_read_unlock(hctx->srcu, srcu_idx);
-}
-
-static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
-	__acquires(hctx->srcu)
-{
-	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
-		/* shut up gcc false positive */
-		*srcu_idx = 0;
-		rcu_read_lock();
-	} else
-		*srcu_idx = srcu_read_lock(hctx->srcu);
-}
-
 /**
  * blk_mq_start_request - Start processing a request
  * @rq: Pointer to request to be started
@@ -1336,14 +1348,15 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
-			       void *priv, bool reserved)
+static bool blk_mq_rq_inflight(struct request *rq, void *priv,
+			       bool reserved)
 {
 	/*
-	 * If we find a request that isn't idle and the queue matches,
-	 * we know the queue is busy. Return false to stop the iteration.
+	 * If we find a request that isn't idle we know the queue is busy
+	 * as it's checked in the iter.
+	 * Return false to stop the iteration.
 	 */
-	if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
+	if (blk_mq_request_started(rq)) {
 		bool *busy = priv;
 
 		*busy = true;
@@ -1401,12 +1414,11 @@ void blk_mq_put_rq_ref(struct request *rq)
 {
 	if (is_flush_rq(rq))
 		rq->end_io(rq, 0);
-	else if (refcount_dec_and_test(&rq->ref))
+	else if (req_ref_put_and_test(rq))
 		__blk_mq_free_request(rq);
 }
 
-static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
-		struct request *rq, void *priv, bool reserved)
+static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved)
 {
 	unsigned long *next = priv;
 
@@ -1947,19 +1959,14 @@ out:
  */
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
-	int srcu_idx;
-
 	/*
 	 * We can't run the queue inline with ints disabled. Ensure that
 	 * we catch bad users of this early.
 	 */
 	WARN_ON_ONCE(in_interrupt());
 
-	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
-
-	hctx_lock(hctx, &srcu_idx);
-	blk_mq_sched_dispatch_requests(hctx);
-	hctx_unlock(hctx, srcu_idx);
+	blk_mq_run_dispatch_ops(hctx->queue,
+			blk_mq_sched_dispatch_requests(hctx));
 }
 
 static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
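blk_mq_run_dispatch_ops() takes over the job of the deleted hctx_lock()/hctx_unlock() pair, but its definition lives in block/blk-mq.h rather than in this diff. A sketch of the shape it plausibly takes under the new per-queue SRCU model (an approximation, not the verbatim macro):

/*
 * Approximation of the dispatch guard assumed by the hunks above:
 * dispatch code runs under plain RCU unless the queue was allocated
 * with an SRCU struct (BLK_MQ_F_BLOCKING), in which case SRCU is used
 * so that ->queue_rq() may sleep. The real macro is in blk-mq.h.
 */
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
do {								\
	if (!blk_queue_has_srcu(q)) {				\
		rcu_read_lock();				\
		(dispatch_ops);					\
		rcu_read_unlock();				\
	} else {						\
		int srcu_idx;					\
								\
		might_sleep_if(check_sleep);			\
		srcu_idx = srcu_read_lock((q)->srcu);		\
		(dispatch_ops);					\
		srcu_read_unlock((q)->srcu, srcu_idx);		\
	}							\
} while (0)

#define blk_mq_run_dispatch_ops(q, dispatch_ops)		\
	__blk_mq_run_dispatch_ops(q, true, dispatch_ops)

Note how this also explains the __blk_mq_run_dispatch_ops(q, false, ...) call in blk_mq_run_hw_queue() below: that path may be entered from atomic context, so the might_sleep check is suppressed there.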
@@ -2071,7 +2078,6 @@ EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
  */
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
-	int srcu_idx;
 	bool need_run;
 
 	/*
@@ -2082,10 +2088,9 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	 * And queue will be rerun in blk_mq_unquiesce_queue() if it is
 	 * quiesced.
 	 */
-	hctx_lock(hctx, &srcu_idx);
-	need_run = !blk_queue_quiesced(hctx->queue) &&
-		blk_mq_hctx_has_pending(hctx);
-	hctx_unlock(hctx, srcu_idx);
+	__blk_mq_run_dispatch_ops(hctx->queue, false,
+		need_run = !blk_queue_quiesced(hctx->queue) &&
+		blk_mq_hctx_has_pending(hctx));
 
 	if (need_run)
 		__blk_mq_delay_run_hw_queue(hctx, async, 0);
@@ -2488,33 +2493,18 @@ insert:
 static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		struct request *rq)
 {
-	blk_status_t ret;
-	int srcu_idx;
-
-	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
-
-	hctx_lock(hctx, &srcu_idx);
+	blk_status_t ret =
+		__blk_mq_try_issue_directly(hctx, rq, false, true);
 
-	ret = __blk_mq_try_issue_directly(hctx, rq, false, true);
 	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
 		blk_mq_request_bypass_insert(rq, false, true);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);
-
-	hctx_unlock(hctx, srcu_idx);
 }
 
 static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
 {
-	blk_status_t ret;
-	int srcu_idx;
-	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
-
-	hctx_lock(hctx, &srcu_idx);
-	ret = __blk_mq_try_issue_directly(hctx, rq, true, last);
-	hctx_unlock(hctx, srcu_idx);
-
-	return ret;
+	return __blk_mq_try_issue_directly(rq->mq_hctx, rq, true, last);
 }
 
 static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
@@ -2559,10 +2549,19 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
 		blk_mq_commit_rqs(hctx, &queued, from_schedule);
 }
 
+static void __blk_mq_flush_plug_list(struct request_queue *q,
+				     struct blk_plug *plug)
+{
+	if (blk_queue_quiesced(q))
+		return;
+	q->mq_ops->queue_rqs(&plug->mq_list);
+}
+
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct blk_mq_hw_ctx *this_hctx;
 	struct blk_mq_ctx *this_ctx;
+	struct request *rq;
 	unsigned int depth;
 	LIST_HEAD(list);
 
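The new ->queue_rqs() hook hands a driver the entire plug list in one call. Nothing in this file shows the driver side, so here is a hypothetical implementation (mydrv_queue_rqs, mydrv_queue_rq and mydrv_try_issue are made-up names) illustrating the contract __blk_mq_flush_plug_list() relies on: consume what you can, and leave anything you could not issue on the list so the caller's rq_list_empty() check falls back to blk_mq_plug_issue_direct():

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd);
static bool mydrv_try_issue(struct request *rq);	/* made-up helper */

/*
 * Hypothetical ->queue_rqs() for an imaginary driver: pop requests off
 * the plug list and put any that cannot be issued right now back, so
 * the core sees a non-empty list and issues them one by one instead.
 */
static void mydrv_queue_rqs(struct request **rqlist)
{
	struct request *rq, *requeue_list = NULL;

	while ((rq = rq_list_pop(rqlist))) {
		if (!mydrv_try_issue(rq))
			rq_list_add(&requeue_list, rq);
	}
	*rqlist = requeue_list;	/* unissued requests, if any */
}

static const struct blk_mq_ops mydrv_mq_ops = {
	.queue_rq	= mydrv_queue_rq,
	.queue_rqs	= mydrv_queue_rqs,	/* optional batch hook */
};

The payoff is that a driver such as nvme can ring its submission doorbell once per plug list instead of once per request.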
@@ -2571,7 +2570,31 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	plug->rq_count = 0;
 
 	if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
-		blk_mq_plug_issue_direct(plug, false);
+		struct request_queue *q;
+
+		rq = rq_list_peek(&plug->mq_list);
+		q = rq->q;
+
+		/*
+		 * Peek first request and see if we have a ->queue_rqs() hook.
+		 * If we do, we can dispatch the whole plug list in one go. We
+		 * already know at this point that all requests belong to the
+		 * same queue, caller must ensure that's the case.
+		 *
+		 * Since we pass off the full list to the driver at this point,
+		 * we do not increment the active request count for the queue.
+		 * Bypass shared tags for now because of that.
+		 */
+		if (q->mq_ops->queue_rqs &&
+		    !(rq->mq_hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
+			blk_mq_run_dispatch_ops(q,
+				__blk_mq_flush_plug_list(q, plug));
+			if (rq_list_empty(plug->mq_list))
+				return;
+		}
+
+		blk_mq_run_dispatch_ops(q,
+				blk_mq_plug_issue_direct(plug, false));
 		if (rq_list_empty(plug->mq_list))
 			return;
 	}
@@ -2580,8 +2603,6 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	this_ctx = NULL;
 	depth = 0;
 	do {
-		struct request *rq;
-
 		rq = rq_list_pop(&plug->mq_list);
 
 		if (!this_hctx) {
@@ -2693,26 +2714,18 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
 
 static struct request *blk_mq_get_new_requests(struct request_queue *q,
 					       struct blk_plug *plug,
-					       struct bio *bio,
-					       unsigned int nsegs)
+					       struct bio *bio)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
 		.nr_tags	= 1,
+		.cmd_flags	= bio->bi_opf,
 	};
 	struct request *rq;
 
 	if (unlikely(bio_queue_enter(bio)))
 		return NULL;
 
-	if (unlikely(!submit_bio_checks(bio)))
-		goto queue_exit;
-	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-		goto queue_exit;
-
-	rq_qos_throttle(q, bio);
-
-	/* ->bi_opf is finalized after submit_bio_checks() returns */
-	data.cmd_flags	= bio->bi_opf;
 	if (plug) {
 		data.nr_tags	= plug->nr_ios;
 		plug->nr_ios	= 1;
@@ -2720,21 +2733,17 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	}
 
 	rq = __blk_mq_alloc_requests(&data);
-	if (!rq)
-		goto fail;
-	return rq;
-
-fail:
+	if (rq)
+		return rq;
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-queue_exit:
 	blk_queue_exit(q);
 	return NULL;
 }
 
 static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
-		struct blk_plug *plug, struct bio *bio, unsigned int nsegs)
+		struct blk_plug *plug, struct bio *bio)
 {
 	struct request *rq;
 
@@ -2744,10 +2753,6 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	if (!rq || rq->q != q)
 		return NULL;
 
-	if (unlikely(!submit_bio_checks(bio)))
-		return NULL;
-	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-		return NULL;
 	if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
 		return NULL;
 	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
@@ -2756,7 +2761,6 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	rq->cmd_flags = bio->bi_opf;
 	plug->cached_rq = rq_list_next(rq);
 	INIT_LIST_HEAD(&rq->queuelist);
-	rq_qos_throttle(q, bio);
 	return rq;
 }
 
@@ -2792,9 +2796,14 @@ void blk_mq_submit_bio(struct bio *bio)
 	if (!bio_integrity_prep(bio))
 		return;
 
-	rq = blk_mq_get_cached_request(q, plug, bio, nr_segs);
+	if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
+		return;
+
+	rq_qos_throttle(q, bio);
+
+	rq = blk_mq_get_cached_request(q, plug, bio);
 	if (!rq) {
-		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+		rq = blk_mq_get_new_requests(q, plug, bio);
 		if (unlikely(!rq))
 			return;
 	}
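With the bio merge and rq_qos throttle hoisted out of the two allocation helpers into blk_mq_submit_bio(), the fast path becomes: check merge once, throttle once, then either reuse a plug-cached request or allocate a fresh batch sized by plug->nr_ios. For context, this is the plugging pattern that feeds it, sketched as a hypothetical call site (submit_many() is made up; blk_start_plug_nr_ios(), submit_bio() and blk_finish_plug() are the existing interfaces):

/*
 * Hypothetical call site showing how the cached-request fast path
 * above gets fed: a plug sized with nr_ios makes the first
 * blk_mq_get_new_requests() pull a whole batch of tags, and later
 * submissions are served from plug->cached_rq without re-entering
 * the tag allocator.
 */
static void submit_many(struct bio **bios, unsigned int nr)
{
	struct blk_plug plug;
	unsigned int i;

	blk_start_plug_nr_ios(&plug, nr);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);
	blk_finish_plug(&plug);	/* drains via blk_mq_flush_plug_list() */
}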
@@ -2825,7 +2834,8 @@ void blk_mq_submit_bio(struct bio *bio)
 	    (q->nr_hw_queues == 1 || !is_sync)))
 		blk_mq_sched_insert_request(rq, false, true, true);
 	else
-		blk_mq_try_issue_directly(rq->mq_hctx, rq);
+		blk_mq_run_dispatch_ops(rq->q,
+				blk_mq_try_issue_directly(rq->mq_hctx, rq));
 }
 
 /**
@@ -2910,7 +2920,9 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request
 	 * bypass a potential scheduler on the bottom device for
 	 * insert.
 	 */
-	return blk_mq_request_issue_directly(rq, true);
+	blk_mq_run_dispatch_ops(rq->q,
+			ret = blk_mq_request_issue_directly(rq, true));
+	return ret;
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
@@ -3048,7 +3060,7 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
 			unsigned long rq_addr = (unsigned long)rq;
 
 			if (rq_addr >= start && rq_addr < end) {
-				WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
+				WARN_ON_ONCE(req_ref_read(rq) != 0);
 				cmpxchg(&drv_tags->rqs[i], rq, NULL);
 			}
 		}
@@ -3382,7 +3394,7 @@ static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
 	if (!tags)
 		return;
 
-	WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
+	WARN_ON_ONCE(req_ref_read(flush_rq) != 0);
 
 	for (i = 0; i < queue_depth; i++)
 		cmpxchg(&tags->rqs[i], flush_rq, NULL);
@@ -3436,20 +3448,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
 	}
 }
 
-static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
-{
-	int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
-
-	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
-			   __alignof__(struct blk_mq_hw_ctx)) !=
-		     sizeof(struct blk_mq_hw_ctx));
-
-	if (tag_set->flags & BLK_MQ_F_BLOCKING)
-		hw_ctx_size += sizeof(struct srcu_struct);
-
-	return hw_ctx_size;
-}
-
 static int blk_mq_init_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
@@ -3487,7 +3485,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
 	struct blk_mq_hw_ctx *hctx;
 	gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
 
-	hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
+	hctx = kzalloc_node(sizeof(struct blk_mq_hw_ctx), gfp, node);
 	if (!hctx)
 		goto fail_alloc_hctx;
 
@@ -3529,8 +3527,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
 	if (!hctx->fq)
 		goto free_bitmap;
 
-	if (hctx->flags & BLK_MQ_F_BLOCKING)
-		init_srcu_struct(hctx->srcu);
 	blk_mq_hctx_kobj_init(hctx);
 
 	return hctx;
@@ -3866,7 +3862,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 	struct request_queue *q;
 	int ret;
 
-	q = blk_alloc_queue(set->numa_node);
+	q = blk_alloc_queue(set->numa_node, set->flags & BLK_MQ_F_BLOCKING);
 	if (!q)
 		return ERR_PTR(-ENOMEM);
 	q->queuedata = queuedata;
@@ -4015,6 +4011,9 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		struct request_queue *q)
 {
+	WARN_ON_ONCE(blk_queue_has_srcu(q) !=
+		     !!(set->flags & BLK_MQ_F_BLOCKING));
+
 	/* mark the queue as mq asap */
 	q->mq_ops = set->ops;
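The blk_alloc_queue() signature change and the WARN_ON_ONCE() above are the visible ends of the series' central move: the srcu_struct migrates from each blk_mq_hw_ctx to the request_queue, and is allocated only for BLK_MQ_F_BLOCKING tag sets. The queue-side support lives outside this file; a rough sketch of it, assuming the flag and cache names used elsewhere in the series (QUEUE_FLAG_HAS_SRCU, blk_queue_has_srcu(), and a second slab cache for SRCU-carrying queues) rather than quoting the real blk-core.c:

/*
 * Rough sketch of the queue-side support assumed above (approximate;
 * the real code is in block/blk-core.c and the block headers). Only
 * queues created for BLK_MQ_F_BLOCKING tag sets pay for the
 * srcu_struct, which now trails struct request_queue instead of
 * struct blk_mq_hw_ctx.
 */
#define blk_queue_has_srcu(q) \
	test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)

static struct kmem_cache *blk_requestq_cachep;		/* plain queues */
static struct kmem_cache *blk_requestq_srcu_cachep;	/* queue + srcu_struct */

struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
{
	struct request_queue *q;

	q = kmem_cache_alloc_node(alloc_srcu ? blk_requestq_srcu_cachep :
				  blk_requestq_cachep,
				  GFP_KERNEL | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	if (alloc_srcu) {
		blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q);
		if (init_srcu_struct(q->srcu))
			goto fail;
	}

	/* ... remaining queue init unchanged from the previous version ... */
	return q;

fail:
	kmem_cache_free(blk_requestq_srcu_cachep, q);
	return NULL;
}

This is why blk_mq_init_allocated_queue() can simply WARN if the queue's SRCU state disagrees with the tag set's BLK_MQ_F_BLOCKING flag: the decision was already baked in at allocation time.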