Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-cgroup.c     | 116
-rw-r--r-- | block/bfq-iosched.c    |   2
-rw-r--r-- | block/bfq-iosched.h    |  23
-rw-r--r-- | block/bio-integrity.c  |  11
-rw-r--r-- | block/bio.c            |   8
-rw-r--r-- | block/blk-core.c       | 174
-rw-r--r-- | block/blk-exec.c       |   4
-rw-r--r-- | block/blk-flush.c      |   8
-rw-r--r-- | block/blk-integrity.c  |   4
-rw-r--r-- | block/blk-mq-debugfs.c |  77
-rw-r--r-- | block/blk-mq-sched.c   |  62
-rw-r--r-- | block/blk-mq-sched.h   |   4
-rw-r--r-- | block/blk-mq.c         | 148
-rw-r--r-- | block/blk-throttle.c   |  22
-rw-r--r-- | block/bounce.c         |   4
-rw-r--r-- | block/bsg-lib.c        |   5
-rw-r--r-- | block/bsg.c            |  12
-rw-r--r-- | block/cfq-iosched.c    |   9
-rw-r--r-- | block/t10-pi.c         |  30
19 files changed, 455 insertions, 268 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index c8a32fb345cf..78b2e0db4fb2 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -52,7 +52,7 @@ BFQG_FLAG_FNS(idling) BFQG_FLAG_FNS(empty) #undef BFQG_FLAG_FNS -/* This should be called with the queue_lock held. */ +/* This should be called with the scheduler lock held. */ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) { unsigned long long now; @@ -67,7 +67,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) bfqg_stats_clear_waiting(stats); } -/* This should be called with the queue_lock held. */ +/* This should be called with the scheduler lock held. */ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, struct bfq_group *curr_bfqg) { @@ -81,7 +81,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, bfqg_stats_mark_waiting(stats); } -/* This should be called with the queue_lock held. */ +/* This should be called with the scheduler lock held. */ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { unsigned long long now; @@ -203,12 +203,30 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) static void bfqg_get(struct bfq_group *bfqg) { - return blkg_get(bfqg_to_blkg(bfqg)); + bfqg->ref++; } void bfqg_put(struct bfq_group *bfqg) { - return blkg_put(bfqg_to_blkg(bfqg)); + bfqg->ref--; + + if (bfqg->ref == 0) + kfree(bfqg); +} + +static void bfqg_and_blkg_get(struct bfq_group *bfqg) +{ + /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ + bfqg_get(bfqg); + + blkg_get(bfqg_to_blkg(bfqg)); +} + +void bfqg_and_blkg_put(struct bfq_group *bfqg) +{ + bfqg_put(bfqg); + + blkg_put(bfqg_to_blkg(bfqg)); } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, @@ -312,7 +330,11 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; - bfqg_get(bfqg); + /* + * Make sure that bfqg and its associated blkg do not + * disappear before entity. + */ + bfqg_and_blkg_get(bfqg); } entity->parent = bfqg->my_entity; /* NULL for root group */ entity->sched_data = &bfqg->sched_data; @@ -399,6 +421,8 @@ struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) return NULL; } + /* see comments in bfq_bic_update_cgroup for why refcounting */ + bfqg_get(bfqg); return &bfqg->pd; } @@ -426,7 +450,7 @@ void bfq_pd_free(struct blkg_policy_data *pd) struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_exit(&bfqg->stats); - return kfree(bfqg); + bfqg_put(bfqg); } void bfq_pd_reset_stats(struct blkg_policy_data *pd) @@ -496,9 +520,10 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, * Move @bfqq to @bfqg, deactivating it from its old group and reactivating * it on the new one. Avoid putting the entity on the old group idle tree. * - * Must be called under the queue lock; the cgroup owning @bfqg must - * not disappear (by now this just means that we are called under - * rcu_read_lock()). + * Must be called under the scheduler lock, to make sure that the blkg + * owning @bfqg does not disappear (see comments in + * bfq_bic_update_cgroup on guaranteeing the consistency of blkg + * objects). 
*/ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) @@ -519,16 +544,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); - bfqg_put(bfqq_group(bfqq)); + bfqg_and_blkg_put(bfqq_group(bfqq)); - /* - * Here we use a reference to bfqg. We don't need a refcounter - * as the cgroup reference will not be dropped, so that its - * destroy() callback will not be invoked. - */ entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; - bfqg_get(bfqg); + /* pin down bfqg and its associated blkg */ + bfqg_and_blkg_get(bfqg); if (bfq_bfqq_busy(bfqq)) { bfq_pos_tree_add_move(bfqd, bfqq); @@ -545,8 +566,9 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, * @bic: the bic to move. * @blkcg: the blk-cgroup to move to. * - * Move bic to blkcg, assuming that bfqd->queue is locked; the caller - * has to make sure that the reference to cgroup is valid across the call. + * Move bic to blkcg, assuming that bfqd->lock is held; which makes + * sure that the reference to cgroup is valid across the call (see + * comments in bfq_bic_update_cgroup on this issue) * * NOTE: an alternative approach might have been to store the current * cgroup in bfqq and getting a reference to it, reducing the lookup @@ -604,6 +626,57 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) goto out; bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); + /* + * Update blkg_path for bfq_log_* functions. We cache this + * path, and update it here, for the following + * reasons. Operations on blkg objects in blk-cgroup are + * protected with the request_queue lock, and not with the + * lock that protects the instances of this scheduler + * (bfqd->lock). This exposes BFQ to the following sort of + * race. + * + * The blkg_lookup performed in bfq_get_queue, protected + * through rcu, may happen to return the address of a copy of + * the original blkg. If this is the case, then the + * bfqg_and_blkg_get performed in bfq_get_queue, to pin down + * the blkg, is useless: it does not prevent blk-cgroup code + * from destroying both the original blkg and all objects + * directly or indirectly referred by the copy of the + * blkg. + * + * On the bright side, destroy operations on a blkg invoke, as + * a first step, hooks of the scheduler associated with the + * blkg. And these hooks are executed with bfqd->lock held for + * BFQ. As a consequence, for any blkg associated with the + * request queue this instance of the scheduler is attached + * to, we are guaranteed that such a blkg is not destroyed, and + * that all the pointers it contains are consistent, while we + * are holding bfqd->lock. A blkg_lookup performed with + * bfqd->lock held then returns a fully consistent blkg, which + * remains consistent until this lock is held. + * + * Thanks to the last fact, and to the fact that: (1) bfqg has + * been obtained through a blkg_lookup in the above + * assignment, and (2) bfqd->lock is being held, here we can + * safely use the policy data for the involved blkg (i.e., the + * field bfqg->pd) to get to the blkg associated with bfqg, + * and then we can safely use any field of blkg. After we + * release bfqd->lock, even just getting blkg through this + * bfqg may cause dangling references to be traversed, as + * bfqg->pd may not exist any more. 
+ * + * In view of the above facts, here we cache, in the bfqg, any + * blkg data we may need for this bic, and for its associated + * bfq_queue. As of now, we need to cache only the path of the + * blkg, which is used in the bfq_log_* functions. + * + * Finally, note that bfqg itself needs to be protected from + * destruction on the blkg_free of the original blkg (which + * invokes bfq_pd_free). We use an additional private + * refcounter for bfqg, to let it disappear only after no + * bfq_queue refers to it any longer. + */ + blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); bic->blkcg_serial_nr = serial_nr; out: rcu_read_unlock(); @@ -640,8 +713,6 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree with the entities. - * - * Needs queue_lock to be taken and reference to be valid over the call. */ static void bfq_reparent_active_entities(struct bfq_data *bfqd, struct bfq_group *bfqg, @@ -692,8 +763,7 @@ void bfq_pd_offline(struct blkg_policy_data *pd) /* * The idle tree may still contain bfq_queues belonging * to exited task because they never migrated to a different - * cgroup from the one being destroyed now. No one else - * can access them so it's safe to act without any lock. + * cgroup from the one being destroyed now. */ bfq_flush_idle_tree(st); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 08ce45096350..ed93da2462ab 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3665,7 +3665,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) kmem_cache_free(bfq_pool, bfqq); #ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_put(bfqg); + bfqg_and_blkg_put(bfqg); #endif } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ae783c06dfd9..5c3bf9861492 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -759,6 +759,12 @@ struct bfq_group { /* must be the first member */ struct blkg_policy_data pd; + /* cached path for this blkg (see comments in bfq_bic_update_cgroup) */ + char blkg_path[128]; + + /* reference counter (see comments in bfq_bic_update_cgroup) */ + int ref; + struct bfq_entity entity; struct bfq_sched_data sched_data; @@ -838,7 +844,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); -void bfqg_put(struct bfq_group *bfqg); +void bfqg_and_blkg_put(struct bfq_group *bfqg); #ifdef CONFIG_BFQ_GROUP_IOSCHED extern struct cftype bfq_blkcg_legacy_files[]; @@ -910,20 +916,13 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ - char __pbuf[128]; \ - \ - blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ - blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid, \ + blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid,\ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - __pbuf, ##args); \ + bfqq_group(bfqq)->blkg_path, ##args); \ } while (0) -#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ - char __pbuf[128]; \ - \ - blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ - blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args); \ -} while (0) +#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) 
\ + blk_add_trace_msg((bfqd)->queue, "%s " fmt, (bfqg)->blkg_path, ##args) #else /* CONFIG_BFQ_GROUP_IOSCHED */ diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5384713d48bc..b8a3a65f7364 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -175,6 +175,9 @@ bool bio_integrity_enabled(struct bio *bio) if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) return false; + if (!bio_sectors(bio)) + return false; + /* Already protected? */ if (bio_integrity(bio)) return false; @@ -221,7 +224,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, * @bio: bio to generate/verify integrity metadata for * @proc_fn: Pointer to the relevant processing function */ -static int bio_integrity_process(struct bio *bio, +static blk_status_t bio_integrity_process(struct bio *bio, integrity_processing_fn *proc_fn) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); @@ -229,7 +232,7 @@ static int bio_integrity_process(struct bio *bio, struct bvec_iter bviter; struct bio_vec bv; struct bio_integrity_payload *bip = bio_integrity(bio); - unsigned int ret = 0; + blk_status_t ret = BLK_STS_OK; void *prot_buf = page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; @@ -366,7 +369,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio *bio = bip->bip_bio; struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn); + bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn); /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; @@ -395,7 +398,7 @@ void bio_integrity_endio(struct bio *bio) * integrity metadata. Restore original bio end_io handler * and run it. */ - if (bio->bi_error) { + if (bio->bi_status) { bio->bi_end_io = bip->bip_end_io; bio_endio(bio); diff --git a/block/bio.c b/block/bio.c index 888e7801c638..7a5c8ed27f42 100644 --- a/block/bio.c +++ b/block/bio.c @@ -309,8 +309,8 @@ static struct bio *__bio_chain_endio(struct bio *bio) { struct bio *parent = bio->bi_private; - if (!parent->bi_error) - parent->bi_error = bio->bi_error; + if (!parent->bi_status) + parent->bi_status = bio->bi_status; bio_put(bio); return parent; } @@ -918,7 +918,7 @@ static void submit_bio_wait_endio(struct bio *bio) { struct submit_bio_ret *ret = bio->bi_private; - ret->error = bio->bi_error; + ret->error = blk_status_to_errno(bio->bi_status); complete(&ret->event); } @@ -1818,7 +1818,7 @@ again: if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), - bio, bio->bi_error); + bio, bio->bi_status); bio_clear_flag(bio, BIO_TRACE_COMPLETION); } diff --git a/block/blk-core.c b/block/blk-core.c index a7421b772d0e..8592409db272 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -129,11 +129,69 @@ void blk_rq_init(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(blk_rq_init); +static const struct { + int errno; + const char *name; +} blk_errors[] = { + [BLK_STS_OK] = { 0, "" }, + [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" }, + [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" }, + [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" }, + [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" }, + [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" }, + [BLK_STS_NEXUS] = { -EBADE, "critical nexus" }, + [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, + [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, + [BLK_STS_RESOURCE] = { -ENOMEM, 
"kernel resource" }, + + /* device mapper special case, should not leak out: */ + [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, + + /* everything else not covered above: */ + [BLK_STS_IOERR] = { -EIO, "I/O" }, +}; + +blk_status_t errno_to_blk_status(int errno) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(blk_errors); i++) { + if (blk_errors[i].errno == errno) + return (__force blk_status_t)i; + } + + return BLK_STS_IOERR; +} +EXPORT_SYMBOL_GPL(errno_to_blk_status); + +int blk_status_to_errno(blk_status_t status) +{ + int idx = (__force int)status; + + if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors))) + return -EIO; + return blk_errors[idx].errno; +} +EXPORT_SYMBOL_GPL(blk_status_to_errno); + +static void print_req_error(struct request *req, blk_status_t status) +{ + int idx = (__force int)status; + + if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors))) + return; + + printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", + __func__, blk_errors[idx].name, req->rq_disk ? + req->rq_disk->disk_name : "?", + (unsigned long long)blk_rq_pos(req)); +} + static void req_bio_endio(struct request *rq, struct bio *bio, - unsigned int nbytes, int error) + unsigned int nbytes, blk_status_t error) { if (error) - bio->bi_error = error; + bio->bi_status = error; if (unlikely(rq->rq_flags & RQF_QUIET)) bio_set_flag(bio, BIO_QUIET); @@ -1668,7 +1726,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio, q->bio_split); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); return BLK_QC_T_NONE; } @@ -1726,7 +1784,10 @@ get_rq: req = get_request(q, bio->bi_opf, bio, GFP_NOIO); if (IS_ERR(req)) { __wbt_done(q->rq_wb, wb_acct); - bio->bi_error = PTR_ERR(req); + if (PTR_ERR(req) == -ENOMEM) + bio->bi_status = BLK_STS_RESOURCE; + else + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); goto out_unlock; } @@ -1881,7 +1942,7 @@ generic_make_request_checks(struct bio *bio) { struct request_queue *q; int nr_sectors = bio_sectors(bio); - int err = -EIO; + blk_status_t status = BLK_STS_IOERR; char b[BDEVNAME_SIZE]; struct hd_struct *part; @@ -1924,7 +1985,7 @@ generic_make_request_checks(struct bio *bio) !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); if (!nr_sectors) { - err = 0; + status = BLK_STS_OK; goto end_io; } } @@ -1976,9 +2037,9 @@ generic_make_request_checks(struct bio *bio) return true; not_supported: - err = -EOPNOTSUPP; + status = BLK_STS_NOTSUPP; end_io: - bio->bi_error = err; + bio->bi_status = status; bio_endio(bio); return false; } @@ -2183,29 +2244,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, * @q: the queue to submit the request * @rq: the request being queued */ -int blk_insert_cloned_request(struct request_queue *q, struct request *rq) +blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned long flags; int where = ELEVATOR_INSERT_BACK; if (blk_cloned_rq_check_limits(q, rq)) - return -EIO; + return BLK_STS_IOERR; if (rq->rq_disk && should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) - return -EIO; + return BLK_STS_IOERR; if (q->mq_ops) { if (blk_queue_io_stat(q)) blk_account_io_start(rq, true); blk_mq_sched_insert_request(rq, false, true, false, false); - return 0; + return BLK_STS_OK; } spin_lock_irqsave(q->queue_lock, flags); if (unlikely(blk_queue_dying(q))) { spin_unlock_irqrestore(q->queue_lock, flags); - return -ENODEV; + return BLK_STS_IOERR; } /* 
@@ -2222,7 +2283,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - return 0; + return BLK_STS_OK; } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); @@ -2456,15 +2517,14 @@ struct request *blk_peek_request(struct request_queue *q) rq = NULL; break; } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { - int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO; - rq->rq_flags |= RQF_QUIET; /* * Mark this request as started so we don't trigger * any debug logic in the end I/O path. */ blk_start_request(rq); - __blk_end_request_all(rq, err); + __blk_end_request_all(rq, ret == BLKPREP_INVALID ? + BLK_STS_TARGET : BLK_STS_IOERR); } else { printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); break; @@ -2553,7 +2613,7 @@ EXPORT_SYMBOL(blk_fetch_request); /** * blk_update_request - Special helper function for request stacking drivers * @req: the request being processed - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete @req * * Description: @@ -2572,49 +2632,19 @@ EXPORT_SYMBOL(blk_fetch_request); * %false - this request doesn't have any more data * %true - this request has more data **/ -bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) +bool blk_update_request(struct request *req, blk_status_t error, + unsigned int nr_bytes) { int total_bytes; - trace_block_rq_complete(req, error, nr_bytes); + trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes); if (!req->bio) return false; - if (error && !blk_rq_is_passthrough(req) && - !(req->rq_flags & RQF_QUIET)) { - char *error_type; - - switch (error) { - case -ENOLINK: - error_type = "recoverable transport"; - break; - case -EREMOTEIO: - error_type = "critical target"; - break; - case -EBADE: - error_type = "critical nexus"; - break; - case -ETIMEDOUT: - error_type = "timeout"; - break; - case -ENOSPC: - error_type = "critical space allocation"; - break; - case -ENODATA: - error_type = "critical medium"; - break; - case -EIO: - default: - error_type = "I/O"; - break; - } - printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", - __func__, error_type, req->rq_disk ? 
- req->rq_disk->disk_name : "?", - (unsigned long long)blk_rq_pos(req)); - - } + if (unlikely(error && !blk_rq_is_passthrough(req) && + !(req->rq_flags & RQF_QUIET))) + print_req_error(req, error); blk_account_io_completion(req, nr_bytes); @@ -2680,7 +2710,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) } EXPORT_SYMBOL_GPL(blk_update_request); -static bool blk_update_bidi_request(struct request *rq, int error, +static bool blk_update_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { @@ -2721,7 +2751,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request); /* * queue lock must be held */ -void blk_finish_request(struct request *req, int error) +void blk_finish_request(struct request *req, blk_status_t error) { struct request_queue *q = req->q; @@ -2758,7 +2788,7 @@ EXPORT_SYMBOL(blk_finish_request); /** * blk_end_bidi_request - Complete a bidi request * @rq: the request to complete - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * @@ -2772,7 +2802,7 @@ EXPORT_SYMBOL(blk_finish_request); * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool blk_end_bidi_request(struct request *rq, int error, +static bool blk_end_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { struct request_queue *q = rq->q; @@ -2791,7 +2821,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, /** * __blk_end_bidi_request - Complete a bidi request with queue lock held * @rq: the request to complete - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * @@ -2803,7 +2833,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool __blk_end_bidi_request(struct request *rq, int error, +static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) @@ -2817,7 +2847,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error, /** * blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete * * Description: @@ -2828,7 +2858,8 @@ static bool __blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) +bool blk_end_request(struct request *rq, blk_status_t error, + unsigned int nr_bytes) { return blk_end_bidi_request(rq, error, nr_bytes, 0); } @@ -2837,12 +2868,12 @@ EXPORT_SYMBOL(blk_end_request); /** * blk_end_request_all - Helper function for drives to finish the request. * @rq: the request to finish - * @error: %0 for success, < %0 for error + * @error: block status code * * Description: * Completely finish @rq. 
*/ -void blk_end_request_all(struct request *rq, int error) +void blk_end_request_all(struct request *rq, blk_status_t error) { bool pending; unsigned int bidi_bytes = 0; @@ -2858,7 +2889,7 @@ EXPORT_SYMBOL(blk_end_request_all); /** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete * * Description: @@ -2868,7 +2899,8 @@ EXPORT_SYMBOL(blk_end_request_all); * %false - we are done with this request * %true - still buffers pending for this request **/ -bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) +bool __blk_end_request(struct request *rq, blk_status_t error, + unsigned int nr_bytes) { return __blk_end_bidi_request(rq, error, nr_bytes, 0); } @@ -2877,12 +2909,12 @@ EXPORT_SYMBOL(__blk_end_request); /** * __blk_end_request_all - Helper function for drives to finish the request. * @rq: the request to finish - * @error: %0 for success, < %0 for error + * @error: block status code * * Description: * Completely finish @rq. Must be called with queue lock held. */ -void __blk_end_request_all(struct request *rq, int error) +void __blk_end_request_all(struct request *rq, blk_status_t error) { bool pending; unsigned int bidi_bytes = 0; @@ -2898,7 +2930,7 @@ EXPORT_SYMBOL(__blk_end_request_all); /** * __blk_end_request_cur - Helper function to finish the current request chunk. * @rq: the request to finish the current chunk for - * @error: %0 for success, < %0 for error + * @error: block status code * * Description: * Complete the current consecutively mapped chunk from @rq. Must @@ -2908,7 +2940,7 @@ EXPORT_SYMBOL(__blk_end_request_all); * %false - we are done with this request * %true - still buffers pending for this request */ -bool __blk_end_request_cur(struct request *rq, int error) +bool __blk_end_request_cur(struct request *rq, blk_status_t error) { return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } @@ -3249,7 +3281,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) * Short-circuit if @q is dead */ if (unlikely(blk_queue_dying(q))) { - __blk_end_request_all(rq, -ENODEV); + __blk_end_request_all(rq, BLK_STS_IOERR); continue; } diff --git a/block/blk-exec.c b/block/blk-exec.c index a9451e3b8587..5c0f3dc446dc 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -16,7 +16,7 @@ * @rq: request to complete * @error: end I/O status of the request */ -static void blk_end_sync_rq(struct request *rq, int error) +static void blk_end_sync_rq(struct request *rq, blk_status_t error) { struct completion *waiting = rq->end_io_data; @@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; - __blk_end_request_all(rq, -ENXIO); + __blk_end_request_all(rq, BLK_STS_IOERR); spin_unlock_irq(q->queue_lock); return; } diff --git a/block/blk-flush.c b/block/blk-flush.c index c4e0880b54bb..a572b47fa059 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front) */ static bool blk_flush_complete_seq(struct request *rq, struct blk_flush_queue *fq, - unsigned int seq, int error) + unsigned int seq, blk_status_t error) { struct request_queue *q = rq->q; struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; @@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq, return kicked | 
queued; } -static void flush_end_io(struct request *flush_rq, int error) +static void flush_end_io(struct request *flush_rq, blk_status_t error) { struct request_queue *q = flush_rq->q; struct list_head *running; @@ -341,7 +341,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) return blk_flush_queue_rq(flush_rq, false); } -static void flush_data_end_io(struct request *rq, int error) +static void flush_data_end_io(struct request *rq, blk_status_t error) { struct request_queue *q = rq->q; struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); @@ -382,7 +382,7 @@ static void flush_data_end_io(struct request *rq, int error) blk_run_queue_async(q); } -static void mq_flush_data_end_io(struct request *rq, int error) +static void mq_flush_data_end_io(struct request *rq, blk_status_t error) { struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 0f891a9aff4d..feb30570eaf5 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = { .sysfs_ops = &integrity_ops, }; -static int blk_integrity_nop_fn(struct blk_integrity_iter *iter) +static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) { - return 0; + return BLK_STS_OK; } static const struct blk_integrity_profile nop_profile = { diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 803aed4d7221..9edebbdce0bd 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf, blk_mq_run_hw_queues(q, true); } else if (strcmp(op, "start") == 0) { blk_mq_start_stopped_hw_queues(q, true); + } else if (strcmp(op, "kick") == 0) { + blk_mq_kick_requeue_list(q); } else { pr_err("%s: unsupported operation '%s'\n", __func__, op); inval: - pr_err("%s: use either 'run' or 'start'\n", __func__); + pr_err("%s: use 'run', 'start' or 'kick'\n", __func__); return -EINVAL; } return count; @@ -267,6 +269,14 @@ static const char *const rqf_name[] = { }; #undef RQF_NAME +#define RQAF_NAME(name) [REQ_ATOM_##name] = #name +static const char *const rqaf_name[] = { + RQAF_NAME(COMPLETE), + RQAF_NAME(STARTED), + RQAF_NAME(POLL_SLEPT), +}; +#undef RQAF_NAME + int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; @@ -283,6 +293,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) seq_puts(m, ", .rq_flags="); blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, ARRAY_SIZE(rqf_name)); + seq_puts(m, ", .atomic_flags="); + blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name)); seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, rq->internal_tag); if (mq_ops->show_rq) @@ -298,6 +310,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) } EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); +static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos) + __acquires(&q->requeue_lock) +{ + struct request_queue *q = m->private; + + spin_lock_irq(&q->requeue_lock); + return seq_list_start(&q->requeue_list, *pos); +} + +static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct request_queue *q = m->private; + + return seq_list_next(v, &q->requeue_list, pos); +} + +static void queue_requeue_list_stop(struct seq_file *m, void *v) + __releases(&q->requeue_lock) +{ + struct request_queue *q = m->private; + + spin_unlock_irq(&q->requeue_lock); 
+} + +static const struct seq_operations queue_requeue_list_seq_ops = { + .start = queue_requeue_list_start, + .next = queue_requeue_list_next, + .stop = queue_requeue_list_stop, + .show = blk_mq_debugfs_rq_show, +}; + static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) __acquires(&hctx->lock) { @@ -329,6 +372,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = { .show = blk_mq_debugfs_rq_show, }; +struct show_busy_params { + struct seq_file *m; + struct blk_mq_hw_ctx *hctx; +}; + +/* + * Note: the state of a request may change while this function is in progress, + * e.g. due to a concurrent blk_mq_finish_request() call. + */ +static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) +{ + const struct show_busy_params *params = data; + + if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx && + test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + __blk_mq_debugfs_rq_show(params->m, + list_entry_rq(&rq->queuelist)); +} + +static int hctx_busy_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct show_busy_params params = { .m = m, .hctx = hctx }; + + blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, + ¶ms); + + return 0; +} + static int hctx_ctx_map_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; @@ -655,6 +728,7 @@ const struct file_operations blk_mq_debugfs_fops = { static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { {"poll_stat", 0400, queue_poll_stat_show}, + {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops}, {"state", 0600, queue_state_show, queue_state_write}, {}, }; @@ -663,6 +737,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { {"state", 0400, hctx_state_show}, {"flags", 0400, hctx_flags_show}, {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, + {"busy", 0400, hctx_busy_show}, {"ctx_map", 0400, hctx_ctx_map_show}, {"tags", 0400, hctx_tags_show}, {"tags_bitmap", 0400, hctx_tags_bitmap_show}, diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1f5b692526ae..c4e2afb9d12d 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -221,19 +221,71 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, } EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); +/* + * Reverse check our software queue for entries that we could potentially + * merge with. Currently includes a hand-wavy stop count of 8, to not spend + * too much time checking for merges. 
+ */ +static bool blk_mq_attempt_merge(struct request_queue *q, + struct blk_mq_ctx *ctx, struct bio *bio) +{ + struct request *rq; + int checked = 8; + + list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { + bool merged = false; + + if (!checked--) + break; + + if (!blk_rq_merge_ok(rq, bio)) + continue; + + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_back_merge(q, rq, bio); + break; + case ELEVATOR_FRONT_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_front_merge(q, rq, bio); + break; + case ELEVATOR_DISCARD_MERGE: + merged = bio_attempt_discard_merge(q, rq, bio); + break; + default: + continue; + } + + if (merged) + ctx->rq_merged++; + return merged; + } + + return false; +} + bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + bool ret = false; - if (e->type->ops.mq.bio_merge) { - struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - + if (e && e->type->ops.mq.bio_merge) { blk_mq_put_ctx(ctx); return e->type->ops.mq.bio_merge(hctx, bio); } - return false; + if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) { + /* default per sw-queue merge */ + spin_lock(&ctx->lock); + ret = blk_mq_attempt_merge(q, ctx, bio); + spin_unlock(&ctx->lock); + } + + blk_mq_put_ctx(ctx); + return ret; } bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index edafb5383b7b..b87e5be5db8c 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -38,9 +38,7 @@ int blk_mq_sched_init(struct request_queue *q); static inline bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { - struct elevator_queue *e = q->elevator; - - if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio)) + if (blk_queue_nomerges(q) || !bio_mergeable(bio)) return false; return __blk_mq_sched_bio_merge(q, bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index 1bcccedcc74f..359d2dc0d414 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -394,7 +394,7 @@ void blk_mq_free_request(struct request *rq) } EXPORT_SYMBOL_GPL(blk_mq_free_request); -inline void __blk_mq_end_request(struct request *rq, int error) +inline void __blk_mq_end_request(struct request *rq, blk_status_t error) { blk_account_io_done(rq); @@ -409,7 +409,7 @@ inline void __blk_mq_end_request(struct request *rq, int error) } EXPORT_SYMBOL(__blk_mq_end_request); -void blk_mq_end_request(struct request *rq, int error) +void blk_mq_end_request(struct request *rq, blk_status_t error) { if (blk_update_request(rq, error, blk_rq_bytes(rq))) BUG(); @@ -753,50 +753,6 @@ static void blk_mq_timeout_work(struct work_struct *work) blk_queue_exit(q); } -/* - * Reverse check our software queue for entries that we could potentially - * merge with. Currently includes a hand-wavy stop count of 8, to not spend - * too much time checking for merges. 
- */ -static bool blk_mq_attempt_merge(struct request_queue *q, - struct blk_mq_ctx *ctx, struct bio *bio) -{ - struct request *rq; - int checked = 8; - - list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { - bool merged = false; - - if (!checked--) - break; - - if (!blk_rq_merge_ok(rq, bio)) - continue; - - switch (blk_try_merge(rq, bio)) { - case ELEVATOR_BACK_MERGE: - if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_back_merge(q, rq, bio); - break; - case ELEVATOR_FRONT_MERGE: - if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_front_merge(q, rq, bio); - break; - case ELEVATOR_DISCARD_MERGE: - merged = bio_attempt_discard_merge(q, rq, bio); - break; - default: - continue; - } - - if (merged) - ctx->rq_merged++; - return merged; - } - - return false; -} - struct flush_busy_ctx_data { struct blk_mq_hw_ctx *hctx; struct list_head *list; @@ -968,7 +924,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) { struct blk_mq_hw_ctx *hctx; struct request *rq; - int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; + int errors, queued; if (list_empty(list)) return false; @@ -979,6 +935,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) errors = queued = 0; do { struct blk_mq_queue_data bd; + blk_status_t ret; rq = list_first_entry(list, struct request, queuelist); if (!blk_mq_get_driver_tag(rq, &hctx, false)) { @@ -1019,25 +976,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) } ret = q->mq_ops->queue_rq(hctx, &bd); - switch (ret) { - case BLK_MQ_RQ_QUEUE_OK: - queued++; - break; - case BLK_MQ_RQ_QUEUE_BUSY: + if (ret == BLK_STS_RESOURCE) { blk_mq_put_driver_tag_hctx(hctx, rq); list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); break; - default: - pr_err("blk-mq: bad return on queue: %d\n", ret); - case BLK_MQ_RQ_QUEUE_ERROR: + } + + if (unlikely(ret != BLK_STS_OK)) { errors++; - blk_mq_end_request(rq, -EIO); - break; + blk_mq_end_request(rq, BLK_STS_IOERR); + continue; } - if (ret == BLK_MQ_RQ_QUEUE_BUSY) - break; + queued++; } while (!list_empty(list)); hctx->dispatched[queued_to_index(queued)]++; @@ -1075,7 +1027,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) * - blk_mq_run_hw_queue() checks whether or not a queue has * been stopped before rerunning a queue. * - Some but not all block drivers stop a queue before - * returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq + * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq * and dm-rq. 
*/ if (!blk_mq_sched_needs_restart(hctx) && @@ -1427,30 +1379,13 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx) !blk_queue_nomerges(hctx->queue); } -static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, - struct request *rq, struct bio *bio) +static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx, + struct request *rq) { - if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) { - blk_mq_bio_to_request(rq, bio); - spin_lock(&ctx->lock); -insert_rq: - __blk_mq_insert_request(hctx, rq, false); - spin_unlock(&ctx->lock); - return false; - } else { - struct request_queue *q = hctx->queue; - - spin_lock(&ctx->lock); - if (!blk_mq_attempt_merge(q, ctx, bio)) { - blk_mq_bio_to_request(rq, bio); - goto insert_rq; - } - - spin_unlock(&ctx->lock); - __blk_mq_finish_request(hctx, ctx, rq); - return true; - } + spin_lock(&ctx->lock); + __blk_mq_insert_request(hctx, rq, false); + spin_unlock(&ctx->lock); } static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) @@ -1461,22 +1396,28 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, - bool may_sleep) +static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, + blk_qc_t *cookie, bool may_sleep) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { .rq = rq, .last = true, }; - struct blk_mq_hw_ctx *hctx; blk_qc_t new_cookie; - int ret; + blk_status_t ret; + bool run_queue = true; + + if (blk_mq_hctx_stopped(hctx)) { + run_queue = false; + goto insert; + } if (q->elevator) goto insert; - if (!blk_mq_get_driver_tag(rq, &hctx, false)) + if (!blk_mq_get_driver_tag(rq, NULL, false)) goto insert; new_cookie = request_to_qc_t(hctx, rq); @@ -1487,20 +1428,21 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, * would have done */ ret = q->mq_ops->queue_rq(hctx, &bd); - if (ret == BLK_MQ_RQ_QUEUE_OK) { + switch (ret) { + case BLK_STS_OK: *cookie = new_cookie; return; - } - - if (ret == BLK_MQ_RQ_QUEUE_ERROR) { + case BLK_STS_RESOURCE: + __blk_mq_requeue_request(rq); + goto insert; + default: *cookie = BLK_QC_T_NONE; - blk_mq_end_request(rq, -EIO); + blk_mq_end_request(rq, ret); return; } - __blk_mq_requeue_request(rq); insert: - blk_mq_sched_insert_request(rq, false, true, false, may_sleep); + blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep); } static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, @@ -1508,7 +1450,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, { if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - __blk_mq_try_issue_directly(rq, cookie, false); + __blk_mq_try_issue_directly(hctx, rq, cookie, false); rcu_read_unlock(); } else { unsigned int srcu_idx; @@ -1516,7 +1458,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, might_sleep(); srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); - __blk_mq_try_issue_directly(rq, cookie, true); + __blk_mq_try_issue_directly(hctx, rq, cookie, true); srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); } } @@ -1619,9 +1561,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); - if (same_queue_rq) + if (same_queue_rq) { + data.hctx = blk_mq_map_queue(q, + same_queue_rq->mq_ctx->cpu); blk_mq_try_issue_directly(data.hctx, 
same_queue_rq, &cookie); + } } else if (q->nr_hw_queues > 1 && is_sync) { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); @@ -1630,11 +1575,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); blk_mq_sched_insert_request(rq, false, true, true, true); - } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { + } else { blk_mq_put_ctx(data.ctx); + blk_mq_bio_to_request(rq, bio); + blk_mq_queue_io(data.hctx, data.ctx, rq); blk_mq_run_hw_queue(data.hctx, true); - } else - blk_mq_put_ctx(data.ctx); + } return cookie; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index fc13dd0c6e39..a7285bf2831c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -27,6 +27,13 @@ static int throtl_quantum = 32; #define MIN_THROTL_IOPS (10) #define DFL_LATENCY_TARGET (-1L) #define DFL_IDLE_THRESHOLD (0) +#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */ +#define LATENCY_FILTERED_SSD (0) +/* + * For HD, very small latency comes from sequential IO. Such IO is helpless to + * help determine if its IO is impacted by others, hence we ignore the IO + */ +#define LATENCY_FILTERED_HD (1000L) /* 1ms */ #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) @@ -212,6 +219,7 @@ struct throtl_data struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE]; struct latency_bucket __percpu *latency_buckets; unsigned long last_calculate_time; + unsigned long filtered_latency; bool track_bio_latency; }; @@ -698,7 +706,7 @@ static void throtl_dequeue_tg(struct throtl_grp *tg) static void throtl_schedule_pending_timer(struct throtl_service_queue *sq, unsigned long expires) { - unsigned long max_expire = jiffies + 8 * sq_to_tg(sq)->td->throtl_slice; + unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice; /* * Since we are adjusting the throttle limit dynamically, the sleep @@ -2281,7 +2289,7 @@ void blk_throtl_bio_endio(struct bio *bio) throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat), bio_op(bio), lat); - if (tg->latency_target) { + if (tg->latency_target && lat >= tg->td->filtered_latency) { int bucket; unsigned int threshold; @@ -2417,14 +2425,20 @@ void blk_throtl_exit(struct request_queue *q) void blk_throtl_register_queue(struct request_queue *q) { struct throtl_data *td; + int i; td = q->td; BUG_ON(!td); - if (blk_queue_nonrot(q)) + if (blk_queue_nonrot(q)) { td->throtl_slice = DFL_THROTL_SLICE_SSD; - else + td->filtered_latency = LATENCY_FILTERED_SSD; + } else { td->throtl_slice = DFL_THROTL_SLICE_HD; + td->filtered_latency = LATENCY_FILTERED_HD; + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) + td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY; + } #ifndef CONFIG_BLK_DEV_THROTTLING_LOW /* if no low limit, use previous default */ td->throtl_slice = DFL_THROTL_SLICE_HD; diff --git a/block/bounce.c b/block/bounce.c index 1cb5dd3a5da1..e4703181d97f 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -143,7 +143,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) mempool_free(bvec->bv_page, pool); } - bio_orig->bi_error = bio->bi_error; + bio_orig->bi_status = bio->bi_status; bio_endio(bio_orig); bio_put(bio); } @@ -163,7 +163,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool) { struct bio *bio_orig = bio->bi_private; - if (!bio->bi_error) + if (!bio->bi_status) copy_to_high_bio_irq(bio_orig, bio); bounce_end_io(bio, pool); diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 0a23dbba2d30..c4513b23f57a 100644 --- a/block/bsg-lib.c +++ 
b/block/bsg-lib.c @@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref) struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, scsi_req(rq)->result); + blk_end_request_all(rq, BLK_STS_OK); put_device(job->dev); /* release reference for the request */ @@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q) ret = bsg_create_job(dev, req); if (ret) { scsi_req(req)->result = ret; - blk_end_request_all(req, ret); + blk_end_request_all(req, BLK_STS_OK); spin_lock_irq(q->queue_lock); continue; } @@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); blk_queue_softirq_done(q, bsg_softirq_done); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); diff --git a/block/bsg.c b/block/bsg.c index 6fd08544d77e..59d02dd31b0c 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -294,14 +294,14 @@ out: * async completion call-back from the block layer, when scsi/ide/whatever * calls end_that_request_last() on a request */ -static void bsg_rq_end_io(struct request *rq, int uptodate) +static void bsg_rq_end_io(struct request *rq, blk_status_t status) { struct bsg_command *bc = rq->end_io_data; struct bsg_device *bd = bc->bd; unsigned long flags; - dprintk("%s: finished rq %p bc %p, bio %p stat %d\n", - bd->name, rq, bc, bc->bio, uptodate); + dprintk("%s: finished rq %p bc %p, bio %p\n", + bd->name, rq, bc, bc->bio); bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); @@ -750,6 +750,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode, #ifdef BSG_DEBUG unsigned char buf[32]; #endif + + if (!blk_queue_scsi_passthrough(rq)) { + WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); + return ERR_PTR(-EINVAL); + } + if (!blk_get_queue(rq)) return ERR_PTR(-ENXIO); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b7e9c7feeab2..3d5c28945719 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -982,15 +982,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) return min_vdisktime; } -static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) -{ - s64 delta = (s64)(vdisktime - min_vdisktime); - if (delta < 0) - min_vdisktime = vdisktime; - - return min_vdisktime; -} - static void update_min_vdisktime(struct cfq_rb_root *st) { struct cfq_group *cfqg; diff --git a/block/t10-pi.c b/block/t10-pi.c index 680c6d636298..350b3cbcf9e5 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len) * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref * tag. 
*/ -static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn, - unsigned int type) +static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter, + csum_fn *fn, unsigned int type) { unsigned int i; @@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn, iter->seed++; } - return 0; + return BLK_STS_OK; } -static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn, - unsigned int type) +static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, + csum_fn *fn, unsigned int type) { unsigned int i; @@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn, "(rcvd %04x, want %04x)\n", iter->disk_name, (unsigned long long)iter->seed, be16_to_cpu(pi->guard_tag), be16_to_cpu(csum)); - return -EILSEQ; + return BLK_STS_PROTECTION; } next: @@ -117,45 +117,45 @@ next: iter->seed++; } - return 0; + return BLK_STS_OK; } -static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_crc_fn, 1); } -static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_ip_fn, 1); } -static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_crc_fn, 1); } -static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_ip_fn, 1); } -static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_crc_fn, 3); } -static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_ip_fn, 3); } -static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_crc_fn, 3); } -static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_ip_fn, 3); } |
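The blk-core.c hunks above replace ad-hoc negative errno values in the completion path with blk_status_t and a single lookup table behind errno_to_blk_status(), blk_status_to_errno() and print_req_error(). The standalone C sketch below mirrors that table-driven round trip outside the kernel so the fallback behaviour is easier to see; the demo_* names are invented for illustration and the status list is only a small subset of the real BLK_STS_* codes defined in linux/blk_types.h.

/*
 * Hypothetical userspace sketch of the table-driven status mapping that the
 * blk-core.c hunk introduces; every demo_* name is invented for illustration
 * and the status list is only a subset of the real BLK_STS_* values.
 */
#include <errno.h>
#include <stdio.h>

typedef unsigned char demo_status_t;            /* stand-in for blk_status_t */

enum {
        DEMO_STS_OK = 0,
        DEMO_STS_NOTSUPP,
        DEMO_STS_TIMEOUT,
        DEMO_STS_NOSPC,
        DEMO_STS_RESOURCE,
        DEMO_STS_IOERR,                         /* catch-all, last entry */
};

static const struct {
        int             errnum;                 /* negative errno equivalent */
        const char      *name;                  /* human-readable error class */
} demo_errors[] = {
        [DEMO_STS_OK]           = { 0,                  "" },
        [DEMO_STS_NOTSUPP]      = { -EOPNOTSUPP,        "operation not supported" },
        [DEMO_STS_TIMEOUT]      = { -ETIMEDOUT,         "timeout" },
        [DEMO_STS_NOSPC]        = { -ENOSPC,            "critical space allocation" },
        [DEMO_STS_RESOURCE]     = { -ENOMEM,            "kernel resource" },
        [DEMO_STS_IOERR]        = { -EIO,               "I/O" },
};

#define DEMO_NR_ERRORS  (sizeof(demo_errors) / sizeof(demo_errors[0]))

/* errno -> status: linear scan; anything unknown collapses to the I/O error */
static demo_status_t demo_errno_to_status(int errnum)
{
        size_t i;

        for (i = 0; i < DEMO_NR_ERRORS; i++)
                if (demo_errors[i].errnum == errnum)
                        return (demo_status_t)i;
        return DEMO_STS_IOERR;
}

/* status -> errno: direct table index behind a bounds check */
static int demo_status_to_errno(demo_status_t status)
{
        if (status >= DEMO_NR_ERRORS)
                return -EIO;
        return demo_errors[status].errnum;
}

int main(void)
{
        int samples[] = { 0, -ETIMEDOUT, -ENOMEM, -EPROTO /* not in table */ };
        size_t i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                demo_status_t st = demo_errno_to_status(samples[i]);

                printf("errno %4d -> status %u (%-25s) -> errno %4d\n",
                       samples[i], (unsigned int)st, demo_errors[st].name,
                       demo_status_to_errno(st));
        }
        return 0;
}

Compiled with a plain cc, the -EPROTO sample shows the deliberate asymmetry of the scheme: an errno without a dedicated table entry degrades to the generic I/O status, so converting back does not reproduce the original errno. The kernel patch leans on the same table for print_req_error(), which replaces the per-errno switch statement that blk_update_request() used to carry.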