diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-iosched.c | 10 | ||||
-rw-r--r-- | block/blk-cgroup.c | 28 | ||||
-rw-r--r-- | block/blk-core.c | 15 | ||||
-rw-r--r-- | block/blk-mq.c | 81 | ||||
-rw-r--r-- | block/blk-mq.h | 7 | ||||
-rw-r--r-- | block/genhd.c | 12 | ||||
-rw-r--r-- | block/partition-generic.c | 10 |
7 files changed, 119 insertions, 44 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f0ecd98509d8..771ae9730ac6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4934,8 +4934,16 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) bool new_queue = false; bool bfqq_already_existing = false, split = false; - if (!rq->elv.icq) + /* + * Even if we don't have an icq attached, we should still clear + * the scheduler pointers, as they might point to previously + * allocated bic/bfqq structs. + */ + if (!rq->elv.icq) { + rq->elv.priv[0] = rq->elv.priv[1] = NULL; return; + } + bic = icq_to_bic(rq->elv.icq); spin_lock_irq(&bfqd->lock); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1c16694ae145..eb85cb87c40f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1177,26 +1177,20 @@ int blkcg_init_queue(struct request_queue *q) preloaded = !radix_tree_preload(GFP_KERNEL); - /* - * Make sure the root blkg exists and count the existing blkgs. As - * @q is bypassing at this point, blkg_lookup_create() can't be - * used. Open code insertion. - */ + /* Make sure the root blkg exists. */ rcu_read_lock(); spin_lock_irq(q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); + if (IS_ERR(blkg)) + goto err_unlock; + q->root_blkg = blkg; + q->root_rl.blkg = blkg; spin_unlock_irq(q->queue_lock); rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); - if (IS_ERR(blkg)) - return PTR_ERR(blkg); - - q->root_blkg = blkg; - q->root_rl.blkg = blkg; - ret = blk_throtl_init(q); if (ret) { spin_lock_irq(q->queue_lock); @@ -1204,6 +1198,13 @@ int blkcg_init_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); } return ret; + +err_unlock: + spin_unlock_irq(q->queue_lock); + rcu_read_unlock(); + if (preloaded) + radix_tree_preload_end(); + return PTR_ERR(blkg); } /** @@ -1410,9 +1411,6 @@ void blkcg_deactivate_policy(struct request_queue *q, __clear_bit(pol->plid, q->blkcg_pols); list_for_each_entry(blkg, &q->blkg_list, q_node) { - /* grab blkcg lock too while removing @pd from @blkg */ - spin_lock(&blkg->blkcg->lock); - if (blkg->pd[pol->plid]) { if (!blkg->pd[pol->plid]->offline && pol->pd_offline_fn) { @@ -1422,8 +1420,6 @@ void blkcg_deactivate_policy(struct request_queue *q, pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } - - spin_unlock(&blkg->blkcg->lock); } spin_unlock_irq(q->queue_lock); diff --git a/block/blk-core.c b/block/blk-core.c index 806ce2442819..85909b431eb0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -201,6 +201,10 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->part = NULL; seqcount_init(&rq->gstate_seq); u64_stats_init(&rq->aborted_gstate_sync); + /* + * See comment of blk_mq_init_request + */ + WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); } EXPORT_SYMBOL(blk_rq_init); @@ -915,7 +919,6 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) while (true) { bool success = false; - int ret; rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { @@ -947,14 +950,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) */ smp_rmb(); - ret = wait_event_interruptible(q->mq_freeze_wq, - (atomic_read(&q->mq_freeze_depth) == 0 && - (preempt || !blk_queue_preempt_only(q))) || - blk_queue_dying(q)); + wait_event(q->mq_freeze_wq, + (atomic_read(&q->mq_freeze_depth) == 0 && + (preempt || !blk_queue_preempt_only(q))) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; - if (ret) - return ret; } } diff --git a/block/blk-mq.c b/block/blk-mq.c index 0dc9e341c2a7..9ce9cac16c3f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -95,18 +95,15 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) { - /* - * index[0] counts the specific partition that was asked - * for. index[1] counts the ones that are active on the - * whole device, so increment that if mi->part is indeed - * a partition, and not a whole device. - */ - if (rq->part == mi->part) - mi->inflight[0]++; - if (mi->part->partno) - mi->inflight[1]++; - } + /* + * index[0] counts the specific partition that was asked for. index[1] + * counts the ones that are active on the whole device, so increment + * that if mi->part is indeed a partition, and not a whole device. + */ + if (rq->part == mi->part) + mi->inflight[0]++; + if (mi->part->partno) + mi->inflight[1]++; } void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, @@ -118,6 +115,25 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); } +static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, + bool reserved) +{ + struct mq_inflight *mi = priv; + + if (rq->part == mi->part) + mi->inflight[rq_data_dir(rq)]++; +} + +void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]) +{ + struct mq_inflight mi = { .part = part, .inflight = inflight, }; + + inflight[0] = inflight[1] = 0; + blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi); +} + void blk_freeze_queue_start(struct request_queue *q) { int freeze_depth; @@ -2042,6 +2058,13 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, seqcount_init(&rq->gstate_seq); u64_stats_init(&rq->aborted_gstate_sync); + /* + * start gstate with gen 1 instead of 0, otherwise it will be equal + * to aborted_gstate, and be identified timed out by + * blk_mq_terminate_expired. + */ + WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); + return 0; } @@ -2329,7 +2352,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, static void blk_mq_map_swqueue(struct request_queue *q) { - unsigned int i; + unsigned int i, hctx_idx; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; @@ -2346,8 +2369,23 @@ static void blk_mq_map_swqueue(struct request_queue *q) /* * Map software to hardware queues. + * + * If the cpu isn't present, the cpu is mapped to first hctx. */ for_each_possible_cpu(i) { + hctx_idx = q->mq_map[i]; + /* unmapped hw queue can be remapped after CPU topo changed */ + if (!set->tags[hctx_idx] && + !__blk_mq_alloc_rq_map(set, hctx_idx)) { + /* + * If tags initialization fail for some hctx, + * that hctx won't be brought online. In this + * case, remap the current ctx to hctx[0] which + * is guaranteed to always have tags allocated + */ + q->mq_map[i] = 0; + } + ctx = per_cpu_ptr(q->queue_ctx, i); hctx = blk_mq_map_queue(q, i); @@ -2359,8 +2397,21 @@ static void blk_mq_map_swqueue(struct request_queue *q) mutex_unlock(&q->sysfs_lock); queue_for_each_hw_ctx(q, hctx, i) { - /* every hctx should get mapped by at least one CPU */ - WARN_ON(!hctx->nr_ctx); + /* + * If no software queues are mapped to this hardware queue, + * disable it and free the request entries. + */ + if (!hctx->nr_ctx) { + /* Never unmap queue 0. We need it as a + * fallback in case of a new remap fails + * allocation + */ + if (i && set->tags[i]) + blk_mq_free_map_and_requests(set, i); + + hctx->tags = NULL; + continue; + } hctx->tags = set->tags[i]; WARN_ON(!hctx->tags); diff --git a/block/blk-mq.h b/block/blk-mq.h index 88c558f71819..e1bb420dc5d6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -7,6 +7,9 @@ struct blk_mq_tag_set; +/** + * struct blk_mq_ctx - State for a software queue facing the submitting CPUs + */ struct blk_mq_ctx { struct { spinlock_t lock; @@ -185,7 +188,9 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) } void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); + unsigned int inflight[2]); +void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]); static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx) { diff --git a/block/genhd.c b/block/genhd.c index dc7e089373b9..c4513fe1adda 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -82,6 +82,18 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part, } } +void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]) +{ + if (q->mq_ops) { + blk_mq_in_flight_rw(q, part, inflight); + return; + } + + inflight[0] = atomic_read(&part->in_flight[0]); + inflight[1] = atomic_read(&part->in_flight[1]); +} + struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) { struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); diff --git a/block/partition-generic.c b/block/partition-generic.c index 08dabcd8b6ae..db57cced9b98 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -145,13 +145,15 @@ ssize_t part_stat_show(struct device *dev, jiffies_to_msecs(part_stat_read(p, time_in_queue))); } -ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct hd_struct *p = dev_to_part(dev); + struct request_queue *q = part_to_disk(p)->queue; + unsigned int inflight[2]; - return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]), - atomic_read(&p->in_flight[1])); + part_in_flight_rw(q, p, inflight); + return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); } #ifdef CONFIG_FAIL_MAKE_REQUEST |