summary refs log tree commit diff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r-- block/bfq-iosched.c 10
-rw-r--r-- block/blk-cgroup.c 28
-rw-r--r-- block/blk-core.c 15
-rw-r--r-- block/blk-mq.c 81
-rw-r--r-- block/blk-mq.h 7
-rw-r--r-- block/genhd.c 12
-rw-r--r-- block/partition-generic.c 10
7 files changed, 119 insertions, 44 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index f0ecd98509d8..771ae9730ac6 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4934,8 +4934,16 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
bool new_queue = false;
bool bfqq_already_existing = false, split = false;
- if (!rq->elv.icq)
+ /*
+ * Even if we don't have an icq attached, we should still clear
+ * the scheduler pointers, as they might point to previously
+ * allocated bic/bfqq structs.
+ */
+ if (!rq->elv.icq) {
+ rq->elv.priv[0] = rq->elv.priv[1] = NULL;
return;
+ }
+
bic = icq_to_bic(rq->elv.icq);
spin_lock_irq(&bfqd->lock);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1c16694ae145..eb85cb87c40f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1177,26 +1177,20 @@ int blkcg_init_queue(struct request_queue *q)
preloaded = !radix_tree_preload(GFP_KERNEL);
- /*
- * Make sure the root blkg exists and count the existing blkgs. As
- * @q is bypassing at this point, blkg_lookup_create() can't be
- * used. Open code insertion.
- */
+ /* Make sure the root blkg exists. */
rcu_read_lock();
spin_lock_irq(q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
+ if (IS_ERR(blkg))
+ goto err_unlock;
+ q->root_blkg = blkg;
+ q->root_rl.blkg = blkg;
spin_unlock_irq(q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
- if (IS_ERR(blkg))
- return PTR_ERR(blkg);
-
- q->root_blkg = blkg;
- q->root_rl.blkg = blkg;
-
ret = blk_throtl_init(q);
if (ret) {
spin_lock_irq(q->queue_lock);
@@ -1204,6 +1198,13 @@ int blkcg_init_queue(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
}
return ret;
+
+err_unlock:
+ spin_unlock_irq(q->queue_lock);
+ rcu_read_unlock();
+ if (preloaded)
+ radix_tree_preload_end();
+ return PTR_ERR(blkg);
}
/**
@@ -1410,9 +1411,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
__clear_bit(pol->plid, q->blkcg_pols);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
- /* grab blkcg lock too while removing @pd from @blkg */
- spin_lock(&blkg->blkcg->lock);
-
if (blkg->pd[pol->plid]) {
if (!blkg->pd[pol->plid]->offline &&
pol->pd_offline_fn) {
@@ -1422,8 +1420,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
pol->pd_free_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
}
-
- spin_unlock(&blkg->blkcg->lock);
}
spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-core.c b/block/blk-core.c
index 806ce2442819..85909b431eb0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -201,6 +201,10 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->part = NULL;
seqcount_init(&rq->gstate_seq);
u64_stats_init(&rq->aborted_gstate_sync);
+ /*
+ * See the comment in blk_mq_init_request().
+ */
+ WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
}
EXPORT_SYMBOL(blk_rq_init);
@@ -915,7 +919,6 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
while (true) {
bool success = false;
- int ret;
rcu_read_lock();
if (percpu_ref_tryget_live(&q->q_usage_counter)) {
@@ -947,14 +950,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
*/
smp_rmb();
- ret = wait_event_interruptible(q->mq_freeze_wq,
- (atomic_read(&q->mq_freeze_depth) == 0 &&
- (preempt || !blk_queue_preempt_only(q))) ||
- blk_queue_dying(q));
+ wait_event(q->mq_freeze_wq,
+ (atomic_read(&q->mq_freeze_depth) == 0 &&
+ (preempt || !blk_queue_preempt_only(q))) ||
+ blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
- if (ret)
- return ret;
}
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0dc9e341c2a7..9ce9cac16c3f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -95,18 +95,15 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
- if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) {
- /*
- * index[0] counts the specific partition that was asked
- * for. index[1] counts the ones that are active on the
- * whole device, so increment that if mi->part is indeed
- * a partition, and not a whole device.
- */
- if (rq->part == mi->part)
- mi->inflight[0]++;
- if (mi->part->partno)
- mi->inflight[1]++;
- }
+ /*
+ * index[0] counts the specific partition that was asked for. index[1]
+ * counts the ones that are active on the whole device, so increment
+ * that if mi->part is indeed a partition, and not a whole device.
+ */
+ if (rq->part == mi->part)
+ mi->inflight[0]++;
+ if (mi->part->partno)
+ mi->inflight[1]++;
}
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
@@ -118,6 +115,25 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
}
+static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, void *priv,
+ bool reserved)
+{
+ struct mq_inflight *mi = priv;
+
+ if (rq->part == mi->part)
+ mi->inflight[rq_data_dir(rq)]++;
+}
+
+void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
+{
+ struct mq_inflight mi = { .part = part, .inflight = inflight, };
+
+ inflight[0] = inflight[1] = 0;
+ blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
+}
+
void blk_freeze_queue_start(struct request_queue *q)
{
int freeze_depth;
@@ -2042,6 +2058,13 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
seqcount_init(&rq->gstate_seq);
u64_stats_init(&rq->aborted_gstate_sync);
+ /*
+ * Start gstate with gen 1 instead of 0; otherwise it will be equal
+ * to aborted_gstate and be identified as timed out by
+ * blk_mq_terminate_expired().
+ */
+ WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
+
return 0;
}
@@ -2329,7 +2352,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
static void blk_mq_map_swqueue(struct request_queue *q)
{
- unsigned int i;
+ unsigned int i, hctx_idx;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
struct blk_mq_tag_set *set = q->tag_set;
@@ -2346,8 +2369,23 @@ static void blk_mq_map_swqueue(struct request_queue *q)
/*
* Map software to hardware queues.
+ *
+ * If a CPU isn't present, it is mapped to the first hctx.
*/
for_each_possible_cpu(i) {
+ hctx_idx = q->mq_map[i];
+ /* an unmapped hw queue can be remapped after the CPU topology changes */
+ if (!set->tags[hctx_idx] &&
+ !__blk_mq_alloc_rq_map(set, hctx_idx)) {
+ /*
+ * If tags initialization fails for some hctx,
+ * that hctx won't be brought online. In this
+ * case, remap the current ctx to hctx[0], which
+ * is guaranteed to always have tags allocated.
+ */
+ q->mq_map[i] = 0;
+ }
+
ctx = per_cpu_ptr(q->queue_ctx, i);
hctx = blk_mq_map_queue(q, i);
@@ -2359,8 +2397,21 @@ static void blk_mq_map_swqueue(struct request_queue *q)
mutex_unlock(&q->sysfs_lock);
queue_for_each_hw_ctx(q, hctx, i) {
- /* every hctx should get mapped by at least one CPU */
- WARN_ON(!hctx->nr_ctx);
+ /*
+ * If no software queues are mapped to this hardware queue,
+ * disable it and free the request entries.
+ */
+ if (!hctx->nr_ctx) {
+ /* Never unmap queue 0. We need it as a
+ * fallback in case a new remap fails
+ * allocation.
+ */
+ if (i && set->tags[i])
+ blk_mq_free_map_and_requests(set, i);
+
+ hctx->tags = NULL;
+ continue;
+ }
hctx->tags = set->tags[i];
WARN_ON(!hctx->tags);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 88c558f71819..e1bb420dc5d6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -7,6 +7,9 @@
struct blk_mq_tag_set;
+/**
+ * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
+ */
struct blk_mq_ctx {
struct {
spinlock_t lock;
@@ -185,7 +188,9 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
}
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
+ unsigned int inflight[2]);
+void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2]);
static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
{
diff --git a/block/genhd.c b/block/genhd.c
index dc7e089373b9..c4513fe1adda 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -82,6 +82,18 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part,
}
}
+void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
+{
+ if (q->mq_ops) {
+ blk_mq_in_flight_rw(q, part, inflight);
+ return;
+ }
+
+ inflight[0] = atomic_read(&part->in_flight[0]);
+ inflight[1] = atomic_read(&part->in_flight[1]);
+}
+
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno)
{
struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 08dabcd8b6ae..db57cced9b98 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -145,13 +145,15 @@ ssize_t part_stat_show(struct device *dev,
jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}
-ssize_t part_inflight_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ unsigned int inflight[2];
- return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
- atomic_read(&p->in_flight[1]));
+ part_in_flight_rw(q, p, inflight);
+ return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}
#ifdef CONFIG_FAIL_MAKE_REQUEST