Diffstat (limited to 'block')
-rw-r--r--   block/Makefile                  1
-rw-r--r--   block/badblocks.c               2
-rw-r--r--   block/bdev.c                    7
-rw-r--r--   block/bfq-iosched.c            51
-rw-r--r--   block/bfq-iosched.h             5
-rw-r--r--   block/bio.c                    20
-rw-r--r--   block/blk-cgroup-fc-appid.c    57
-rw-r--r--   block/blk-cgroup.c            176
-rw-r--r--   block/blk-cgroup.h            140
-rw-r--r--   block/blk-core.c               83
-rw-r--r--   block/blk-crypto-fallback.c     1
-rw-r--r--   block/blk-ia-ranges.c           7
-rw-r--r--   block/blk-iocost.c             88
-rw-r--r--   block/blk-iolatency.c         130
-rw-r--r--   block/blk-map.c                 5
-rw-r--r--   block/blk-mq-tag.c              1
-rw-r--r--   block/blk-mq.c                163
-rw-r--r--   block/blk-throttle.c            5
-rw-r--r--   block/bounce.c                  1
-rw-r--r--   block/fops.c                   34
-rw-r--r--   block/mq-deadline.c             1
-rw-r--r--   block/partitions/acorn.c        4
-rw-r--r--   block/partitions/atari.c        1
-rw-r--r--   block/partitions/ldm.c         15
24 files changed, 571 insertions, 427 deletions
diff --git a/block/Makefile b/block/Makefile
index 3950ecbc5c26..4e01bb71ad6e 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
+obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
diff --git a/block/badblocks.c b/block/badblocks.c
index d39056630d9c..3afb550c0f7b 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
s >>= bb->shift;
target += (1<<bb->shift) - 1;
target >>= bb->shift;
- sectors = target - s;
}
/* 'target' is now the first block after the bad range */
@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
s += (1<<bb->shift) - 1;
s >>= bb->shift;
target >>= bb->shift;
- sectors = target - s;
}
write_seqlock_irq(&bb->lock);
diff --git a/block/bdev.c b/block/bdev.c
index 7bf88e591aaf..5fe06c1f2def 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -200,6 +200,13 @@ int sync_blockdev(struct block_device *bdev)
}
EXPORT_SYMBOL(sync_blockdev);
+int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
+{
+ return filemap_write_and_wait_range(bdev->bd_inode->i_mapping,
+ lstart, lend);
+}
+EXPORT_SYMBOL(sync_blockdev_range);
+
/*
* Write out and wait upon all dirty data associated with this
* device. Filesystem data as well as the underlying block
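The new sync_blockdev_range() export lets callers write back and wait on just a byte range of a block device's page cache rather than the whole device. A minimal hedged sketch of a caller; the range and function name are illustrative only:

#include <linux/blkdev.h>
#include <linux/sizes.h>

/* Hedged sketch: flush only the first 1 MiB of @bdev's page cache. */
static int example_flush_header(struct block_device *bdev)
{
	/* lstart/lend are inclusive byte offsets, as filemap expects */
	return sync_blockdev_range(bdev, 0, SZ_1M - 1);
}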
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e47c75f1fa0f..0d46cb728bbf 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
*/
static const unsigned long bfq_late_stable_merging = 600;
-#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0])
+#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0]))
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
*/
void bfq_schedule_dispatch(struct bfq_data *bfqd)
{
+ lockdep_assert_held(&bfqd->lock);
+
if (bfqd->queued != 0) {
bfq_log(bfqd, "schedule dispatch");
blk_mq_run_hw_queues(bfqd->queue, true);
@@ -569,7 +571,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
struct bfq_entity *entity = &bfqq->entity;
struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
struct bfq_entity **entities = inline_entities;
- int depth, level;
+ int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
int class_idx = bfqq->ioprio_class - 1;
struct bfq_sched_data *sched_data;
unsigned long wsum;
@@ -578,15 +580,21 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
if (!entity->on_st_or_in_serv)
return false;
+retry:
+ spin_lock_irq(&bfqd->lock);
/* +1 for bfqq entity, root cgroup not included */
depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
- if (depth > BFQ_LIMIT_INLINE_DEPTH) {
+ if (depth > alloc_depth) {
+ spin_unlock_irq(&bfqd->lock);
+ if (entities != inline_entities)
+ kfree(entities);
entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO);
if (!entities)
return false;
+ alloc_depth = depth;
+ goto retry;
}
- spin_lock_irq(&bfqd->lock);
sched_data = entity->sched_data;
/* Gather our ancestors as we need to traverse them in reverse order */
level = 0;
@@ -2127,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->last_completed_rq_bfqq ||
bfqd->last_completed_rq_bfqq == bfqq ||
bfq_bfqq_has_short_ttime(bfqq) ||
- bfqq->dispatched > 0 ||
- now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
- bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+ now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
return;
/*
@@ -2202,9 +2208,13 @@ static void bfq_add_request(struct request *rq)
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
bfqq->queued[rq_is_sync(rq)]++;
- bfqd->queued++;
+ /*
+ * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
+ * may be read without holding the lock in bfq_has_work().
+ */
+ WRITE_ONCE(bfqd->queued, bfqd->queued + 1);
- if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
+ if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
bfq_check_waker(bfqd, bfqq, now_ns);
/*
@@ -2394,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
if (rq->queuelist.prev != &rq->queuelist)
list_del_init(&rq->queuelist);
bfqq->queued[sync]--;
- bfqd->queued--;
+ /*
+ * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
+ * may be read without holding the lock in bfq_has_work().
+ */
+ WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
elv_rb_del(&bfqq->sort_list, rq);
elv_rqhash_del(q, rq);
@@ -5055,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
/*
- * Avoiding lock: a race on bfqd->busy_queues should cause at
+ * Avoiding lock: a race on bfqd->queued should cause at
* most a call to dispatch for nothing
*/
return !list_empty_careful(&bfqd->dispatch) ||
- bfq_tot_busy_queues(bfqd) > 0;
+ READ_ONCE(bfqd->queued);
}
static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
bfq_schedule_dispatch(bfqd);
}
-static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
-{
- bfqq_request_freed(bfqq);
- bfq_put_queue(bfqq);
-}
-
/*
* The processes associated with bfqq may happen to generate their
* cumulative I/O at a lower rate than the rate at which the device
@@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)
bfq_completed_request(bfqq, bfqd);
}
- bfq_finish_requeue_request_body(bfqq);
+ bfqq_request_freed(bfqq);
+ bfq_put_queue(bfqq);
+ RQ_BIC(rq)->requests--;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_request_allocated(bfqq);
bfqq->ref++;
+ bic->requests++;
bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
rq, bfqq, bfqq->ref);
@@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_bfqq_expire(bfqd, bfqq, true, reason);
schedule_dispatch:
- spin_unlock_irqrestore(&bfqd->lock, flags);
bfq_schedule_dispatch(bfqd);
+ spin_unlock_irqrestore(&bfqd->lock, flags);
}
/*
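The bfqd->queued accesses above follow the usual locked-writer/lockless-reader pattern: updates happen under bfqd->lock and are annotated with WRITE_ONCE(), while bfq_has_work() peeks at the counter with READ_ONCE() and tolerates a stale value (worst case, one pointless dispatch). A generic hedged illustration of the pairing, not bfq code; the struct and helpers are hypothetical:

/* Generic illustration only; struct work_counter and its helpers are made up. */
struct work_counter {
	spinlock_t lock;
	unsigned int queued;
};

static void work_counter_inc(struct work_counter *wc)
{
	lockdep_assert_held(&wc->lock);			/* writers are serialized */
	WRITE_ONCE(wc->queued, wc->queued + 1);		/* paired with READ_ONCE() below */
}

static bool work_counter_has_work(struct work_counter *wc)
{
	/* lockless read: a racy result only costs a spurious dispatch */
	return READ_ONCE(wc->queued) != 0;
}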
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 978ef5d6fe6a..ca8177d7bf7c 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -468,6 +468,7 @@ struct bfq_io_cq {
struct bfq_queue *stable_merge_bfqq;
bool stably_merged; /* non splittable if true */
+ unsigned int requests; /* Number of requests this process has in flight */
};
/**
@@ -1102,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
break; \
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
blk_add_cgroup_trace_msg((bfqd)->queue, \
- bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
+ &bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css, \
"%s " fmt, pid_str, ##args); \
} while (0)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
blk_add_cgroup_trace_msg((bfqd)->queue, \
- bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
+ &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \
} while (0)
#else /* CONFIG_BFQ_GROUP_IOSCHED */
diff --git a/block/bio.c b/block/bio.c
index 212ccd5b5212..f92d0223247b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -722,6 +722,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
bio_alloc_cache_prune(cache, -1U);
}
free_percpu(bs->cache);
+ bs->cache = NULL;
}
/**
@@ -761,14 +762,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
- if (bio->bi_bdev == bio_src->bi_bdev &&
- bio_flagged(bio_src, BIO_REMAPPED))
- bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_iter = bio_src->bi_iter;
- bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
+ if (bio->bi_bdev) {
+ if (bio->bi_bdev == bio_src->bi_bdev &&
+ bio_flagged(bio_src, BIO_REMAPPED))
+ bio_set_flag(bio, BIO_REMAPPED);
+ bio_clone_blkg_association(bio, bio_src);
+ }
if (bio_crypt_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
@@ -1365,10 +1367,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
- void *src_buf;
+ void *src_buf = bvec_kmap_local(&src_bv);
+ void *dst_buf = bvec_kmap_local(&dst_bv);
+
+ memcpy(dst_buf, src_buf, bytes);
- src_buf = bvec_kmap_local(&src_bv);
- memcpy_to_bvec(&dst_bv, src_buf);
+ kunmap_local(dst_buf);
kunmap_local(src_buf);
bio_advance_iter_single(src, src_iter, bytes);
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
new file mode 100644
index 000000000000..760a2e1878dd
--- /dev/null
+++ b/block/blk-cgroup-fc-appid.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "blk-cgroup.h"
+
+/**
+ * blkcg_set_fc_appid - set the fc_app_id field associated with the blkcg
+ * @app_id: application identifier
+ * @cgrp_id: cgroup id
+ * @app_id_len: size of application identifier
+ */
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
+{
+ struct cgroup *cgrp;
+ struct cgroup_subsys_state *css;
+ struct blkcg *blkcg;
+ int ret = 0;
+
+ if (app_id_len > FC_APPID_LEN)
+ return -EINVAL;
+
+ cgrp = cgroup_get_from_id(cgrp_id);
+ if (!cgrp)
+ return -ENOENT;
+ css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
+ if (!css) {
+ ret = -ENOENT;
+ goto out_cgrp_put;
+ }
+ blkcg = css_to_blkcg(css);
+ /*
+ * There is a slight race condition on setting the appid.
+ * Worst case an I/O may not find the right id.
+ * This is no different from the I/O we let pass while obtaining
+ * the vmid from the fabric.
+ * Adding the overhead of a lock is not necessary.
+ */
+ strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+ css_put(css);
+out_cgrp_put:
+ cgroup_put(cgrp);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkcg_set_fc_appid);
+
+/**
+ * blkcg_get_fc_appid - get the fc app identifier associated with a bio
+ * @bio: target bio
+ *
+ * On success return the fc_app_id, on failure return NULL
+ */
+char *blkcg_get_fc_appid(struct bio *bio)
+{
+ if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0')
+ return NULL;
+ return bio->bi_blkg->blkcg->fc_app_id;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_fc_appid);
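The two helpers are exported for Fibre Channel transports: blkcg_set_fc_appid() stores an application identifier on a cgroup, and a driver can later fetch it per bio with blkcg_get_fc_appid(). A hedged sketch of a consumer; fc_send_appid() is a made-up placeholder for whatever the transport does with the string:

/* Hedged sketch of an FC driver consumer; fc_send_appid() is hypothetical. */
static void example_tag_fc_io(struct bio *bio)
{
	char *app_id = blkcg_get_fc_appid(bio);

	if (!app_id)
		return;			/* no blkg association or no appid configured */

	fc_send_appid(app_id);		/* hypothetical transport hook */
}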
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8dfe62786cd5..764e740b0c0f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq;
#define BLKG_DESTROY_BATCH_SIZE 64
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static struct cgroup_subsys_state *blkcg_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ css = kthread_blkcg();
+ if (css)
+ return css;
+ return task_css(current, io_cgrp_id);
+}
+
static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -156,6 +173,33 @@ static void blkg_async_bio_workfn(struct work_struct *work)
}
/**
+ * bio_blkcg_css - return the blkcg CSS associated with a bio
+ * @bio: target bio
+ *
+ * This returns the CSS for the blkcg associated with a bio, or %NULL if not
+ * associated. Callers are expected to either handle %NULL or know association
+ * has been done prior to calling this.
+ */
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
+{
+ if (!bio || !bio->bi_blkg)
+ return NULL;
+ return &bio->bi_blkg->blkcg->css;
+}
+EXPORT_SYMBOL_GPL(bio_blkcg_css);
+
+/**
+ * blkcg_parent - get the parent of a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * Return the parent blkcg of @blkcg. Can be called anytime.
+ */
+static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
+{
+ return css_to_blkcg(blkcg->css.parent);
+}
+
+/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
* @q: request_queue the new blkg is associated with
@@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_gq *blkg;
int i, ret;
- WARN_ON_ONCE(!rcu_read_lock_held());
lockdep_assert_held(&q->queue_lock);
/* request_queue is dying, do not create/recreate a blkg */
@@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
struct blkg_iostat_set *bis = &blkg->iostat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
- bool has_stats = false;
const char *dname;
unsigned seq;
int i;
@@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
} while (u64_stats_fetch_retry(&bis->sync, seq));
if (rbytes || wbytes || rios || wios) {
- has_stats = true;
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
rbytes, wbytes, rios, wios,
dbytes, dios);
}
if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
- has_stats = true;
seq_printf(s, " use_delay=%d delay_nsec=%llu",
atomic_read(&blkg->use_delay),
atomic64_read(&blkg->delay_nsec));
@@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
if (!blkg->pd[i] || !pol->pd_stat_fn)
continue;
- if (pol->pd_stat_fn(blkg->pd[i], s))
- has_stats = true;
+ pol->pd_stat_fn(blkg->pd[i], s);
}
- if (has_stats)
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
static int blkcg_print_stat(struct seq_file *sf, void *v)
@@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = {
{ } /* terminate */
};
+#ifdef CONFIG_CGROUP_WRITEBACK
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
+{
+ return &css_to_blkcg(css)->cgwb_list;
+}
+#endif
+
/*
* blkcg destruction is a three-stage process.
*
@@ -1016,25 +1061,6 @@ static struct cftype blkcg_legacy_files[] = {
*/
/**
- * blkcg_css_offline - cgroup css_offline callback
- * @css: css of interest
- *
- * This function is called when @css is about to go away. Here the cgwbs are
- * offlined first and only once writeback associated with the blkcg has
- * finished do we start step 2 (see above).
- */
-static void blkcg_css_offline(struct cgroup_subsys_state *css)
-{
- struct blkcg *blkcg = css_to_blkcg(css);
-
- /* this prevents anyone from attaching or migrating to this blkcg */
- wb_blkcg_offline(blkcg);
-
- /* put the base online pin allowing step 2 to be triggered */
- blkcg_unpin_online(blkcg);
-}
-
-/**
* blkcg_destroy_blkgs - responsible for shooting down blkgs
* @blkcg: blkcg of interest
*
@@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
*
* This is the blkcg counterpart of ioc_release_fn().
*/
-void blkcg_destroy_blkgs(struct blkcg *blkcg)
+static void blkcg_destroy_blkgs(struct blkcg *blkcg)
{
might_sleep();
@@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
spin_unlock_irq(&blkcg->lock);
}
+/**
+ * blkcg_pin_online - pin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * While pinned, a blkcg is kept online. This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
+ */
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css)
+{
+ refcount_inc(&css_to_blkcg(blkcg_css)->online_pin);
+}
+
+/**
+ * blkcg_unpin_online - unpin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ */
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
+{
+ struct blkcg *blkcg = css_to_blkcg(blkcg_css);
+
+ do {
+ if (!refcount_dec_and_test(&blkcg->online_pin))
+ break;
+ blkcg_destroy_blkgs(blkcg);
+ blkcg = blkcg_parent(blkcg);
+ } while (blkcg);
+}
+
+/**
+ * blkcg_css_offline - cgroup css_offline callback
+ * @css: css of interest
+ *
+ * This function is called when @css is about to go away. Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
+ */
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
+{
+ /* this prevents anyone from attaching or migrating to this blkcg */
+ wb_blkcg_offline(css);
+
+ /* put the base online pin allowing step 2 to be triggered */
+ blkcg_unpin_online(css);
+}
+
static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
@@ -1163,8 +1240,7 @@ unlock:
static int blkcg_css_online(struct cgroup_subsys_state *css)
{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct blkcg *parent = blkcg_parent(blkcg);
+ struct blkcg *parent = blkcg_parent(css_to_blkcg(css));
/*
* blkcg_pin_online() is used to delay blkcg offline so that blkgs
@@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
* parent so that offline always happens towards the root.
*/
if (parent)
- blkcg_pin_online(parent);
+ blkcg_pin_online(css);
return 0;
}
@@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q)
preloaded = !radix_tree_preload(GFP_KERNEL);
/* Make sure the root blkg exists. */
- rcu_read_lock();
+ /* spin_lock_irq can serve as RCU read-side critical section. */
spin_lock_irq(&q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
if (IS_ERR(blkg))
goto err_unlock;
q->root_blkg = blkg;
spin_unlock_irq(&q->queue_lock);
- rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
@@ -1234,7 +1309,6 @@ err_destroy_all:
return ret;
err_unlock:
spin_unlock_irq(&q->queue_lock);
- rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
return PTR_ERR(blkg);
@@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
void blkcg_maybe_throttle_current(void)
{
struct request_queue *q = current->throttle_queue;
- struct cgroup_subsys_state *css;
struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool use_memdelay = current->use_memdelay;
@@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void)
current->use_memdelay = false;
rcu_read_lock();
- css = kthread_blkcg();
- if (css)
- blkcg = css_to_blkcg(css);
- else
- blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
-
+ blkcg = css_to_blkcg(blkcg_css());
if (!blkcg)
goto out;
blkg = blkg_lookup(blkcg, q);
@@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio)
rcu_read_lock();
if (bio->bi_blkg)
- css = &bio_blkcg(bio)->css;
+ css = bio_blkcg_css(bio);
else
css = blkcg_css();
@@ -1906,12 +1974,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
*/
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
{
- if (src->bi_blkg) {
- if (dst->bi_blkg)
- blkg_put(dst->bi_blkg);
- blkg_get(src->bi_blkg);
- dst->bi_blkg = src->bi_blkg;
- }
+ if (src->bi_blkg)
+ bio_associate_blkg_from_css(dst, bio_blkcg_css(src));
}
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
@@ -1950,6 +2014,22 @@ void blk_cgroup_bio_start(struct bio *bio)
put_cpu();
}
+bool blk_cgroup_congested(void)
+{
+ struct cgroup_subsys_state *css;
+ bool ret = false;
+
+ rcu_read_lock();
+ for (css = blkcg_css(); css; css = css->parent) {
+ if (atomic_read(&css->cgroup->congestion_count)) {
+ ret = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
static int __init blkcg_init(void)
{
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
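blkcg_pin_online()/blkcg_unpin_online() now take a css directly. As the kernel-doc above describes, they exist to impedance-match blkg and cgwb lifetimes; a hedged sketch of the intended pairing, where the wrapper struct and functions are illustrative rather than the real cgwb code:

/* Illustrative only: keep the blkcg online for the lifetime of a writeback object. */
struct example_wb {
	struct cgroup_subsys_state *blkcg_css;
};

static void example_wb_init(struct example_wb *wb,
			    struct cgroup_subsys_state *blkcg_css)
{
	wb->blkcg_css = blkcg_css;
	blkcg_pin_online(blkcg_css);	/* blkgs stay alive while the wb is active */
}

static void example_wb_release(struct example_wb *wb)
{
	/* dropping the last pin runs blkcg_destroy_blkgs() and walks to the parent */
	blkcg_unpin_online(wb->blkcg_css);
}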
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 47e1e38390c9..d4de0a35e066 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -15,13 +15,101 @@
*/
#include <linux/blk-cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/kthread.h>
#include <linux/blk-mq.h>
+struct blkcg_gq;
+struct blkg_policy_data;
+
+
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
#ifdef CONFIG_BLK_CGROUP
+enum blkg_iostat_type {
+ BLKG_IOSTAT_READ,
+ BLKG_IOSTAT_WRITE,
+ BLKG_IOSTAT_DISCARD,
+
+ BLKG_IOSTAT_NR,
+};
+
+struct blkg_iostat {
+ u64 bytes[BLKG_IOSTAT_NR];
+ u64 ios[BLKG_IOSTAT_NR];
+};
+
+struct blkg_iostat_set {
+ struct u64_stats_sync sync;
+ struct blkg_iostat cur;
+ struct blkg_iostat last;
+};
+
+/* association between a blk cgroup and a request queue */
+struct blkcg_gq {
+ /* Pointer to the associated request_queue */
+ struct request_queue *q;
+ struct list_head q_node;
+ struct hlist_node blkcg_node;
+ struct blkcg *blkcg;
+
+ /* all non-root blkcg_gq's are guaranteed to have access to parent */
+ struct blkcg_gq *parent;
+
+ /* reference count */
+ struct percpu_ref refcnt;
+
+ /* is this blkg online? protected by both blkcg and q locks */
+ bool online;
+
+ struct blkg_iostat_set __percpu *iostat_cpu;
+ struct blkg_iostat_set iostat;
+
+ struct blkg_policy_data *pd[BLKCG_MAX_POLS];
+
+ spinlock_t async_bio_lock;
+ struct bio_list async_bios;
+ union {
+ struct work_struct async_bio_work;
+ struct work_struct free_work;
+ };
+
+ atomic_t use_delay;
+ atomic64_t delay_nsec;
+ atomic64_t delay_start;
+ u64 last_delay;
+ int last_use;
+
+ struct rcu_head rcu_head;
+};
+
+struct blkcg {
+ struct cgroup_subsys_state css;
+ spinlock_t lock;
+ refcount_t online_pin;
+
+ struct radix_tree_root blkg_tree;
+ struct blkcg_gq __rcu *blkg_hint;
+ struct hlist_head blkg_list;
+
+ struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
+
+ struct list_head all_blkcgs_node;
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
+ char fc_app_id[FC_APPID_LEN];
+#endif
+#ifdef CONFIG_CGROUP_WRITEBACK
+ struct list_head cgwb_list;
+#endif
+};
+
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
+{
+ return css ? container_of(css, struct blkcg, css) : NULL;
+}
+
/*
* A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
* request_queue (q). This is used by blkcg policies which need to track
@@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
struct seq_file *s);
struct blkcg_policy {
@@ -123,52 +211,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
- struct cgroup_subsys_state *css;
-
- css = kthread_blkcg();
- if (css)
- return css;
- return task_css(current, io_cgrp_id);
-}
-
-/**
- * __bio_blkcg - internal, inconsistent version to get blkcg
- *
- * DO NOT USE.
- * This function is inconsistent and consequently is dangerous to use. The
- * first part of the function returns a blkcg where a reference is owned by the
- * bio. This means it does not need to be rcu protected as it cannot go away
- * with the bio owning a reference to it. However, the latter potentially gets
- * it from task_css(). This can race against task migration and the cgroup
- * dying. It is also semantically different as it must be called rcu protected
- * and is susceptible to failure when trying to get a reference to it.
- * Therefore, it is not ok to assume that *_get() will always succeed on the
- * blkcg returned here.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
- if (bio && bio->bi_blkg)
- return bio->bi_blkg->blkcg;
- return css_to_blkcg(blkcg_css());
-}
-
-/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
* @return: true if this bio needs to be submitted with the root blkg context.
*
* In order to avoid priority inversions we sometimes need to issue a bio as if
* it were attached to the root blkg, and then backcharge to the actual owning
- * blkg. The idea is we do bio_blkcg() to look up the actual context for the
- * bio and attach the appropriate blkg to the bio. Then we call this helper and
- * if it is true run with the root blkg for that queue and then do any
+ * blkg. The idea is we do bio_blkcg_css() to look up the actual context for
+ * the bio and attach the appropriate blkg to the bio. Then we call this helper
+ * and if it is true run with the root blkg for that queue and then do any
* backcharging to the originating cgroup once the io is complete.
*/
static inline bool bio_issue_as_root_blkg(struct bio *bio)
@@ -457,7 +507,8 @@ struct blkcg_policy_data {
struct blkcg_policy {
};
-#ifdef CONFIG_BLOCK
+struct blkcg {
+};
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
@@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
-
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
-#endif /* CONFIG_BLOCK */
#endif /* CONFIG_BLK_CGROUP */
#endif /* _BLK_CGROUP_PRIVATE_H */
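With blkcg_pol_stat_pd_fn now returning void, a policy's stat callback simply prints (or prints nothing) and blkcg_print_one_stat() always terminates the line itself. A hedged sketch of a callback under the new prototype; the policy struct and helpers are hypothetical:

/* Hedged sketch; struct example_grp and pd_to_example() are hypothetical. */
static void example_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
	struct example_grp *grp = pd_to_example(pd);

	if (!grp->enabled)
		return;			/* printing nothing is now fine */

	seq_printf(s, " example_depth=%u", grp->depth);
}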
diff --git a/block/blk-core.c b/block/blk-core.c
index ee18b6a699bd..06ff5bbfe8f6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -50,7 +50,6 @@
#include "blk-pm.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
-#include "blk-rq-qos.h"
struct dentry *blk_debugfs_root;
@@ -315,9 +314,6 @@ void blk_cleanup_queue(struct request_queue *q)
*/
blk_freeze_queue(q);
- /* cleanup rq qos structures for queue without disk */
- rq_qos_exit(q);
-
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
blk_sync_queue(q);
@@ -592,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio)
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
- "%pg: rw=%d, want=%llu, limit=%llu\n",
- current->comm,
- bio->bi_bdev, bio->bi_opf,
- bio_end_sector(bio), maxsector);
+ "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n",
+ current->comm, bio->bi_bdev, bio->bi_opf,
+ bio->bi_iter.bi_sector, nr_sectors, maxsector);
return -EIO;
}
return 0;
@@ -893,19 +888,11 @@ void submit_bio(struct bio *bio)
if (blkcg_punt_bio_submit(bio))
return;
- /*
- * If it's a regular read/write or a barrier with data attached,
- * go through the normal accounting stuff before submission.
- */
- if (bio_has_data(bio)) {
- unsigned int count = bio_sectors(bio);
-
- if (op_is_write(bio_op(bio))) {
- count_vm_events(PGPGOUT, count);
- } else {
- task_io_account_read(bio->bi_iter.bi_size);
- count_vm_events(PGPGIN, count);
- }
+ if (bio_op(bio) == REQ_OP_READ) {
+ task_io_account_read(bio->bi_iter.bi_size);
+ count_vm_events(PGPGIN, bio_sectors(bio));
+ } else if (bio_op(bio) == REQ_OP_WRITE) {
+ count_vm_events(PGPGOUT, bio_sectors(bio));
}
/*
@@ -952,7 +939,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
blk_flush_plug(current->plug, false);
- if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
+ if (bio_queue_enter(bio))
return 0;
if (queue_is_mq(q)) {
ret = blk_mq_poll(q, cookie, iob, flags);
@@ -1022,21 +1009,22 @@ again:
}
}
-static unsigned long __part_start_io_acct(struct block_device *part,
- unsigned int sectors, unsigned int op,
- unsigned long start_time)
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+ unsigned int sectors, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
part_stat_lock();
- update_io_ticks(part, start_time, false);
- part_stat_inc(part, ios[sgrp]);
- part_stat_add(part, sectors[sgrp], sectors);
- part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ update_io_ticks(bdev, start_time, false);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
+ part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
return start_time;
}
+EXPORT_SYMBOL(bdev_start_io_acct);
/**
* bio_start_io_acct_time - start I/O accounting for bio based drivers
@@ -1045,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part,
*/
void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
{
- __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), start_time);
+ bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
@@ -1058,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
- unsigned int op)
-{
- return __part_start_io_acct(disk->part0, sectors, op, jiffies);
-}
-EXPORT_SYMBOL(disk_start_io_acct);
-
-static void __part_end_io_acct(struct block_device *part, unsigned int op,
- unsigned long start_time)
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
unsigned long duration = now - start_time;
part_stat_lock();
- update_io_ticks(part, now, true);
- part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_stat_local_dec(part, in_flight[op_is_write(op)]);
+ update_io_ticks(bdev, now, true);
+ part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
}
+EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
- struct block_device *orig_bdev)
+ struct block_device *orig_bdev)
{
- __part_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
- unsigned long start_time)
-{
- __part_end_io_acct(disk->part0, op, start_time);
-}
-EXPORT_SYMBOL(disk_end_io_acct);
-
/**
* blk_lld_busy - Check if underlying low-level drivers of a device are busy
* @q : the queue of the device being checked
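disk_start_io_acct()/disk_end_io_acct() are dropped in favour of the exported bdev_start_io_acct()/bdev_end_io_acct(), which take the block_device directly. A hedged sketch of how a bio-based driver might pair them across submission and completion; the per-I/O struct is illustrative:

/* Hedged sketch; struct example_io is a hypothetical per-I/O bookkeeping struct. */
struct example_io {
	unsigned long start_time;
};

static void example_account_start(struct example_io *io, struct bio *bio)
{
	io->start_time = bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
					    bio_op(bio), jiffies);
}

static void example_account_end(struct example_io *io, struct bio *bio)
{
	/* called from the driver's completion path */
	bdev_end_io_acct(bio->bi_bdev, bio_op(bio), io->start_time);
}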
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index 5d1aa5b1d30a..621abd1b0e4d 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -179,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
bio->bi_io_vec[bio->bi_vcnt++] = bv;
bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
return bio;
}
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
index 18c68d8b9138..56ed48d2954e 100644
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -54,13 +54,8 @@ static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj,
container_of(attr, struct blk_ia_range_sysfs_entry, attr);
struct blk_independent_access_range *iar =
container_of(kobj, struct blk_independent_access_range, kobj);
- ssize_t ret;
- mutex_lock(&iar->queue->sysfs_lock);
- ret = entry->show(iar, buf);
- mutex_unlock(&iar->queue->sysfs_lock);
-
- return ret;
+ return entry->show(iar, buf);
}
static const struct sysfs_ops blk_ia_range_sysfs_ops = {
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 70a0a3d680a3..33a11ba971ea 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -533,8 +533,7 @@ struct ioc_gq {
/* statistics */
struct iocg_pcpu_stat __percpu *pcpu_stat;
- struct iocg_stat local_stat;
- struct iocg_stat desc_stat;
+ struct iocg_stat stat;
struct iocg_stat last_stat;
u64 last_stat_abs_vusage;
u64 usage_delta_us;
@@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
return true;
} else {
if (iocg->indelay_since) {
- iocg->local_stat.indelay_us += now->now - iocg->indelay_since;
+ iocg->stat.indelay_us += now->now - iocg->indelay_since;
iocg->indelay_since = 0;
}
iocg->delay = 0;
@@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
/* if debt is paid in full, restore inuse */
if (!iocg->abs_vdebt) {
- iocg->local_stat.indebt_us += now->now - iocg->indebt_since;
+ iocg->stat.indebt_us += now->now - iocg->indebt_since;
iocg->indebt_since = 0;
propagate_weights(iocg, iocg->active, iocg->last_inuse,
@@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
if (!waitqueue_active(&iocg->waitq)) {
if (iocg->wait_since) {
- iocg->local_stat.wait_us += now->now - iocg->wait_since;
+ iocg->stat.wait_us += now->now - iocg->wait_since;
iocg->wait_since = 0;
}
return;
@@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg,
}
}
+/* propagate the deltas to the parent */
+static void iocg_flush_stat_upward(struct ioc_gq *iocg)
+{
+ if (iocg->level > 0) {
+ struct iocg_stat *parent_stat =
+ &iocg->ancestors[iocg->level - 1]->stat;
+
+ parent_stat->usage_us +=
+ iocg->stat.usage_us - iocg->last_stat.usage_us;
+ parent_stat->wait_us +=
+ iocg->stat.wait_us - iocg->last_stat.wait_us;
+ parent_stat->indebt_us +=
+ iocg->stat.indebt_us - iocg->last_stat.indebt_us;
+ parent_stat->indelay_us +=
+ iocg->stat.indelay_us - iocg->last_stat.indelay_us;
+ }
+
+ iocg->last_stat = iocg->stat;
+}
+
/* collect per-cpu counters and propagate the deltas to the parent */
-static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
+static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
- struct iocg_stat new_stat;
u64 abs_vusage = 0;
u64 vusage_delta;
int cpu;
@@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
iocg->last_stat_abs_vusage = abs_vusage;
iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
- iocg->local_stat.usage_us += iocg->usage_delta_us;
-
- /* propagate upwards */
- new_stat.usage_us =
- iocg->local_stat.usage_us + iocg->desc_stat.usage_us;
- new_stat.wait_us =
- iocg->local_stat.wait_us + iocg->desc_stat.wait_us;
- new_stat.indebt_us =
- iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us;
- new_stat.indelay_us =
- iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us;
-
- /* propagate the deltas to the parent */
- if (iocg->level > 0) {
- struct iocg_stat *parent_stat =
- &iocg->ancestors[iocg->level - 1]->desc_stat;
+ iocg->stat.usage_us += iocg->usage_delta_us;
- parent_stat->usage_us +=
- new_stat.usage_us - iocg->last_stat.usage_us;
- parent_stat->wait_us +=
- new_stat.wait_us - iocg->last_stat.wait_us;
- parent_stat->indebt_us +=
- new_stat.indebt_us - iocg->last_stat.indebt_us;
- parent_stat->indelay_us +=
- new_stat.indelay_us - iocg->last_stat.indelay_us;
- }
-
- iocg->last_stat = new_stat;
+ iocg_flush_stat_upward(iocg);
}
/* get stat counters ready for reading on all active iocgs */
@@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
/* flush leaves and build inner node walk list */
list_for_each_entry(iocg, target_iocgs, active_list) {
- iocg_flush_stat_one(iocg, now);
+ iocg_flush_stat_leaf(iocg, now);
iocg_build_inner_walk(iocg, &inner_walk);
}
/* keep flushing upwards by walking the inner list backwards */
list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
- iocg_flush_stat_one(iocg, now);
+ iocg_flush_stat_upward(iocg);
list_del_init(&iocg->walk_list);
}
}
@@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
/* flush wait and indebt stat deltas */
if (iocg->wait_since) {
- iocg->local_stat.wait_us += now->now - iocg->wait_since;
+ iocg->stat.wait_us += now->now - iocg->wait_since;
iocg->wait_since = now->now;
}
if (iocg->indebt_since) {
- iocg->local_stat.indebt_us +=
+ iocg->stat.indebt_us +=
now->now - iocg->indebt_since;
iocg->indebt_since = now->now;
}
if (iocg->indelay_since) {
- iocg->local_stat.indelay_us +=
+ iocg->stat.indelay_us +=
now->now - iocg->indelay_since;
iocg->indelay_since = now->now;
}
@@ -2322,7 +2315,17 @@ static void ioc_timer_fn(struct timer_list *timer)
iocg->hweight_donating = hwa;
iocg->hweight_after_donation = new_hwi;
list_add(&iocg->surplus_list, &surpluses);
- } else {
+ } else if (!iocg->abs_vdebt) {
+ /*
+ * @iocg doesn't have enough to donate. Reset
+ * its inuse to active.
+ *
+ * Don't reset debtors as their inuse's are
+ * owned by debt handling. This shouldn't affect
+ * donation calculation in any meaningful way
+ * as @iocg doesn't have a meaningful amount of
+ * share anyway.
+ */
TRACE_IOCG_PATH(inuse_shortage, iocg, &now,
iocg->inuse, iocg->active,
iocg->hweight_inuse, new_hwi);
@@ -2995,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
kfree(iocg);
}
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
if (!ioc->enabled)
- return false;
+ return;
if (iocg->level == 0) {
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3017,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
iocg->last_stat.wait_us,
iocg->last_stat.indebt_us,
iocg->last_stat.indelay_us);
- return true;
}
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 2f33932e72e3..9568bf8dfe82 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -87,7 +87,17 @@ struct iolatency_grp;
struct blk_iolatency {
struct rq_qos rqos;
struct timer_list timer;
- atomic_t enabled;
+
+ /*
+ * ->enabled is the master enable switch gating the throttling logic and
+ * inflight tracking. The number of cgroups which have iolat enabled is
+ * tracked in ->enable_cnt, and ->enabled is flipped on/off accordingly
+ * from ->enable_work with the request_queue frozen. For details, see
+ * blkiolatency_enable_work_fn().
+ */
+ bool enabled;
+ atomic_t enable_cnt;
+ struct work_struct enable_work;
};
static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
@@ -95,11 +105,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
return container_of(rqos, struct blk_iolatency, rqos);
}
-static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
-{
- return atomic_read(&blkiolat->enabled) > 0;
-}
-
struct child_latency_info {
spinlock_t lock;
@@ -464,7 +469,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
struct blkcg_gq *blkg = bio->bi_blkg;
bool issue_as_root = bio_issue_as_root_blkg(bio);
- if (!blk_iolatency_enabled(blkiolat))
+ if (!blkiolat->enabled)
return;
while (blkg && blkg->parent) {
@@ -594,7 +599,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
u64 window_start;
u64 now;
bool issue_as_root = bio_issue_as_root_blkg(bio);
- bool enabled = false;
int inflight = 0;
blkg = bio->bi_blkg;
@@ -605,8 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
if (!iolat)
return;
- enabled = blk_iolatency_enabled(iolat->blkiolat);
- if (!enabled)
+ if (!iolat->blkiolat->enabled)
return;
now = ktime_to_ns(ktime_get());
@@ -645,6 +648,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
del_timer_sync(&blkiolat->timer);
+ flush_work(&blkiolat->enable_work);
blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
kfree(blkiolat);
}
@@ -716,6 +720,44 @@ next:
rcu_read_unlock();
}
+/**
+ * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
+ * @work: enable_work of the blk_iolatency of interest
+ *
+ * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
+ * is relatively expensive as it involves walking up the hierarchy twice for
+ * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
+ * want to disable the in-flight tracking.
+ *
+ * We have to make sure that the counting is balanced - we don't want to leak
+ * the in-flight counts by disabling accounting in the completion path while IOs
+ * are in flight. This is achieved by ensuring that no IO is in flight by
+ * freezing the queue while flipping ->enabled. As this requires a sleepable
+ * context, ->enabled flipping is punted to this work function.
+ */
+static void blkiolatency_enable_work_fn(struct work_struct *work)
+{
+ struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
+ enable_work);
+ bool enabled;
+
+ /*
+ * There can only be one instance of this function running for @blkiolat
+ * and it's guaranteed to be executed at least once after the latest
+ * ->enable_cnt modification. Acting on the latest ->enable_cnt is
+ * sufficient.
+ *
+ * Also, we know @blkiolat is safe to access as ->enable_work is flushed
+ * in blkcg_iolatency_exit().
+ */
+ enabled = atomic_read(&blkiolat->enable_cnt);
+ if (enabled != blkiolat->enabled) {
+ blk_mq_freeze_queue(blkiolat->rqos.q);
+ blkiolat->enabled = enabled;
+ blk_mq_unfreeze_queue(blkiolat->rqos.q);
+ }
+}
+
int blk_iolatency_init(struct request_queue *q)
{
struct blk_iolatency *blkiolat;
@@ -741,17 +783,15 @@ int blk_iolatency_init(struct request_queue *q)
}
timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+ INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
return 0;
}
-/*
- * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
- * return 0.
- */
-static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
struct iolatency_grp *iolat = blkg_to_lat(blkg);
+ struct blk_iolatency *blkiolat = iolat->blkiolat;
u64 oldval = iolat->min_lat_nsec;
iolat->min_lat_nsec = val;
@@ -759,13 +799,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
BLKIOLATENCY_MAX_WIN_SIZE);
- if (!oldval && val)
- return 1;
+ if (!oldval && val) {
+ if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
+ schedule_work(&blkiolat->enable_work);
+ }
if (oldval && !val) {
blkcg_clear_delay(blkg);
- return -1;
+ if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
+ schedule_work(&blkiolat->enable_work);
}
- return 0;
}
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -797,7 +839,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 lat_val = 0;
u64 oldval;
int ret;
- int enable = 0;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
if (ret)
@@ -832,41 +873,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
blkg = ctx.blkg;
oldval = iolat->min_lat_nsec;
- enable = iolatency_set_min_lat_nsec(blkg, lat_val);
- if (enable) {
- if (!blk_get_queue(blkg->q)) {
- ret = -ENODEV;
- goto out;
- }
-
- blkg_get(blkg);
- }
-
- if (oldval != iolat->min_lat_nsec) {
+ iolatency_set_min_lat_nsec(blkg, lat_val);
+ if (oldval != iolat->min_lat_nsec)
iolatency_clear_scaling(blkg);
- }
-
ret = 0;
out:
blkg_conf_finish(&ctx);
- if (ret == 0 && enable) {
- struct iolatency_grp *tmp = blkg_to_lat(blkg);
- struct blk_iolatency *blkiolat = tmp->blkiolat;
-
- blk_mq_freeze_queue(blkg->q);
-
- if (enable == 1)
- atomic_inc(&blkiolat->enabled);
- else if (enable == -1)
- atomic_dec(&blkiolat->enabled);
- else
- WARN_ON_ONCE(1);
-
- blk_mq_unfreeze_queue(blkg->q);
-
- blkg_put(blkg);
- blk_put_queue(blkg->q);
- }
return ret ?: nbytes;
}
@@ -891,7 +903,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
return 0;
}
-static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
+static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
struct latency_stat stat;
int cpu;
@@ -914,17 +926,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
(unsigned long long)stat.ps.missed,
(unsigned long long)stat.ps.total,
iolat->rq_depth.max_depth);
- return true;
}
-static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat;
unsigned long long cur_win;
if (!blkcg_debug_stats)
- return false;
+ return;
if (iolat->ssd)
return iolatency_ssd_stat(iolat, s);
@@ -937,7 +948,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
else
seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
iolat->rq_depth.max_depth, avg_lat, cur_win);
- return true;
}
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
@@ -1007,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
struct blkcg_gq *blkg = lat_to_blkg(iolat);
- struct blk_iolatency *blkiolat = iolat->blkiolat;
- int ret;
- ret = iolatency_set_min_lat_nsec(blkg, 0);
- if (ret == 1)
- atomic_inc(&blkiolat->enabled);
- if (ret == -1)
- atomic_dec(&blkiolat->enabled);
+ iolatency_set_min_lat_nsec(blkg, 0);
iolatency_clear_scaling(blkg);
}
diff --git a/block/blk-map.c b/block/blk-map.c
index 7ffde64f9019..df8b066cd548 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -262,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
- if (unlikely(offs & queue_dma_alignment(rq->q))) {
- ret = -EINVAL;
+ if (unlikely(offs & queue_dma_alignment(rq->q)))
j = 0;
- } else {
+ else {
for (j = 0; j < npages; j++) {
struct page *page = pages[j];
unsigned int n = PAGE_SIZE - offs;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 68ac23d0b640..2dcd738c6952 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -228,7 +228,6 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
BUG_ON(real_tag >= tags->nr_tags);
sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
} else {
- BUG_ON(tag >= tags->nr_reserved_tags);
sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
}
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c4370d276170..e9bf950983c7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -133,7 +133,8 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv,
{
struct mq_inflight *mi = priv;
- if ((!mi->part->bd_partno || rq->part == mi->part) &&
+ if (rq->part && blk_do_io_stat(rq) &&
+ (!mi->part->bd_partno || rq->part == mi->part) &&
blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
mi->inflight[rq_data_dir(rq)]++;
@@ -1083,7 +1084,7 @@ bool blk_mq_complete_request_remote(struct request *rq)
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
/*
- * For a polled request, always complete locallly, it's pointless
+ * For a polled request, always complete locally, it's pointless
* to redirect the completion.
*/
if (rq->cmd_flags & REQ_POLLED)
@@ -1131,14 +1132,7 @@ void blk_mq_start_request(struct request *rq)
trace_block_rq_issue(rq);
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
- u64 start_time;
-#ifdef CONFIG_BLK_CGROUP
- if (rq->bio)
- start_time = bio_issue_time(&rq->bio->bi_issue);
- else
-#endif
- start_time = ktime_get_ns();
- rq->io_start_time_ns = start_time;
+ rq->io_start_time_ns = ktime_get_ns();
rq->stats_sectors = blk_rq_sectors(rq);
rq->rq_flags |= RQF_STATS;
rq_qos_issue(q, rq);
@@ -1158,29 +1152,44 @@ void blk_mq_start_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_start_request);
-/**
- * blk_end_sync_rq - executes a completion event on a request
- * @rq: request to complete
- * @error: end I/O status of the request
+/*
+ * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
*/
-static void blk_end_sync_rq(struct request *rq, blk_status_t error)
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
{
- struct completion *waiting = rq->end_io_data;
+ if (plug->multiple_queues)
+ return BLK_MAX_REQUEST_COUNT * 2;
+ return BLK_MAX_REQUEST_COUNT;
+}
- rq->end_io_data = (void *)(uintptr_t)error;
+static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+{
+ struct request *last = rq_list_peek(&plug->mq_list);
- /*
- * complete last, if this is a stack request the process (and thus
- * the rq pointer) could be invalid right after this complete()
- */
- complete(waiting);
+ if (!plug->rq_count) {
+ trace_block_plug(rq->q);
+ } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
+ (!blk_queue_nomerges(rq->q) &&
+ blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
+ blk_mq_flush_plug_list(plug, false);
+ trace_block_plug(rq->q);
+ }
+
+ if (!plug->multiple_queues && last && last->q != rq->q)
+ plug->multiple_queues = true;
+ if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+ plug->has_elevator = true;
+ rq->rq_next = NULL;
+ rq_list_add(&plug->mq_list, rq);
+ plug->rq_count++;
}
/**
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
* @rq: request to insert
* @at_head: insert request at head or tail of queue
- * @done: I/O completion handler
*
* Description:
* Insert a fully prepared request at the back of the I/O scheduler queue
@@ -1189,23 +1198,32 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
* Note:
* This function will invoke @done directly if the queue is dead.
*/
-void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
+void blk_execute_rq_nowait(struct request *rq, bool at_head)
{
WARN_ON(irqs_disabled());
WARN_ON(!blk_rq_is_passthrough(rq));
- rq->end_io = done;
-
blk_account_io_start(rq);
-
- /*
- * don't check dying flag for MQ because the request won't
- * be reused after dying flag is set
- */
- blk_mq_sched_insert_request(rq, at_head, true, false);
+ if (current->plug)
+ blk_add_rq_to_plug(current->plug, rq);
+ else
+ blk_mq_sched_insert_request(rq, at_head, true, false);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
+struct blk_rq_wait {
+ struct completion done;
+ blk_status_t ret;
+};
+
+static void blk_end_sync_rq(struct request *rq, blk_status_t ret)
+{
+ struct blk_rq_wait *wait = rq->end_io_data;
+
+ wait->ret = ret;
+ complete(&wait->done);
+}
+
static bool blk_rq_is_poll(struct request *rq)
{
if (!rq->mq_hctx)
@@ -1237,25 +1255,37 @@ static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
*/
blk_status_t blk_execute_rq(struct request *rq, bool at_head)
{
- DECLARE_COMPLETION_ONSTACK(wait);
- unsigned long hang_check;
+ struct blk_rq_wait wait = {
+ .done = COMPLETION_INITIALIZER_ONSTACK(wait.done),
+ };
+
+ WARN_ON(irqs_disabled());
+ WARN_ON(!blk_rq_is_passthrough(rq));
rq->end_io_data = &wait;
- blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
+ rq->end_io = blk_end_sync_rq;
- /* Prevent hang_check timer from firing at us during very long I/O */
- hang_check = sysctl_hung_task_timeout_secs;
+ blk_account_io_start(rq);
+ blk_mq_sched_insert_request(rq, at_head, true, false);
- if (blk_rq_is_poll(rq))
- blk_rq_poll_completion(rq, &wait);
- else if (hang_check)
- while (!wait_for_completion_io_timeout(&wait,
- hang_check * (HZ/2)))
- ;
- else
- wait_for_completion_io(&wait);
+ if (blk_rq_is_poll(rq)) {
+ blk_rq_poll_completion(rq, &wait.done);
+ } else {
+ /*
+ * Prevent hang_check timer from firing at us during very long
+ * I/O
+ */
+ unsigned long hang_check = sysctl_hung_task_timeout_secs;
+
+ if (hang_check)
+ while (!wait_for_completion_io_timeout(&wait.done,
+ hang_check * (HZ/2)))
+ ;
+ else
+ wait_for_completion_io(&wait.done);
+ }
- return (blk_status_t)(uintptr_t)rq->end_io_data;
+ return wait.ret;
}
EXPORT_SYMBOL(blk_execute_rq);
@@ -2130,8 +2160,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q)
*/
static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
{
- struct blk_mq_hw_ctx *hctx;
-
+ struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
/*
* If the IO scheduler does not respect hardware queues when
* dispatching, we just don't bother with multiple HW queues and
@@ -2139,8 +2168,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
* just causes lock contention inside the scheduler and pointless cache
* bouncing.
*/
- hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
- raw_smp_processor_id());
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx);
+
if (!blk_mq_hctx_stopped(hctx))
return hctx;
return NULL;
@@ -2683,40 +2712,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
hctx->queue->mq_ops->commit_rqs(hctx);
}
-/*
- * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
- * queues. This is important for md arrays to benefit from merging
- * requests.
- */
-static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
-{
- if (plug->multiple_queues)
- return BLK_MAX_REQUEST_COUNT * 2;
- return BLK_MAX_REQUEST_COUNT;
-}
-
-static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
-{
- struct request *last = rq_list_peek(&plug->mq_list);
-
- if (!plug->rq_count) {
- trace_block_plug(rq->q);
- } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
- (!blk_queue_nomerges(rq->q) &&
- blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
- blk_mq_flush_plug_list(plug, false);
- trace_block_plug(rq->q);
- }
-
- if (!plug->multiple_queues && last && last->q != rq->q)
- plug->multiple_queues = true;
- if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
- plug->has_elevator = true;
- rq->rq_next = NULL;
- rq_list_add(&plug->mq_list, rq);
- plug->rq_count++;
-}
-
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
struct bio *bio, unsigned int nr_segs)
{
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 469c483719be..139b2d7a99e2 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
break; \
if ((__tg)) { \
blk_add_cgroup_trace_msg(__td->queue, \
- tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
+ &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
} else { \
blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
} \
@@ -2189,13 +2189,14 @@ again:
}
out_unlock:
- spin_unlock_irq(&q->queue_lock);
bio_set_flag(bio, BIO_THROTTLED);
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
if (throttled || !td->track_bio_latency)
bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
#endif
+ spin_unlock_irq(&q->queue_lock);
+
rcu_read_unlock();
return throttled;
}
diff --git a/block/bounce.c b/block/bounce.c
index 467be46d0e65..8f7b6fe3b4db 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
goto err_put;
bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
return bio;
diff --git a/block/fops.c b/block/fops.c
index e3643362c244..d6b3276a6c68 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
#define DIO_INLINE_BIO_VECS 4
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
- struct task_struct *waiter = bio->bi_private;
-
- WRITE_ONCE(bio->bi_private, NULL);
- blk_wake_io_task(waiter);
-}
-
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct iov_iter *iter, unsigned int nr_pages)
{
@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
- bio.bi_private = current;
- bio.bi_end_io = blkdev_bio_end_io_simple;
bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter);
@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_NOWAIT)
bio.bi_opf |= REQ_NOWAIT;
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(&bio, iocb);
- submit_bio(&bio);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(bio.bi_private))
- break;
- if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
- blk_io_schedule();
- }
- __set_current_state(TASK_RUNNING);
+ submit_bio_wait(&bio);
bio_release_pages(&bio, should_dirty);
if (unlikely(bio.bi_status))
@@ -392,9 +372,9 @@ static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
return block_write_full_page(page, blkdev_get_block, wbc);
}
-static int blkdev_readpage(struct file * file, struct page * page)
+static int blkdev_read_folio(struct file *file, struct folio *folio)
{
- return block_read_full_page(page, blkdev_get_block);
+ return block_read_full_folio(folio, blkdev_get_block);
}
static void blkdev_readahead(struct readahead_control *rac)
@@ -403,11 +383,9 @@ static void blkdev_readahead(struct readahead_control *rac)
}
static int blkdev_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags, struct page **pagep,
- void **fsdata)
+ loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
- return block_write_begin(mapping, pos, len, flags, pagep,
- blkdev_get_block);
+ return block_write_begin(mapping, pos, len, pagep, blkdev_get_block);
}
static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -432,7 +410,7 @@ static int blkdev_writepages(struct address_space *mapping,
const struct address_space_operations def_blk_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
- .readpage = blkdev_readpage,
+ .read_folio = blkdev_read_folio,
.readahead = blkdev_readahead,
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 3ed5eaf3446a..6ed602b2f80a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
if (at_head) {
list_add(&rq->queuelist, &per_prio->dispatch);
+ rq->fifo_time = jiffies;
} else {
deadline_add_rq_rb(per_prio, rq);
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c
index 2c381c694c57..d2fc122d7426 100644
--- a/block/partitions/acorn.c
+++ b/block/partitions/acorn.c
@@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
#ifdef CONFIG_ACORN_PARTITION_RISCIX
case PARTITION_RISCIX_SCSI:
case PARTITION_RISCIX_MFM:
- slot = riscix_partition(state, start_sect, slot,
+ riscix_partition(state, start_sect, slot,
nr_sects);
break;
#endif
case PARTITION_LINUX:
- slot = linux_partition(state, start_sect, slot,
+ linux_partition(state, start_sect, slot,
nr_sects);
break;
}
diff --git a/block/partitions/atari.c b/block/partitions/atari.c
index da5994175416..9655c728262a 100644
--- a/block/partitions/atari.c
+++ b/block/partitions/atari.c
@@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state)
/* accept only GEM,BGM,RAW,LNX,SWP partitions */
if (!((pi->flg & 1) && OK_id(pi->id)))
continue;
- part_fmt = 2;
put_partition (state, slot,
be32_to_cpu(pi->st),
be32_to_cpu(pi->siz));
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index 27f6c7d9c776..38e58960ae03 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
len = r_cols;
} else {
r_stripe = 0;
- r_cols = 0;
len = r_parent;
}
if (len < 0)
@@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
len = r_id2;
- } else {
- r_id1 = 0;
- r_id2 = 0;
+ } else
len = r_diskid;
- }
if (len < 0)
return false;
@@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
len = r_id2;
- } else {
- r_id1 = 0;
- r_id2 = 0;
+ } else
len = r_name;
- }
if (len < 0)
return false;
@@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
return false;
}
len = r_index;
- } else {
- r_index = 0;
+ } else
len = r_diskid;
- }
if (len < 0) {
ldm_error("len %d < 0", len);
return false;