diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile | 1 | ||||
-rw-r--r-- | block/badblocks.c | 2 | ||||
-rw-r--r-- | block/bdev.c | 7 | ||||
-rw-r--r-- | block/bfq-iosched.c | 51 | ||||
-rw-r--r-- | block/bfq-iosched.h | 5 | ||||
-rw-r--r-- | block/bio.c | 20 | ||||
-rw-r--r-- | block/blk-cgroup-fc-appid.c | 57 | ||||
-rw-r--r-- | block/blk-cgroup.c | 176 | ||||
-rw-r--r-- | block/blk-cgroup.h | 140 | ||||
-rw-r--r-- | block/blk-core.c | 83 | ||||
-rw-r--r-- | block/blk-crypto-fallback.c | 1 | ||||
-rw-r--r-- | block/blk-ia-ranges.c | 7 | ||||
-rw-r--r-- | block/blk-iocost.c | 88 | ||||
-rw-r--r-- | block/blk-iolatency.c | 130 | ||||
-rw-r--r-- | block/blk-map.c | 5 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 1 | ||||
-rw-r--r-- | block/blk-mq.c | 163 | ||||
-rw-r--r-- | block/blk-throttle.c | 5 | ||||
-rw-r--r-- | block/bounce.c | 1 | ||||
-rw-r--r-- | block/fops.c | 34 | ||||
-rw-r--r-- | block/mq-deadline.c | 1 | ||||
-rw-r--r-- | block/partitions/acorn.c | 4 | ||||
-rw-r--r-- | block/partitions/atari.c | 1 | ||||
-rw-r--r-- | block/partitions/ldm.c | 15 |
24 files changed, 571 insertions, 427 deletions
diff --git a/block/Makefile b/block/Makefile index 3950ecbc5c26..4e01bb71ad6e 100644 --- a/block/Makefile +++ b/block/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o +obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o diff --git a/block/badblocks.c b/block/badblocks.c index d39056630d9c..3afb550c0f7b 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors, s >>= bb->shift; target += (1<<bb->shift) - 1; target >>= bb->shift; - sectors = target - s; } /* 'target' is now the first block after the bad range */ @@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) s += (1<<bb->shift) - 1; s >>= bb->shift; target >>= bb->shift; - sectors = target - s; } write_seqlock_irq(&bb->lock); diff --git a/block/bdev.c b/block/bdev.c index 7bf88e591aaf..5fe06c1f2def 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -200,6 +200,13 @@ int sync_blockdev(struct block_device *bdev) } EXPORT_SYMBOL(sync_blockdev); +int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend) +{ + return filemap_write_and_wait_range(bdev->bd_inode->i_mapping, + lstart, lend); +} +EXPORT_SYMBOL(sync_blockdev_range); + /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e47c75f1fa0f..0d46cb728bbf 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600; */ static const unsigned long bfq_late_stable_merging = 600; -#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0]) +#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0])) #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) @@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q) */ void bfq_schedule_dispatch(struct bfq_data *bfqd) { + lockdep_assert_held(&bfqd->lock); + if (bfqd->queued != 0) { bfq_log(bfqd, "schedule dispatch"); blk_mq_run_hw_queues(bfqd->queue, true); @@ -569,7 +571,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) struct bfq_entity *entity = &bfqq->entity; struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH]; struct bfq_entity **entities = inline_entities; - int depth, level; + int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH; int class_idx = bfqq->ioprio_class - 1; struct bfq_sched_data *sched_data; unsigned long wsum; @@ -578,15 +580,21 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) if (!entity->on_st_or_in_serv) return false; +retry: + spin_lock_irq(&bfqd->lock); /* +1 for bfqq entity, root cgroup not included */ depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1; - if (depth > BFQ_LIMIT_INLINE_DEPTH) { + if (depth > alloc_depth) { + spin_unlock_irq(&bfqd->lock); + if (entities != inline_entities) + kfree(entities); entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO); if (!entities) return false; + alloc_depth = depth; + goto retry; } - spin_lock_irq(&bfqd->lock); sched_data = entity->sched_data; /* Gather our ancestors as we need to traverse them in reverse order */ level = 0; @@ -2127,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->last_completed_rq_bfqq || bfqd->last_completed_rq_bfqq == bfqq || bfq_bfqq_has_short_ttime(bfqq) || - bfqq->dispatched > 0 || - now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC || - bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq) + now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC) return; /* @@ -2202,9 +2208,13 @@ static void bfq_add_request(struct request *rq) bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); bfqq->queued[rq_is_sync(rq)]++; - bfqd->queued++; + /* + * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it + * may be read without holding the lock in bfq_has_work(). + */ + WRITE_ONCE(bfqd->queued, bfqd->queued + 1); - if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) { + if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) { bfq_check_waker(bfqd, bfqq, now_ns); /* @@ -2394,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q, if (rq->queuelist.prev != &rq->queuelist) list_del_init(&rq->queuelist); bfqq->queued[sync]--; - bfqd->queued--; + /* + * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it + * may be read without holding the lock in bfq_has_work(). + */ + WRITE_ONCE(bfqd->queued, bfqd->queued - 1); elv_rb_del(&bfqq->sort_list, rq); elv_rqhash_del(q, rq); @@ -5055,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; /* - * Avoiding lock: a race on bfqd->busy_queues should cause at + * Avoiding lock: a race on bfqd->queued should cause at * most a call to dispatch for nothing */ return !list_empty_careful(&bfqd->dispatch) || - bfq_tot_busy_queues(bfqd) > 0; + READ_ONCE(bfqd->queued); } static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) @@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfq_schedule_dispatch(bfqd); } -static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) -{ - bfqq_request_freed(bfqq); - bfq_put_queue(bfqq); -} - /* * The processes associated with bfqq may happen to generate their * cumulative I/O at a lower rate than the rate at which the device @@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq) bfq_completed_request(bfqq, bfqd); } - bfq_finish_requeue_request_body(bfqq); + bfqq_request_freed(bfqq); + bfq_put_queue(bfqq); + RQ_BIC(rq)->requests--; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) bfqq_request_allocated(bfqq); bfqq->ref++; + bic->requests++; bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d", rq, bfqq, bfqq->ref); @@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_bfqq_expire(bfqd, bfqq, true, reason); schedule_dispatch: - spin_unlock_irqrestore(&bfqd->lock, flags); bfq_schedule_dispatch(bfqd); + spin_unlock_irqrestore(&bfqd->lock, flags); } /* diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 978ef5d6fe6a..ca8177d7bf7c 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -468,6 +468,7 @@ struct bfq_io_cq { struct bfq_queue *stable_merge_bfqq; bool stably_merged; /* non splittable if true */ + unsigned int requests; /* Number of requests this process has in flight */ }; /** @@ -1102,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq); break; \ bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \ blk_add_cgroup_trace_msg((bfqd)->queue, \ - bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \ + &bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css, \ "%s " fmt, pid_str, ##args); \ } while (0) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ blk_add_cgroup_trace_msg((bfqd)->queue, \ - bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \ + &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \ } while (0) #else /* CONFIG_BFQ_GROUP_IOSCHED */ diff --git a/block/bio.c b/block/bio.c index 212ccd5b5212..f92d0223247b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -722,6 +722,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs) bio_alloc_cache_prune(cache, -1U); } free_percpu(bs->cache); + bs->cache = NULL; } /** @@ -761,14 +762,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) bio_set_flag(bio, BIO_CLONED); if (bio_flagged(bio_src, BIO_THROTTLED)) bio_set_flag(bio, BIO_THROTTLED); - if (bio->bi_bdev == bio_src->bi_bdev && - bio_flagged(bio_src, BIO_REMAPPED)) - bio_set_flag(bio, BIO_REMAPPED); bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_iter = bio_src->bi_iter; - bio_clone_blkg_association(bio, bio_src); - blkcg_bio_issue_init(bio); + if (bio->bi_bdev) { + if (bio->bi_bdev == bio_src->bi_bdev && + bio_flagged(bio_src, BIO_REMAPPED)) + bio_set_flag(bio, BIO_REMAPPED); + bio_clone_blkg_association(bio, bio_src); + } if (bio_crypt_clone(bio, bio_src, gfp) < 0) return -ENOMEM; @@ -1365,10 +1367,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio_vec src_bv = bio_iter_iovec(src, *src_iter); struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter); unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len); - void *src_buf; + void *src_buf = bvec_kmap_local(&src_bv); + void *dst_buf = bvec_kmap_local(&dst_bv); + + memcpy(dst_buf, src_buf, bytes); - src_buf = bvec_kmap_local(&src_bv); - memcpy_to_bvec(&dst_bv, src_buf); + kunmap_local(dst_buf); kunmap_local(src_buf); bio_advance_iter_single(src, src_iter, bytes); diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c new file mode 100644 index 000000000000..760a2e1878dd --- /dev/null +++ b/block/blk-cgroup-fc-appid.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "blk-cgroup.h" + +/** + * blkcg_set_fc_appid - set the fc_app_id field associted to blkcg + * @app_id: application identifier + * @cgrp_id: cgroup id + * @app_id_len: size of application identifier + */ +int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len) +{ + struct cgroup *cgrp; + struct cgroup_subsys_state *css; + struct blkcg *blkcg; + int ret = 0; + + if (app_id_len > FC_APPID_LEN) + return -EINVAL; + + cgrp = cgroup_get_from_id(cgrp_id); + if (!cgrp) + return -ENOENT; + css = cgroup_get_e_css(cgrp, &io_cgrp_subsys); + if (!css) { + ret = -ENOENT; + goto out_cgrp_put; + } + blkcg = css_to_blkcg(css); + /* + * There is a slight race condition on setting the appid. + * Worst case an I/O may not find the right id. + * This is no different from the I/O we let pass while obtaining + * the vmid from the fabric. + * Adding the overhead of a lock is not necessary. + */ + strlcpy(blkcg->fc_app_id, app_id, app_id_len); + css_put(css); +out_cgrp_put: + cgroup_put(cgrp); + return ret; +} +EXPORT_SYMBOL_GPL(blkcg_set_fc_appid); + +/** + * blkcg_get_fc_appid - get the fc app identifier associated with a bio + * @bio: target bio + * + * On success return the fc_app_id, on failure return NULL + */ +char *blkcg_get_fc_appid(struct bio *bio) +{ + if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0') + return NULL; + return bio->bi_blkg->blkcg->fc_app_id; +} +EXPORT_SYMBOL_GPL(blkcg_get_fc_appid); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8dfe62786cd5..764e740b0c0f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq; #define BLKG_DESTROY_BATCH_SIZE 64 +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -156,6 +173,33 @@ static void blkg_async_bio_workfn(struct work_struct *work) } /** + * bio_blkcg_css - return the blkcg CSS associated with a bio + * @bio: target bio + * + * This returns the CSS for the blkcg associated with a bio, or %NULL if not + * associated. Callers are expected to either handle %NULL or know association + * has been done prior to calling this. + */ +struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio) +{ + if (!bio || !bio->bi_blkg) + return NULL; + return &bio->bi_blkg->blkcg->css; +} +EXPORT_SYMBOL_GPL(bio_blkcg_css); + +/** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + return css_to_blkcg(blkcg->css.parent); +} + +/** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with * @q: request_queue the new blkg is associated with @@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct blkcg_gq *blkg; int i, ret; - WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(&q->queue_lock); /* request_queue is dying, do not create/recreate a blkg */ @@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) { struct blkg_iostat_set *bis = &blkg->iostat; u64 rbytes, wbytes, rios, wios, dbytes, dios; - bool has_stats = false; const char *dname; unsigned seq; int i; @@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) } while (u64_stats_fetch_retry(&bis->sync, seq)); if (rbytes || wbytes || rios || wios) { - has_stats = true; seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu", rbytes, wbytes, rios, wios, dbytes, dios); } if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) { - has_stats = true; seq_printf(s, " use_delay=%d delay_nsec=%llu", atomic_read(&blkg->use_delay), atomic64_read(&blkg->delay_nsec)); @@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) if (!blkg->pd[i] || !pol->pd_stat_fn) continue; - if (pol->pd_stat_fn(blkg->pd[i], s)) - has_stats = true; + pol->pd_stat_fn(blkg->pd[i], s); } - if (has_stats) - seq_printf(s, "\n"); + seq_puts(s, "\n"); } static int blkcg_print_stat(struct seq_file *sf, void *v) @@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = { { } /* terminate */ }; +#ifdef CONFIG_CGROUP_WRITEBACK +struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css) +{ + return &css_to_blkcg(css)->cgwb_list; +} +#endif + /* * blkcg destruction is a three-stage process. * @@ -1016,25 +1061,6 @@ static struct cftype blkcg_legacy_files[] = { */ /** - * blkcg_css_offline - cgroup css_offline callback - * @css: css of interest - * - * This function is called when @css is about to go away. Here the cgwbs are - * offlined first and only once writeback associated with the blkcg has - * finished do we start step 2 (see above). - */ -static void blkcg_css_offline(struct cgroup_subsys_state *css) -{ - struct blkcg *blkcg = css_to_blkcg(css); - - /* this prevents anyone from attaching or migrating to this blkcg */ - wb_blkcg_offline(blkcg); - - /* put the base online pin allowing step 2 to be triggered */ - blkcg_unpin_online(blkcg); -} - -/** * blkcg_destroy_blkgs - responsible for shooting down blkgs * @blkcg: blkcg of interest * @@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) * * This is the blkcg counterpart of ioc_release_fn(). */ -void blkcg_destroy_blkgs(struct blkcg *blkcg) +static void blkcg_destroy_blkgs(struct blkcg *blkcg) { might_sleep(); @@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg) spin_unlock_irq(&blkcg->lock); } +/** + * blkcg_pin_online - pin online state + * @blkcg_css: blkcg of interest + * + * While pinned, a blkcg is kept online. This is primarily used to + * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline + * while an associated cgwb is still active. + */ +void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css) +{ + refcount_inc(&css_to_blkcg(blkcg_css)->online_pin); +} + +/** + * blkcg_unpin_online - unpin online state + * @blkcg_css: blkcg of interest + * + * This is primarily used to impedance-match blkg and cgwb lifetimes so + * that blkg doesn't go offline while an associated cgwb is still active. + * When this count goes to zero, all active cgwbs have finished so the + * blkcg can continue destruction by calling blkcg_destroy_blkgs(). + */ +void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css) +{ + struct blkcg *blkcg = css_to_blkcg(blkcg_css); + + do { + if (!refcount_dec_and_test(&blkcg->online_pin)) + break; + blkcg_destroy_blkgs(blkcg); + blkcg = blkcg_parent(blkcg); + } while (blkcg); +} + +/** + * blkcg_css_offline - cgroup css_offline callback + * @css: css of interest + * + * This function is called when @css is about to go away. Here the cgwbs are + * offlined first and only once writeback associated with the blkcg has + * finished do we start step 2 (see above). + */ +static void blkcg_css_offline(struct cgroup_subsys_state *css) +{ + /* this prevents anyone from attaching or migrating to this blkcg */ + wb_blkcg_offline(css); + + /* put the base online pin allowing step 2 to be triggered */ + blkcg_unpin_online(css); +} + static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); @@ -1163,8 +1240,7 @@ unlock: static int blkcg_css_online(struct cgroup_subsys_state *css) { - struct blkcg *blkcg = css_to_blkcg(css); - struct blkcg *parent = blkcg_parent(blkcg); + struct blkcg *parent = blkcg_parent(css_to_blkcg(css)); /* * blkcg_pin_online() is used to delay blkcg offline so that blkgs @@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css) * parent so that offline always happens towards the root. */ if (parent) - blkcg_pin_online(parent); + blkcg_pin_online(css); return 0; } @@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q) preloaded = !radix_tree_preload(GFP_KERNEL); /* Make sure the root blkg exists. */ - rcu_read_lock(); + /* spin_lock_irq can serve as RCU read-side critical section. */ spin_lock_irq(&q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); if (IS_ERR(blkg)) goto err_unlock; q->root_blkg = blkg; spin_unlock_irq(&q->queue_lock); - rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); @@ -1234,7 +1309,6 @@ err_destroy_all: return ret; err_unlock: spin_unlock_irq(&q->queue_lock); - rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); return PTR_ERR(blkg); @@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) void blkcg_maybe_throttle_current(void) { struct request_queue *q = current->throttle_queue; - struct cgroup_subsys_state *css; struct blkcg *blkcg; struct blkcg_gq *blkg; bool use_memdelay = current->use_memdelay; @@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void) current->use_memdelay = false; rcu_read_lock(); - css = kthread_blkcg(); - if (css) - blkcg = css_to_blkcg(css); - else - blkcg = css_to_blkcg(task_css(current, io_cgrp_id)); - + blkcg = css_to_blkcg(blkcg_css()); if (!blkcg) goto out; blkg = blkg_lookup(blkcg, q); @@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio) rcu_read_lock(); if (bio->bi_blkg) - css = &bio_blkcg(bio)->css; + css = bio_blkcg_css(bio); else css = blkcg_css(); @@ -1906,12 +1974,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg); */ void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_blkg) { - if (dst->bi_blkg) - blkg_put(dst->bi_blkg); - blkg_get(src->bi_blkg); - dst->bi_blkg = src->bi_blkg; - } + if (src->bi_blkg) + bio_associate_blkg_from_css(dst, bio_blkcg_css(src)); } EXPORT_SYMBOL_GPL(bio_clone_blkg_association); @@ -1950,6 +2014,22 @@ void blk_cgroup_bio_start(struct bio *bio) put_cpu(); } +bool blk_cgroup_congested(void) +{ + struct cgroup_subsys_state *css; + bool ret = false; + + rcu_read_lock(); + for (css = blkcg_css(); css; css = css->parent) { + if (atomic_read(&css->cgroup->congestion_count)) { + ret = true; + break; + } + } + rcu_read_unlock(); + return ret; +} + static int __init blkcg_init(void) { blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 47e1e38390c9..d4de0a35e066 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -15,13 +15,101 @@ */ #include <linux/blk-cgroup.h> +#include <linux/cgroup.h> +#include <linux/kthread.h> #include <linux/blk-mq.h> +struct blkcg_gq; +struct blkg_policy_data; + + /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) #ifdef CONFIG_BLK_CGROUP +enum blkg_iostat_type { + BLKG_IOSTAT_READ, + BLKG_IOSTAT_WRITE, + BLKG_IOSTAT_DISCARD, + + BLKG_IOSTAT_NR, +}; + +struct blkg_iostat { + u64 bytes[BLKG_IOSTAT_NR]; + u64 ios[BLKG_IOSTAT_NR]; +}; + +struct blkg_iostat_set { + struct u64_stats_sync sync; + struct blkg_iostat cur; + struct blkg_iostat last; +}; + +/* association between a blk cgroup and a request queue */ +struct blkcg_gq { + /* Pointer to the associated request_queue */ + struct request_queue *q; + struct list_head q_node; + struct hlist_node blkcg_node; + struct blkcg *blkcg; + + /* all non-root blkcg_gq's are guaranteed to have access to parent */ + struct blkcg_gq *parent; + + /* reference count */ + struct percpu_ref refcnt; + + /* is this blkg online? protected by both blkcg and q locks */ + bool online; + + struct blkg_iostat_set __percpu *iostat_cpu; + struct blkg_iostat_set iostat; + + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; + + spinlock_t async_bio_lock; + struct bio_list async_bios; + union { + struct work_struct async_bio_work; + struct work_struct free_work; + }; + + atomic_t use_delay; + atomic64_t delay_nsec; + atomic64_t delay_start; + u64 last_delay; + int last_use; + + struct rcu_head rcu_head; +}; + +struct blkcg { + struct cgroup_subsys_state css; + spinlock_t lock; + refcount_t online_pin; + + struct radix_tree_root blkg_tree; + struct blkcg_gq __rcu *blkg_hint; + struct hlist_head blkg_list; + + struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; + + struct list_head all_blkcgs_node; +#ifdef CONFIG_BLK_CGROUP_FC_APPID + char fc_app_id[FC_APPID_LEN]; +#endif +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; +#endif +}; + +static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct blkcg, css) : NULL; +} + /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). This is used by blkcg policies which need to track @@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, +typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, struct seq_file *s); struct blkcg_policy { @@ -123,52 +211,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, void blkg_conf_finish(struct blkg_conf_ctx *ctx); /** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. - */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} - -/** - * __bio_blkcg - internal, inconsistent version to get blkcg - * - * DO NOT USE. - * This function is inconsistent and consequently is dangerous to use. The - * first part of the function returns a blkcg where a reference is owned by the - * bio. This means it does not need to be rcu protected as it cannot go away - * with the bio owning a reference to it. However, the latter potentially gets - * it from task_css(). This can race against task migration and the cgroup - * dying. It is also semantically different as it must be called rcu protected - * and is susceptible to failure when trying to get a reference to it. - * Therefore, it is not ok to assume that *_get() will always succeed on the - * blkcg returned here. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - -/** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg * @return: true if this bio needs to be submitted with the root blkg context. * * In order to avoid priority inversions we sometimes need to issue a bio as if * it were attached to the root blkg, and then backcharge to the actual owning - * blkg. The idea is we do bio_blkcg() to look up the actual context for the - * bio and attach the appropriate blkg to the bio. Then we call this helper and - * if it is true run with the root blkg for that queue and then do any + * blkg. The idea is we do bio_blkcg_css() to look up the actual context for + * the bio and attach the appropriate blkg to the bio. Then we call this helper + * and if it is true run with the root blkg for that queue and then do any * backcharging to the originating cgroup once the io is complete. */ static inline bool bio_issue_as_root_blkg(struct bio *bio) @@ -457,7 +507,8 @@ struct blkcg_policy_data { struct blkcg_policy { }; -#ifdef CONFIG_BLOCK +struct blkcg { +}; static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) @@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } - static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, struct blkcg_policy *pol) { return NULL; } static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } @@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) -#endif /* CONFIG_BLOCK */ #endif /* CONFIG_BLK_CGROUP */ #endif /* _BLK_CGROUP_PRIVATE_H */ diff --git a/block/blk-core.c b/block/blk-core.c index ee18b6a699bd..06ff5bbfe8f6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -50,7 +50,6 @@ #include "blk-pm.h" #include "blk-cgroup.h" #include "blk-throttle.h" -#include "blk-rq-qos.h" struct dentry *blk_debugfs_root; @@ -315,9 +314,6 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); - /* cleanup rq qos structures for queue without disk */ - rq_qos_exit(q); - blk_queue_flag_set(QUEUE_FLAG_DEAD, q); blk_sync_queue(q); @@ -592,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio) (nr_sectors > maxsector || bio->bi_iter.bi_sector > maxsector - nr_sectors)) { pr_info_ratelimited("%s: attempt to access beyond end of device\n" - "%pg: rw=%d, want=%llu, limit=%llu\n", - current->comm, - bio->bi_bdev, bio->bi_opf, - bio_end_sector(bio), maxsector); + "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n", + current->comm, bio->bi_bdev, bio->bi_opf, + bio->bi_iter.bi_sector, nr_sectors, maxsector); return -EIO; } return 0; @@ -893,19 +888,11 @@ void submit_bio(struct bio *bio) if (blkcg_punt_bio_submit(bio)) return; - /* - * If it's a regular read/write or a barrier with data attached, - * go through the normal accounting stuff before submission. - */ - if (bio_has_data(bio)) { - unsigned int count = bio_sectors(bio); - - if (op_is_write(bio_op(bio))) { - count_vm_events(PGPGOUT, count); - } else { - task_io_account_read(bio->bi_iter.bi_size); - count_vm_events(PGPGIN, count); - } + if (bio_op(bio) == REQ_OP_READ) { + task_io_account_read(bio->bi_iter.bi_size); + count_vm_events(PGPGIN, bio_sectors(bio)); + } else if (bio_op(bio) == REQ_OP_WRITE) { + count_vm_events(PGPGOUT, bio_sectors(bio)); } /* @@ -952,7 +939,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) blk_flush_plug(current->plug, false); - if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) + if (bio_queue_enter(bio)) return 0; if (queue_is_mq(q)) { ret = blk_mq_poll(q, cookie, iob, flags); @@ -1022,21 +1009,22 @@ again: } } -static unsigned long __part_start_io_acct(struct block_device *part, - unsigned int sectors, unsigned int op, - unsigned long start_time) +unsigned long bdev_start_io_acct(struct block_device *bdev, + unsigned int sectors, unsigned int op, + unsigned long start_time) { const int sgrp = op_stat_group(op); part_stat_lock(); - update_io_ticks(part, start_time, false); - part_stat_inc(part, ios[sgrp]); - part_stat_add(part, sectors[sgrp], sectors); - part_stat_local_inc(part, in_flight[op_is_write(op)]); + update_io_ticks(bdev, start_time, false); + part_stat_inc(bdev, ios[sgrp]); + part_stat_add(bdev, sectors[sgrp], sectors); + part_stat_local_inc(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); return start_time; } +EXPORT_SYMBOL(bdev_start_io_acct); /** * bio_start_io_acct_time - start I/O accounting for bio based drivers @@ -1045,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part, */ void bio_start_io_acct_time(struct bio *bio, unsigned long start_time) { - __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), - bio_op(bio), start_time); + bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), start_time); } EXPORT_SYMBOL_GPL(bio_start_io_acct_time); @@ -1058,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time); */ unsigned long bio_start_io_acct(struct bio *bio) { - return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), - bio_op(bio), jiffies); + return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); -unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, - unsigned int op) -{ - return __part_start_io_acct(disk->part0, sectors, op, jiffies); -} -EXPORT_SYMBOL(disk_start_io_acct); - -static void __part_end_io_acct(struct block_device *part, unsigned int op, - unsigned long start_time) +void bdev_end_io_acct(struct block_device *bdev, unsigned int op, + unsigned long start_time) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); unsigned long duration = now - start_time; part_stat_lock(); - update_io_ticks(part, now, true); - part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); - part_stat_local_dec(part, in_flight[op_is_write(op)]); + update_io_ticks(bdev, now, true); + part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); + part_stat_local_dec(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); } +EXPORT_SYMBOL(bdev_end_io_acct); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, - struct block_device *orig_bdev) + struct block_device *orig_bdev) { - __part_end_io_acct(orig_bdev, bio_op(bio), start_time); + bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); } EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); -void disk_end_io_acct(struct gendisk *disk, unsigned int op, - unsigned long start_time) -{ - __part_end_io_acct(disk->part0, op, start_time); -} -EXPORT_SYMBOL(disk_end_io_acct); - /** * blk_lld_busy - Check if underlying low-level drivers of a device are busy * @q : the queue of the device being checked diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index 5d1aa5b1d30a..621abd1b0e4d 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -179,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) bio->bi_io_vec[bio->bi_vcnt++] = bv; bio_clone_blkg_association(bio, bio_src); - blkcg_bio_issue_init(bio); return bio; } diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index 18c68d8b9138..56ed48d2954e 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -54,13 +54,8 @@ static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj, container_of(attr, struct blk_ia_range_sysfs_entry, attr); struct blk_independent_access_range *iar = container_of(kobj, struct blk_independent_access_range, kobj); - ssize_t ret; - mutex_lock(&iar->queue->sysfs_lock); - ret = entry->show(iar, buf); - mutex_unlock(&iar->queue->sysfs_lock); - - return ret; + return entry->show(iar, buf); } static const struct sysfs_ops blk_ia_range_sysfs_ops = { diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 70a0a3d680a3..33a11ba971ea 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -533,8 +533,7 @@ struct ioc_gq { /* statistics */ struct iocg_pcpu_stat __percpu *pcpu_stat; - struct iocg_stat local_stat; - struct iocg_stat desc_stat; + struct iocg_stat stat; struct iocg_stat last_stat; u64 last_stat_abs_vusage; u64 usage_delta_us; @@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) return true; } else { if (iocg->indelay_since) { - iocg->local_stat.indelay_us += now->now - iocg->indelay_since; + iocg->stat.indelay_us += now->now - iocg->indelay_since; iocg->indelay_since = 0; } iocg->delay = 0; @@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay, /* if debt is paid in full, restore inuse */ if (!iocg->abs_vdebt) { - iocg->local_stat.indebt_us += now->now - iocg->indebt_since; + iocg->stat.indebt_us += now->now - iocg->indebt_since; iocg->indebt_since = 0; propagate_weights(iocg, iocg->active, iocg->last_inuse, @@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt, if (!waitqueue_active(&iocg->waitq)) { if (iocg->wait_since) { - iocg->local_stat.wait_us += now->now - iocg->wait_since; + iocg->stat.wait_us += now->now - iocg->wait_since; iocg->wait_since = 0; } return; @@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg, } } +/* propagate the deltas to the parent */ +static void iocg_flush_stat_upward(struct ioc_gq *iocg) +{ + if (iocg->level > 0) { + struct iocg_stat *parent_stat = + &iocg->ancestors[iocg->level - 1]->stat; + + parent_stat->usage_us += + iocg->stat.usage_us - iocg->last_stat.usage_us; + parent_stat->wait_us += + iocg->stat.wait_us - iocg->last_stat.wait_us; + parent_stat->indebt_us += + iocg->stat.indebt_us - iocg->last_stat.indebt_us; + parent_stat->indelay_us += + iocg->stat.indelay_us - iocg->last_stat.indelay_us; + } + + iocg->last_stat = iocg->stat; +} + /* collect per-cpu counters and propagate the deltas to the parent */ -static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now) +static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; - struct iocg_stat new_stat; u64 abs_vusage = 0; u64 vusage_delta; int cpu; @@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now) iocg->last_stat_abs_vusage = abs_vusage; iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate); - iocg->local_stat.usage_us += iocg->usage_delta_us; - - /* propagate upwards */ - new_stat.usage_us = - iocg->local_stat.usage_us + iocg->desc_stat.usage_us; - new_stat.wait_us = - iocg->local_stat.wait_us + iocg->desc_stat.wait_us; - new_stat.indebt_us = - iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us; - new_stat.indelay_us = - iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us; - - /* propagate the deltas to the parent */ - if (iocg->level > 0) { - struct iocg_stat *parent_stat = - &iocg->ancestors[iocg->level - 1]->desc_stat; + iocg->stat.usage_us += iocg->usage_delta_us; - parent_stat->usage_us += - new_stat.usage_us - iocg->last_stat.usage_us; - parent_stat->wait_us += - new_stat.wait_us - iocg->last_stat.wait_us; - parent_stat->indebt_us += - new_stat.indebt_us - iocg->last_stat.indebt_us; - parent_stat->indelay_us += - new_stat.indelay_us - iocg->last_stat.indelay_us; - } - - iocg->last_stat = new_stat; + iocg_flush_stat_upward(iocg); } /* get stat counters ready for reading on all active iocgs */ @@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now) /* flush leaves and build inner node walk list */ list_for_each_entry(iocg, target_iocgs, active_list) { - iocg_flush_stat_one(iocg, now); + iocg_flush_stat_leaf(iocg, now); iocg_build_inner_walk(iocg, &inner_walk); } /* keep flushing upwards by walking the inner list backwards */ list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) { - iocg_flush_stat_one(iocg, now); + iocg_flush_stat_upward(iocg); list_del_init(&iocg->walk_list); } } @@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) /* flush wait and indebt stat deltas */ if (iocg->wait_since) { - iocg->local_stat.wait_us += now->now - iocg->wait_since; + iocg->stat.wait_us += now->now - iocg->wait_since; iocg->wait_since = now->now; } if (iocg->indebt_since) { - iocg->local_stat.indebt_us += + iocg->stat.indebt_us += now->now - iocg->indebt_since; iocg->indebt_since = now->now; } if (iocg->indelay_since) { - iocg->local_stat.indelay_us += + iocg->stat.indelay_us += now->now - iocg->indelay_since; iocg->indelay_since = now->now; } @@ -2322,7 +2315,17 @@ static void ioc_timer_fn(struct timer_list *timer) iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); - } else { + } else if (!iocg->abs_vdebt) { + /* + * @iocg doesn't have enough to donate. Reset + * its inuse to active. + * + * Don't reset debtors as their inuse's are + * owned by debt handling. This shouldn't affect + * donation calculuation in any meaningful way + * as @iocg doesn't have a meaningful amount of + * share anyway. + */ TRACE_IOCG_PATH(inuse_shortage, iocg, &now, iocg->inuse, iocg->active, iocg->hweight_inuse, new_hwi); @@ -2995,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd) kfree(iocg); } -static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) +static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) { struct ioc_gq *iocg = pd_to_iocg(pd); struct ioc *ioc = iocg->ioc; if (!ioc->enabled) - return false; + return; if (iocg->level == 0) { unsigned vp10k = DIV64_U64_ROUND_CLOSEST( @@ -3017,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) iocg->last_stat.wait_us, iocg->last_stat.indebt_us, iocg->last_stat.indelay_us); - return true; } static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd, diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2f33932e72e3..9568bf8dfe82 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -87,7 +87,17 @@ struct iolatency_grp; struct blk_iolatency { struct rq_qos rqos; struct timer_list timer; - atomic_t enabled; + + /* + * ->enabled is the master enable switch gating the throttling logic and + * inflight tracking. The number of cgroups which have iolat enabled is + * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly + * from ->enable_work with the request_queue frozen. For details, See + * blkiolatency_enable_work_fn(). + */ + bool enabled; + atomic_t enable_cnt; + struct work_struct enable_work; }; static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) @@ -95,11 +105,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) return container_of(rqos, struct blk_iolatency, rqos); } -static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat) -{ - return atomic_read(&blkiolat->enabled) > 0; -} - struct child_latency_info { spinlock_t lock; @@ -464,7 +469,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); - if (!blk_iolatency_enabled(blkiolat)) + if (!blkiolat->enabled) return; while (blkg && blkg->parent) { @@ -594,7 +599,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) u64 window_start; u64 now; bool issue_as_root = bio_issue_as_root_blkg(bio); - bool enabled = false; int inflight = 0; blkg = bio->bi_blkg; @@ -605,8 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) if (!iolat) return; - enabled = blk_iolatency_enabled(iolat->blkiolat); - if (!enabled) + if (!iolat->blkiolat->enabled) return; now = ktime_to_ns(ktime_get()); @@ -645,6 +648,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos) struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); del_timer_sync(&blkiolat->timer); + flush_work(&blkiolat->enable_work); blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency); kfree(blkiolat); } @@ -716,6 +720,44 @@ next: rcu_read_unlock(); } +/** + * blkiolatency_enable_work_fn - Enable or disable iolatency on the device + * @work: enable_work of the blk_iolatency of interest + * + * iolatency needs to keep track of the number of in-flight IOs per cgroup. This + * is relatively expensive as it involves walking up the hierarchy twice for + * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we + * want to disable the in-flight tracking. + * + * We have to make sure that the counting is balanced - we don't want to leak + * the in-flight counts by disabling accounting in the completion path while IOs + * are in flight. This is achieved by ensuring that no IO is in flight by + * freezing the queue while flipping ->enabled. As this requires a sleepable + * context, ->enabled flipping is punted to this work function. + */ +static void blkiolatency_enable_work_fn(struct work_struct *work) +{ + struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency, + enable_work); + bool enabled; + + /* + * There can only be one instance of this function running for @blkiolat + * and it's guaranteed to be executed at least once after the latest + * ->enabled_cnt modification. Acting on the latest ->enable_cnt is + * sufficient. + * + * Also, we know @blkiolat is safe to access as ->enable_work is flushed + * in blkcg_iolatency_exit(). + */ + enabled = atomic_read(&blkiolat->enable_cnt); + if (enabled != blkiolat->enabled) { + blk_mq_freeze_queue(blkiolat->rqos.q); + blkiolat->enabled = enabled; + blk_mq_unfreeze_queue(blkiolat->rqos.q); + } +} + int blk_iolatency_init(struct request_queue *q) { struct blk_iolatency *blkiolat; @@ -741,17 +783,15 @@ int blk_iolatency_init(struct request_queue *q) } timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); + INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); return 0; } -/* - * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise - * return 0. - */ -static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -759,13 +799,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, BLKIOLATENCY_MAX_WIN_SIZE); - if (!oldval && val) - return 1; + if (!oldval && val) { + if (atomic_inc_return(&blkiolat->enable_cnt) == 1) + schedule_work(&blkiolat->enable_work); + } if (oldval && !val) { blkcg_clear_delay(blkg); - return -1; + if (atomic_dec_return(&blkiolat->enable_cnt) == 0) + schedule_work(&blkiolat->enable_work); } - return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -797,7 +839,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; - int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -832,41 +873,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - enable = iolatency_set_min_lat_nsec(blkg, lat_val); - if (enable) { - if (!blk_get_queue(blkg->q)) { - ret = -ENODEV; - goto out; - } - - blkg_get(blkg); - } - - if (oldval != iolat->min_lat_nsec) { + iolatency_set_min_lat_nsec(blkg, lat_val); + if (oldval != iolat->min_lat_nsec) iolatency_clear_scaling(blkg); - } - ret = 0; out: blkg_conf_finish(&ctx); - if (ret == 0 && enable) { - struct iolatency_grp *tmp = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = tmp->blkiolat; - - blk_mq_freeze_queue(blkg->q); - - if (enable == 1) - atomic_inc(&blkiolat->enabled); - else if (enable == -1) - atomic_dec(&blkiolat->enabled); - else - WARN_ON_ONCE(1); - - blk_mq_unfreeze_queue(blkg->q); - - blkg_put(blkg); - blk_put_queue(blkg->q); - } return ret ?: nbytes; } @@ -891,7 +903,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v) return 0; } -static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) +static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) { struct latency_stat stat; int cpu; @@ -914,17 +926,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) (unsigned long long)stat.ps.missed, (unsigned long long)stat.ps.total, iolat->rq_depth.max_depth); - return true; } -static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) +static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) { struct iolatency_grp *iolat = pd_to_lat(pd); unsigned long long avg_lat; unsigned long long cur_win; if (!blkcg_debug_stats) - return false; + return; if (iolat->ssd) return iolatency_ssd_stat(iolat, s); @@ -937,7 +948,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) else seq_printf(s, " depth=%u avg_lat=%llu win=%llu", iolat->rq_depth.max_depth, avg_lat, cur_win); - return true; } static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, @@ -1007,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); - struct blk_iolatency *blkiolat = iolat->blkiolat; - int ret; - ret = iolatency_set_min_lat_nsec(blkg, 0); - if (ret == 1) - atomic_inc(&blkiolat->enabled); - if (ret == -1) - atomic_dec(&blkiolat->enabled); + iolatency_set_min_lat_nsec(blkg, 0); iolatency_clear_scaling(blkg); } diff --git a/block/blk-map.c b/block/blk-map.c index 7ffde64f9019..df8b066cd548 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -262,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - if (unlikely(offs & queue_dma_alignment(rq->q))) { - ret = -EINVAL; + if (unlikely(offs & queue_dma_alignment(rq->q))) j = 0; - } else { + else { for (j = 0; j < npages; j++) { struct page *page = pages[j]; unsigned int n = PAGE_SIZE - offs; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 68ac23d0b640..2dcd738c6952 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -228,7 +228,6 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, BUG_ON(real_tag >= tags->nr_tags); sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu); } else { - BUG_ON(tag >= tags->nr_reserved_tags); sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu); } } diff --git a/block/blk-mq.c b/block/blk-mq.c index c4370d276170..e9bf950983c7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -133,7 +133,8 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv, { struct mq_inflight *mi = priv; - if ((!mi->part->bd_partno || rq->part == mi->part) && + if (rq->part && blk_do_io_stat(rq) && + (!mi->part->bd_partno || rq->part == mi->part) && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; @@ -1083,7 +1084,7 @@ bool blk_mq_complete_request_remote(struct request *rq) WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); /* - * For a polled request, always complete locallly, it's pointless + * For a polled request, always complete locally, it's pointless * to redirect the completion. */ if (rq->cmd_flags & REQ_POLLED) @@ -1131,14 +1132,7 @@ void blk_mq_start_request(struct request *rq) trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { - u64 start_time; -#ifdef CONFIG_BLK_CGROUP - if (rq->bio) - start_time = bio_issue_time(&rq->bio->bi_issue); - else -#endif - start_time = ktime_get_ns(); - rq->io_start_time_ns = start_time; + rq->io_start_time_ns = ktime_get_ns(); rq->stats_sectors = blk_rq_sectors(rq); rq->rq_flags |= RQF_STATS; rq_qos_issue(q, rq); @@ -1158,29 +1152,44 @@ void blk_mq_start_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_start_request); -/** - * blk_end_sync_rq - executes a completion event on a request - * @rq: request to complete - * @error: end I/O status of the request +/* + * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple + * queues. This is important for md arrays to benefit from merging + * requests. */ -static void blk_end_sync_rq(struct request *rq, blk_status_t error) +static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) { - struct completion *waiting = rq->end_io_data; + if (plug->multiple_queues) + return BLK_MAX_REQUEST_COUNT * 2; + return BLK_MAX_REQUEST_COUNT; +} - rq->end_io_data = (void *)(uintptr_t)error; +static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) +{ + struct request *last = rq_list_peek(&plug->mq_list); - /* - * complete last, if this is a stack request the process (and thus - * the rq pointer) could be invalid right after this complete() - */ - complete(waiting); + if (!plug->rq_count) { + trace_block_plug(rq->q); + } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || + (!blk_queue_nomerges(rq->q) && + blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { + blk_mq_flush_plug_list(plug, false); + trace_block_plug(rq->q); + } + + if (!plug->multiple_queues && last && last->q != rq->q) + plug->multiple_queues = true; + if (!plug->has_elevator && (rq->rq_flags & RQF_ELV)) + plug->has_elevator = true; + rq->rq_next = NULL; + rq_list_add(&plug->mq_list, rq); + plug->rq_count++; } /** * blk_execute_rq_nowait - insert a request to I/O scheduler for execution * @rq: request to insert * @at_head: insert request at head or tail of queue - * @done: I/O completion handler * * Description: * Insert a fully prepared request at the back of the I/O scheduler queue @@ -1189,23 +1198,32 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error) * Note: * This function will invoke @done directly if the queue is dead. */ -void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done) +void blk_execute_rq_nowait(struct request *rq, bool at_head) { WARN_ON(irqs_disabled()); WARN_ON(!blk_rq_is_passthrough(rq)); - rq->end_io = done; - blk_account_io_start(rq); - - /* - * don't check dying flag for MQ because the request won't - * be reused after dying flag is set - */ - blk_mq_sched_insert_request(rq, at_head, true, false); + if (current->plug) + blk_add_rq_to_plug(current->plug, rq); + else + blk_mq_sched_insert_request(rq, at_head, true, false); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); +struct blk_rq_wait { + struct completion done; + blk_status_t ret; +}; + +static void blk_end_sync_rq(struct request *rq, blk_status_t ret) +{ + struct blk_rq_wait *wait = rq->end_io_data; + + wait->ret = ret; + complete(&wait->done); +} + static bool blk_rq_is_poll(struct request *rq) { if (!rq->mq_hctx) @@ -1237,25 +1255,37 @@ static void blk_rq_poll_completion(struct request *rq, struct completion *wait) */ blk_status_t blk_execute_rq(struct request *rq, bool at_head) { - DECLARE_COMPLETION_ONSTACK(wait); - unsigned long hang_check; + struct blk_rq_wait wait = { + .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), + }; + + WARN_ON(irqs_disabled()); + WARN_ON(!blk_rq_is_passthrough(rq)); rq->end_io_data = &wait; - blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq); + rq->end_io = blk_end_sync_rq; - /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + blk_account_io_start(rq); + blk_mq_sched_insert_request(rq, at_head, true, false); - if (blk_rq_is_poll(rq)) - blk_rq_poll_completion(rq, &wait); - else if (hang_check) - while (!wait_for_completion_io_timeout(&wait, - hang_check * (HZ/2))) - ; - else - wait_for_completion_io(&wait); + if (blk_rq_is_poll(rq)) { + blk_rq_poll_completion(rq, &wait.done); + } else { + /* + * Prevent hang_check timer from firing at us during very long + * I/O + */ + unsigned long hang_check = sysctl_hung_task_timeout_secs; + + if (hang_check) + while (!wait_for_completion_io_timeout(&wait.done, + hang_check * (HZ/2))) + ; + else + wait_for_completion_io(&wait.done); + } - return (blk_status_t)(uintptr_t)rq->end_io_data; + return wait.ret; } EXPORT_SYMBOL(blk_execute_rq); @@ -2130,8 +2160,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q) */ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) { - struct blk_mq_hw_ctx *hctx; - + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); /* * If the IO scheduler does not respect hardware queues when * dispatching, we just don't bother with multiple HW queues and @@ -2139,8 +2168,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) * just causes lock contention inside the scheduler and pointless cache * bouncing. */ - hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT, - raw_smp_processor_id()); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx); + if (!blk_mq_hctx_stopped(hctx)) return hctx; return NULL; @@ -2683,40 +2712,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, hctx->queue->mq_ops->commit_rqs(hctx); } -/* - * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple - * queues. This is important for md arrays to benefit from merging - * requests. - */ -static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) -{ - if (plug->multiple_queues) - return BLK_MAX_REQUEST_COUNT * 2; - return BLK_MAX_REQUEST_COUNT; -} - -static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) -{ - struct request *last = rq_list_peek(&plug->mq_list); - - if (!plug->rq_count) { - trace_block_plug(rq->q); - } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || - (!blk_queue_nomerges(rq->q) && - blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { - blk_mq_flush_plug_list(plug, false); - trace_block_plug(rq->q); - } - - if (!plug->multiple_queues && last && last->q != rq->q) - plug->multiple_queues = true; - if (!plug->has_elevator && (rq->rq_flags & RQF_ELV)) - plug->has_elevator = true; - rq->rq_next = NULL; - rq_list_add(&plug->mq_list, rq); - plug->rq_count++; -} - static bool blk_mq_attempt_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 469c483719be..139b2d7a99e2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) break; \ if ((__tg)) { \ blk_add_cgroup_trace_msg(__td->queue, \ - tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\ + &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\ } else { \ blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \ } \ @@ -2189,13 +2189,14 @@ again: } out_unlock: - spin_unlock_irq(&q->queue_lock); bio_set_flag(bio, BIO_THROTTLED); #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY; #endif + spin_unlock_irq(&q->queue_lock); + rcu_read_unlock(); return throttled; } diff --git a/block/bounce.c b/block/bounce.c index 467be46d0e65..8f7b6fe3b4db 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) goto err_put; bio_clone_blkg_association(bio, bio_src); - blkcg_bio_issue_init(bio); return bio; diff --git a/block/fops.c b/block/fops.c index e3643362c244..d6b3276a6c68 100644 --- a/block/fops.c +++ b/block/fops.c @@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) #define DIO_INLINE_BIO_VECS 4 -static void blkdev_bio_end_io_simple(struct bio *bio) -{ - struct task_struct *waiter = bio->bi_private; - - WRITE_ONCE(bio->bi_private, NULL); - blk_wake_io_task(waiter); -} - static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, unsigned int nr_pages) { @@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); } bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; - bio.bi_private = current; - bio.bi_end_io = blkdev_bio_end_io_simple; bio.bi_ioprio = iocb->ki_ioprio; ret = bio_iov_iter_get_pages(&bio, iter); @@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (iocb->ki_flags & IOCB_NOWAIT) bio.bi_opf |= REQ_NOWAIT; - if (iocb->ki_flags & IOCB_HIPRI) - bio_set_polled(&bio, iocb); - submit_bio(&bio); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!READ_ONCE(bio.bi_private)) - break; - if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0)) - blk_io_schedule(); - } - __set_current_state(TASK_RUNNING); + submit_bio_wait(&bio); bio_release_pages(&bio, should_dirty); if (unlikely(bio.bi_status)) @@ -392,9 +372,9 @@ static int blkdev_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, blkdev_get_block, wbc); } -static int blkdev_readpage(struct file * file, struct page * page) +static int blkdev_read_folio(struct file *file, struct folio *folio) { - return block_read_full_page(page, blkdev_get_block); + return block_read_full_folio(folio, blkdev_get_block); } static void blkdev_readahead(struct readahead_control *rac) @@ -403,11 +383,9 @@ static void blkdev_readahead(struct readahead_control *rac) } static int blkdev_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, struct page **pagep, - void **fsdata) + loff_t pos, unsigned len, struct page **pagep, void **fsdata) { - return block_write_begin(mapping, pos, len, flags, pagep, - blkdev_get_block); + return block_write_begin(mapping, pos, len, pagep, blkdev_get_block); } static int blkdev_write_end(struct file *file, struct address_space *mapping, @@ -432,7 +410,7 @@ static int blkdev_writepages(struct address_space *mapping, const struct address_space_operations def_blk_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, - .readpage = blkdev_readpage, + .read_folio = blkdev_read_folio, .readahead = blkdev_readahead, .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 3ed5eaf3446a..6ed602b2f80a 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, if (at_head) { list_add(&rq->queuelist, &per_prio->dispatch); + rq->fifo_time = jiffies; } else { deadline_add_rq_rb(per_prio, rq); diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c index 2c381c694c57..d2fc122d7426 100644 --- a/block/partitions/acorn.c +++ b/block/partitions/acorn.c @@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state) #ifdef CONFIG_ACORN_PARTITION_RISCIX case PARTITION_RISCIX_SCSI: case PARTITION_RISCIX_MFM: - slot = riscix_partition(state, start_sect, slot, + riscix_partition(state, start_sect, slot, nr_sects); break; #endif case PARTITION_LINUX: - slot = linux_partition(state, start_sect, slot, + linux_partition(state, start_sect, slot, nr_sects); break; } diff --git a/block/partitions/atari.c b/block/partitions/atari.c index da5994175416..9655c728262a 100644 --- a/block/partitions/atari.c +++ b/block/partitions/atari.c @@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state) /* accept only GEM,BGM,RAW,LNX,SWP partitions */ if (!((pi->flg & 1) && OK_id(pi->id))) continue; - part_fmt = 2; put_partition (state, slot, be32_to_cpu(pi->st), be32_to_cpu(pi->siz)); diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index 27f6c7d9c776..38e58960ae03 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb) len = r_cols; } else { r_stripe = 0; - r_cols = 0; len = r_parent; } if (len < 0) @@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb) r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid); r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1); len = r_id2; - } else { - r_id1 = 0; - r_id2 = 0; + } else len = r_diskid; - } if (len < 0) return false; @@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb) r_id1 = ldm_relative (buffer, buflen, 0x44, r_name); r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1); len = r_id2; - } else { - r_id1 = 0; - r_id2 = 0; + } else len = r_name; - } if (len < 0) return false; @@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) return false; } len = r_index; - } else { - r_index = 0; + } else len = r_diskid; - } if (len < 0) { ldm_error("len %d < 0", len); return false; |