diff options
Diffstat (limited to 'include/linux/blk-cgroup.h')
-rw-r--r-- | include/linux/blk-cgroup.h | 146 |
1 files changed, 143 insertions, 3 deletions
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6c666fd7de3c..34aec30e06c7 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -35,6 +35,7 @@ enum blkg_rwstat_type { BLKG_RWSTAT_WRITE, BLKG_RWSTAT_SYNC, BLKG_RWSTAT_ASYNC, + BLKG_RWSTAT_DISCARD, BLKG_RWSTAT_NR, BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, @@ -136,6 +137,12 @@ struct blkcg_gq { struct blkg_policy_data *pd[BLKCG_MAX_POLS]; struct rcu_head rcu_head; + + atomic_t use_delay; + atomic64_t delay_nsec; + atomic64_t delay_start; + u64 last_delay; + int last_use; }; typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); @@ -148,6 +155,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); +typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf, + size_t size); struct blkcg_policy { int plid; @@ -167,6 +176,7 @@ struct blkcg_policy { blkcg_pol_offline_pd_fn *pd_offline_fn; blkcg_pol_free_pd_fn *pd_free_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; + blkcg_pol_stat_pd_fn *pd_stat_fn; }; extern struct blkcg blkcg_root; @@ -238,6 +248,42 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) return css_to_blkcg(task_css(current, io_cgrp_id)); } +static inline bool blk_cgroup_congested(void) +{ + struct cgroup_subsys_state *css; + bool ret = false; + + rcu_read_lock(); + css = kthread_blkcg(); + if (!css) + css = task_css(current, io_cgrp_id); + while (css) { + if (atomic_read(&css->cgroup->congestion_count)) { + ret = true; + break; + } + css = css->parent; + } + rcu_read_unlock(); + return ret; +} + +/** + * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg + * @return: true if this bio needs to be submitted with the root blkg context. + * + * In order to avoid priority inversions we sometimes need to issue a bio as if + * it were attached to the root blkg, and then backcharge to the actual owning + * blkg. The idea is we do bio_blkcg() to look up the actual context for the + * bio and attach the appropriate blkg to the bio. Then we call this helper and + * if it is true run with the root blkg for that queue and then do any + * backcharging to the originating cgroup once the io is complete. + */ +static inline bool bio_issue_as_root_blkg(struct bio *bio) +{ + return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; +} + /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest @@ -296,6 +342,17 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, } /** + * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair + * @q: request_queue of interest + * + * Lookup blkg for @q at the root level. See also blkg_lookup(). + */ +static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) +{ + return q->root_blkg; +} + +/** * blkg_to_pdata - get policy private data * @blkg: blkg of interest * @pol: policy of interest @@ -355,6 +412,21 @@ static inline void blkg_get(struct blkcg_gq *blkg) atomic_inc(&blkg->refcnt); } +/** + * blkg_try_get - try and get a blkg reference + * @blkg: blkg to get + * + * This is for use when doing an RCU lookup of the blkg. We may be in the midst + * of freeing this blkg, so we can only use it if the refcnt is not zero. + */ +static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +{ + if (atomic_inc_not_zero(&blkg->refcnt)) + return blkg; + return NULL; +} + + void __blkg_release_rcu(struct rcu_head *rcu); /** @@ -589,7 +661,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, { struct percpu_counter *cnt; - if (op_is_write(op)) + if (op_is_discard(op)) + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; + else if (op_is_write(op)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; @@ -706,8 +780,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, - bio->bi_iter.bi_size); + /* + * If the bio is flagged with BIO_QUEUE_ENTERED it means this + * is a split bio and we would have already accounted for the + * size of the bio. + */ + if (!bio_flagged(bio, BIO_QUEUE_ENTERED)) + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, + bio->bi_iter.bi_size); blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } @@ -715,6 +795,59 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, return !throtl; } +static inline void blkcg_use_delay(struct blkcg_gq *blkg) +{ + if (atomic_add_return(1, &blkg->use_delay) == 1) + atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); +} + +static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) +{ + int old = atomic_read(&blkg->use_delay); + + if (old == 0) + return 0; + + /* + * We do this song and dance because we can race with somebody else + * adding or removing delay. If we just did an atomic_dec we'd end up + * negative and we'd already be in trouble. We need to subtract 1 and + * then check to see if we were the last delay so we can drop the + * congestion count on the cgroup. + */ + while (old) { + int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); + if (cur == old) + break; + old = cur; + } + + if (old == 0) + return 0; + if (old == 1) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); + return 1; +} + +static inline void blkcg_clear_delay(struct blkcg_gq *blkg) +{ + int old = atomic_read(&blkg->use_delay); + if (!old) + return; + /* We only want 1 person clearing the congestion count for this blkg. */ + while (old) { + int cur = atomic_cmpxchg(&blkg->use_delay, old, 0); + if (cur == old) { + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); + break; + } + old = cur; + } +} + +void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); +void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_maybe_throttle_current(void); #else /* CONFIG_BLK_CGROUP */ struct blkcg { @@ -734,9 +867,16 @@ struct blkcg_policy { #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) +static inline void blkcg_maybe_throttle_current(void) { } +static inline bool blk_cgroup_congested(void) { return false; } + #ifdef CONFIG_BLOCK +static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } + static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } +static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) +{ return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } |