From 3c547865902e9fc30dc15941f326fd8039c6628d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:10 -0800 Subject: blkcg: make blkcg_gq's hierarchical Currently a child blkg (blkcg_gq) can be created even if its parent doesn't exist. ie. Given a blkg, it's not guaranteed that its ancestors will exist. This makes it difficult to implement proper hierarchy support for blkcg policies. Always create blkgs recursively and make a child blkg hold a reference to its parent. blkg->parent is added so that finding the parent is easy. blkcg_parent() is also added in the process. This change can be visible to userland. e.g. while issuing IO in a nested cgroup didn't affect the ancestors at all, now it will initialize all ancestor blkgs and zero stats for the request_queue will always appear on them. While this is userland visible, this shouldn't cause any functional difference. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- block/blk-cgroup.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 24597309e23d..b26ed58899fe 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -94,8 +94,13 @@ struct blkcg_gq { struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; + + /* all non-root blkcg_gq's are guaranteed to have access to parent */ + struct blkcg_gq *parent; + /* request allocation list for this blkcg-q pair */ struct request_list rl; + /* reference count */ int refcnt; @@ -180,6 +185,19 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) return task_blkcg(current); } +/** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + struct cgroup *pcg = blkcg->css.cgroup->parent; + + return pcg ? cgroup_to_blkcg(pcg) : NULL; +} + /** * blkg_to_pdata - get policy private data * @blkg: blkg of interest -- cgit v1.2.3 From e71357e118bdd4057e3bc020b9d80fecdd08f588 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:10 -0800 Subject: cfq-iosched: add leaf_weight cfq blkcg is about to grow proper hierarchy handling, where a child blkg's weight would nest inside the parent's. This makes tasks in a blkg to compete against both tasks in the sibling blkgs and the tasks of child blkgs. We're gonna use the existing weight as the group weight which decides the blkg's weight against its siblings. This patch introduces a new weight - leaf_weight - which decides the weight of a blkg against the child blkgs. It's named leaf_weight because another way to look at it is that each internal blkg nodes have a hidden child leaf node which contains all its tasks and leaf_weight is the weight of the leaf node and handled the same as the weight of the child blkgs. This patch only adds leaf_weight fields and exposes it to userland. The new weight isn't actually used anywhere yet. Note that cfq-iosched currently offcially supports only single level hierarchy and root blkgs compete with the first level blkgs - ie. root weight is basically being used as leaf_weight. For root blkgs, the two weights are kept in sync for backward compatibility. v2: cfqd->root_group->leaf_weight initialization was missing from cfq_init_queue() causing divide by zero when !CONFIG_CFQ_GROUP_SCHED. Fix it. Reported by Fengguang. Signed-off-by: Tejun Heo Cc: Fengguang Wu --- block/blk-cgroup.c | 4 +- block/blk-cgroup.h | 1 + block/cfq-iosched.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 130 insertions(+), 9 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 942f344fdfa7..10e1df9da46e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -26,7 +26,8 @@ static DEFINE_MUTEX(blkcg_pol_mutex); -struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; +struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, + .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; EXPORT_SYMBOL_GPL(blkcg_root); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; @@ -710,6 +711,7 @@ static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) return ERR_PTR(-ENOMEM); blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; + blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index b26ed58899fe..24462258200e 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -54,6 +54,7 @@ struct blkcg { /* TODO: per-policy storage in blkcg */ unsigned int cfq_weight; /* belongs to cfq */ + unsigned int cfq_leaf_weight; }; struct blkg_stat { diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index bc076f45ca46..175218d66d67 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -223,10 +223,21 @@ struct cfq_group { /* group service_tree key */ u64 vdisktime; + + /* + * There are two weights - (internal) weight is the weight of this + * cfqg against the sibling cfqgs. leaf_weight is the wight of + * this cfqg against the child cfqgs. For the root cfqg, both + * weights are kept in sync for backward compatibility. + */ unsigned int weight; unsigned int new_weight; unsigned int dev_weight; + unsigned int leaf_weight; + unsigned int new_leaf_weight; + unsigned int dev_leaf_weight; + /* number of cfqq currently on this group */ int nr_cfqq; @@ -1182,10 +1193,16 @@ static void cfq_update_group_weight(struct cfq_group *cfqg) { BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); + if (cfqg->new_weight) { cfqg->weight = cfqg->new_weight; cfqg->new_weight = 0; } + + if (cfqg->new_leaf_weight) { + cfqg->leaf_weight = cfqg->new_leaf_weight; + cfqg->new_leaf_weight = 0; + } } static void @@ -1348,6 +1365,7 @@ static void cfq_pd_init(struct blkcg_gq *blkg) cfq_init_cfqg_base(cfqg); cfqg->weight = blkg->blkcg->cfq_weight; + cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; } /* @@ -1404,6 +1422,26 @@ static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, return 0; } +static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct cfq_group *cfqg = pd_to_cfqg(pd); + + if (!cfqg->dev_leaf_weight) + return 0; + return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); +} + +static int cfqg_print_leaf_weight_device(struct cgroup *cgrp, + struct cftype *cft, + struct seq_file *sf) +{ + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), + cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0, + false); + return 0; +} + static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { @@ -1411,8 +1449,16 @@ static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, return 0; } -static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + seq_printf(sf, "%u\n", + cgroup_to_blkcg(cgrp)->cfq_leaf_weight); + return 0; +} + +static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf, bool is_leaf_weight) { struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkg_conf_ctx ctx; @@ -1426,8 +1472,13 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, ret = -EINVAL; cfqg = blkg_to_cfqg(ctx.blkg); if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { - cfqg->dev_weight = ctx.v; - cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; + if (!is_leaf_weight) { + cfqg->dev_weight = ctx.v; + cfqg->new_weight = ctx.v ?: blkcg->cfq_weight; + } else { + cfqg->dev_leaf_weight = ctx.v; + cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight; + } ret = 0; } @@ -1435,7 +1486,20 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, return ret; } -static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return __cfqg_set_weight_device(cgrp, cft, buf, false); +} + +static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return __cfqg_set_weight_device(cgrp, cft, buf, true); +} + +static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val, + bool is_leaf_weight) { struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkcg_gq *blkg; @@ -1445,19 +1509,41 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) return -EINVAL; spin_lock_irq(&blkcg->lock); - blkcg->cfq_weight = (unsigned int)val; + + if (!is_leaf_weight) + blkcg->cfq_weight = val; + else + blkcg->cfq_leaf_weight = val; hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); - if (cfqg && !cfqg->dev_weight) - cfqg->new_weight = blkcg->cfq_weight; + if (!cfqg) + continue; + + if (!is_leaf_weight) { + if (!cfqg->dev_weight) + cfqg->new_weight = blkcg->cfq_weight; + } else { + if (!cfqg->dev_leaf_weight) + cfqg->new_leaf_weight = blkcg->cfq_leaf_weight; + } } spin_unlock_irq(&blkcg->lock); return 0; } +static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + return __cfq_set_weight(cgrp, cft, val, false); +} + +static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + return __cfq_set_weight(cgrp, cft, val, true); +} + static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { @@ -1518,6 +1604,37 @@ static struct cftype cfq_blkcg_files[] = { .read_seq_string = cfq_print_weight, .write_u64 = cfq_set_weight, }, + + /* on root, leaf_weight is mapped to weight */ + { + .name = "leaf_weight_device", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_seq_string = cfqg_print_weight_device, + .write_string = cfqg_set_weight_device, + .max_write_len = 256, + }, + { + .name = "leaf_weight", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_seq_string = cfq_print_weight, + .write_u64 = cfq_set_weight, + }, + + /* no such mapping necessary for !roots */ + { + .name = "leaf_weight_device", + .flags = CFTYPE_NOT_ON_ROOT, + .read_seq_string = cfqg_print_leaf_weight_device, + .write_string = cfqg_set_leaf_weight_device, + .max_write_len = 256, + }, + { + .name = "leaf_weight", + .flags = CFTYPE_NOT_ON_ROOT, + .read_seq_string = cfq_print_leaf_weight, + .write_u64 = cfq_set_leaf_weight, + }, + { .name = "time", .private = offsetof(struct cfq_group, stats.time), @@ -3992,6 +4109,7 @@ static int cfq_init_queue(struct request_queue *q) cfq_init_cfqg_base(cfqd->root_group); #endif cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; + cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT; /* * Not strictly needed (since RB_ROOT just clears the node and we -- cgit v1.2.3 From b276a876a014c5fa58a16f247c0933f6c42112e3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:12 -0800 Subject: blkcg: add blkg_policy_data->plid Add pd->plid so that the policy a pd belongs to can be identified easily. This will be used to implement hierarchical blkg_[rw]stats. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- block/blk-cgroup.c | 2 ++ block/blk-cgroup.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 10e1df9da46e..3a8de321d1f6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -113,6 +113,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->pd[i] = pd; pd->blkg = blkg; + pd->plid = i; /* invoke per-policy init */ if (pol->pd_init_fn) @@ -908,6 +909,7 @@ int blkcg_activate_policy(struct request_queue *q, blkg->pd[pol->plid] = pd; pd->blkg = blkg; + pd->plid = pol->plid; pol->pd_init_fn(blkg); spin_unlock(&blkg->blkcg->lock); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 24462258200e..40f5b9768aac 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -81,8 +81,9 @@ struct blkg_rwstat { * beginning and pd_size can't be smaller than pd. */ struct blkg_policy_data { - /* the blkg this per-policy data belongs to */ + /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; + int plid; /* used during policy activation */ struct list_head alloc_node; -- cgit v1.2.3 From f427d909648aa592c9588d0f66b5b457752a0cd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:12 -0800 Subject: blkcg: implement blkcg_policy->on/offline_pd_fn() and blkcg_gq->online Add two blkcg_policy methods, ->online_pd_fn() and ->offline_pd_fn(), which are invoked as the policy_data gets activated and deactivated while holding both blkcg and q locks. Also, add blkcg_gq->online bool, which is set and cleared as the blkcg_gq gets activated and deactivated. This flag also is toggled while holding both blkcg and q locks. These will be used to implement hierarchical stats. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- block/blk-cgroup.c | 21 ++++++++++++++++++++- block/blk-cgroup.h | 7 +++++++ 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3a8de321d1f6..4d625d28e070 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -182,7 +182,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct blkcg_gq *new_blkg) { struct blkcg_gq *blkg; - int ret; + int i, ret; WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); @@ -218,7 +218,15 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); + + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_online_fn) + pol->pd_online_fn(blkg); + } } + blkg->online = true; spin_unlock(&blkcg->lock); if (!ret) @@ -291,6 +299,7 @@ EXPORT_SYMBOL_GPL(blkg_lookup_create); static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; + int i; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -299,6 +308,14 @@ static void blkg_destroy(struct blkcg_gq *blkg) WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_offline_fn) + pol->pd_offline_fn(blkg); + } + blkg->online = false; + radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); @@ -956,6 +973,8 @@ void blkcg_deactivate_policy(struct request_queue *q, /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(&blkg->blkcg->lock); + if (pol->pd_offline_fn) + pol->pd_offline_fn(blkg); if (pol->pd_exit_fn) pol->pd_exit_fn(blkg); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 40f5b9768aac..678e89ed8ecb 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -106,12 +106,17 @@ struct blkcg_gq { /* reference count */ int refcnt; + /* is this blkg online? protected by both blkcg and q locks */ + bool online; + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; struct rcu_head rcu_head; }; typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); @@ -124,6 +129,8 @@ struct blkcg_policy { /* operations */ blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_online_pd_fn *pd_online_fn; + blkcg_pol_offline_pd_fn *pd_offline_fn; blkcg_pol_exit_pd_fn *pd_exit_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; }; -- cgit v1.2.3 From 4d5e80a76074786a49879ff482a83e72ad634606 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:12 -0800 Subject: blkcg: s/blkg_rwstat_sum()/blkg_rwstat_total()/ Rename blkg_rwstat_sum() to blkg_rwstat_total(). sum will be used for summing up stats from multiple blkgs. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- block/blk-cgroup.h | 4 ++-- block/cfq-iosched.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 678e89ed8ecb..586c0ac3309a 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -461,14 +461,14 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) } /** - * blkg_rwstat_sum - read the total count of a blkg_rwstat + * blkg_rwstat_total - read the total count of a blkg_rwstat * @rwstat: blkg_rwstat to read * * Return the total count of @rwstat regardless of the IO direction. This * function can be called without synchronization and takes care of u64 * atomicity. */ -static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat) +static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e8f31069b379..d43145cc0088 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -536,7 +536,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { struct cfqg_stats *stats = &cfqg->stats; - if (blkg_rwstat_sum(&stats->queued)) + if (blkg_rwstat_total(&stats->queued)) return; /* @@ -580,7 +580,7 @@ static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) struct cfqg_stats *stats = &cfqg->stats; blkg_stat_add(&stats->avg_queue_size_sum, - blkg_rwstat_sum(&stats->queued)); + blkg_rwstat_total(&stats->queued)); blkg_stat_add(&stats->avg_queue_size_samples, 1); cfqg_stats_update_group_wait_time(stats); } -- cgit v1.2.3 From 16b3de6652c7aef151f38726faf90f0dbc9e9c71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:12 -0800 Subject: blkcg: implement blkg_[rw]stat_recursive_sum() and blkg_[rw]stat_merge() Implement blkg_[rw]stat_recursive_sum() and blkg_[rw]stat_merge(). The former two collect the [rw]stats designated by the target policy data and offset from the pd's subtree. The latter two add one [rw]stat to another. Note that the recursive sum functions require the queue lock to be held on entry to make blkg online test reliable. This is necessary to properly handle stats of a dying blkg. These will be used to implement hierarchical stats. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- block/blk-cgroup.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++ block/blk-cgroup.h | 35 ++++++++++++++++++ 2 files changed, 142 insertions(+) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3aec4cdc8968..f9797b244eb3 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -32,6 +32,26 @@ EXPORT_SYMBOL_GPL(blkcg_root); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; +static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, + struct request_queue *q, bool update_hint); + +/** + * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_cgrp: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU + * read locked. If called under either blkcg or queue lock, the iteration + * is guaranteed to include all and only online blkgs. The caller may + * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip + * subtree. + */ +#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ + cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ + if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ + (p_blkg)->q, false))) + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -127,6 +147,17 @@ err_free: return NULL; } +/** + * __blkg_lookup - internal version of blkg_lookup() + * @blkcg: blkcg of interest + * @q: request_queue of interest + * @update_hint: whether to update lookup hint with the result or not + * + * This is internal version and shouldn't be used by policy + * implementations. Looks up blkgs for the @blkcg - @q pair regardless of + * @q's bypass state. If @update_hint is %true, the caller should be + * holding @q->queue_lock and lookup hint is updated on success. + */ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, bool update_hint) { @@ -585,6 +616,82 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, } EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); +/** + * blkg_stat_recursive_sum - collect hierarchical blkg_stat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_stat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup *pos_cgrp; + u64 sum; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + sum = blkg_stat_read((void *)pd + off); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_stat *stat = (void *)pos_pd + off; + + if (pos_blkg->online) + sum += blkg_stat_read(stat); + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); + +/** + * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_rwstat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup *pos_cgrp; + struct blkg_rwstat sum; + int i; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + sum = blkg_rwstat_read((void *)pd + off); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_rwstat *rwstat = (void *)pos_pd + off; + struct blkg_rwstat tmp; + + if (!pos_blkg->online) + continue; + + tmp = blkg_rwstat_read(rwstat); + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + sum.cnt[i] += tmp.cnt[i]; + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); + /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 586c0ac3309a..f2b292925ccd 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -164,6 +164,10 @@ u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off); + struct blkg_conf_ctx { struct gendisk *disk; struct blkcg_gq *blkg; @@ -413,6 +417,18 @@ static inline void blkg_stat_reset(struct blkg_stat *stat) stat->cnt = 0; } +/** + * blkg_stat_merge - merge a blkg_stat into another + * @to: the destination blkg_stat + * @from: the source + * + * Add @from's count to @to. + */ +static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) +{ + blkg_stat_add(to, blkg_stat_read(from)); +} + /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat @@ -484,6 +500,25 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); } +/** + * blkg_rwstat_merge - merge a blkg_rwstat into another + * @to: the destination blkg_rwstat + * @from: the source + * + * Add @from's counts to @to. + */ +static inline void blkg_rwstat_merge(struct blkg_rwstat *to, + struct blkg_rwstat *from) +{ + struct blkg_rwstat v = blkg_rwstat_read(from); + int i; + + u64_stats_update_begin(&to->syncp); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + to->cnt[i] += v.cnt[i]; + u64_stats_update_end(&to->syncp); +} + #else /* CONFIG_BLK_CGROUP */ struct cgroup; -- cgit v1.2.3