Diffstat (limited to 'block/blk-stat.c')
-rw-r--r--  block/blk-stat.c  311
1 file changed, 147 insertions(+), 164 deletions(-)
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 4681c488c262..0d8721a60db9 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2016 Jens Axboe
  */
 #include <linux/kernel.h>
+#include <linux/rculist.h>
 #include <linux/blk-mq.h>
 
 #include "blk-stat.h"
@@ -11,6 +12,24 @@
 
 #define BLK_RQ_STAT_BATCH        64
 
+struct blk_queue_stats {
+        struct list_head callbacks;
+        spinlock_t lock;
+};
+
+unsigned int blk_stat_rq_ddir(const struct request *rq)
+{
+        return rq_data_dir(rq);
+}
+EXPORT_SYMBOL_GPL(blk_stat_rq_ddir);
+
+static void blk_stat_init(struct blk_rq_stat *stat)
+{
+        stat->min = -1ULL;
+        stat->max = stat->nr_samples = stat->mean = 0;
+        stat->batch = stat->nr_batch = 0;
+}
+
 static void blk_stat_flush_batch(struct blk_rq_stat *stat)
 {
         const s32 nr_batch = READ_ONCE(stat->nr_batch);
@@ -50,207 +69,171 @@ static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
         dst->nr_samples += src->nr_samples;
 }
 
-static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
 {
-        struct blk_mq_hw_ctx *hctx;
-        struct blk_mq_ctx *ctx;
-        uint64_t latest = 0;
-        int i, j, nr;
-
-        blk_stat_init(&dst[READ]);
-        blk_stat_init(&dst[WRITE]);
-
-        nr = 0;
-        do {
-                uint64_t newest = 0;
-
-                queue_for_each_hw_ctx(q, hctx, i) {
-                        hctx_for_each_ctx(hctx, ctx, j) {
-                                blk_stat_flush_batch(&ctx->stat[READ]);
-                                blk_stat_flush_batch(&ctx->stat[WRITE]);
-
-                                if (!ctx->stat[READ].nr_samples &&
-                                    !ctx->stat[WRITE].nr_samples)
-                                        continue;
-                                if (ctx->stat[READ].time > newest)
-                                        newest = ctx->stat[READ].time;
-                                if (ctx->stat[WRITE].time > newest)
-                                        newest = ctx->stat[WRITE].time;
-                        }
-                }
+        stat->min = min(stat->min, value);
+        stat->max = max(stat->max, value);
 
-                /*
-                 * No samples
-                 */
-                if (!newest)
-                        break;
-
-                if (newest > latest)
-                        latest = newest;
-
-                queue_for_each_hw_ctx(q, hctx, i) {
-                        hctx_for_each_ctx(hctx, ctx, j) {
-                                if (ctx->stat[READ].time == newest) {
-                                        blk_stat_sum(&dst[READ],
-                                                     &ctx->stat[READ]);
-                                        nr++;
-                                }
-                                if (ctx->stat[WRITE].time == newest) {
-                                        blk_stat_sum(&dst[WRITE],
-                                                     &ctx->stat[WRITE]);
-                                        nr++;
-                                }
-                        }
-                }
-                /*
-                 * If we race on finding an entry, just loop back again.
-                 * Should be very rare.
-                 */
-        } while (!nr);
+        if (stat->batch + value < stat->batch ||
+            stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
+                blk_stat_flush_batch(stat);
 
-        dst[READ].time = dst[WRITE].time = latest;
+        stat->batch += value;
+        stat->nr_batch++;
 }
 
-void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+void blk_stat_add(struct request *rq)
 {
-        if (q->mq_ops)
-                blk_mq_stat_get(q, dst);
-        else {
-                blk_stat_flush_batch(&q->rq_stats[READ]);
-                blk_stat_flush_batch(&q->rq_stats[WRITE]);
-                memcpy(&dst[READ], &q->rq_stats[READ],
-                       sizeof(struct blk_rq_stat));
-                memcpy(&dst[WRITE], &q->rq_stats[WRITE],
-                       sizeof(struct blk_rq_stat));
+        struct request_queue *q = rq->q;
+        struct blk_stat_callback *cb;
+        struct blk_rq_stat *stat;
+        int bucket;
+        s64 now, value;
+
+        now = __blk_stat_time(ktime_to_ns(ktime_get()));
+        if (now < blk_stat_time(&rq->issue_stat))
+                return;
+
+        value = now - blk_stat_time(&rq->issue_stat);
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
+                if (blk_stat_is_active(cb)) {
+                        bucket = cb->bucket_fn(rq);
+                        stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
+                        __blk_stat_add(stat, value);
+                }
         }
+        rcu_read_unlock();
 }
 
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
+static void blk_stat_timer_fn(unsigned long data)
 {
-        struct blk_mq_ctx *ctx;
-        unsigned int i, nr;
+        struct blk_stat_callback *cb = (void *)data;
+        unsigned int bucket;
+        int cpu;
 
-        nr = 0;
-        do {
-                uint64_t newest = 0;
+        for (bucket = 0; bucket < cb->buckets; bucket++)
+                blk_stat_init(&cb->stat[bucket]);
 
-                hctx_for_each_ctx(hctx, ctx, i) {
-                        blk_stat_flush_batch(&ctx->stat[READ]);
-                        blk_stat_flush_batch(&ctx->stat[WRITE]);
+        for_each_online_cpu(cpu) {
+                struct blk_rq_stat *cpu_stat;
 
-                        if (!ctx->stat[READ].nr_samples &&
-                            !ctx->stat[WRITE].nr_samples)
-                                continue;
-
-                        if (ctx->stat[READ].time > newest)
-                                newest = ctx->stat[READ].time;
-                        if (ctx->stat[WRITE].time > newest)
-                                newest = ctx->stat[WRITE].time;
+                cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+                for (bucket = 0; bucket < cb->buckets; bucket++) {
+                        blk_stat_sum(&cb->stat[bucket], &cpu_stat[bucket]);
+                        blk_stat_init(&cpu_stat[bucket]);
                 }
+        }
 
-                if (!newest)
-                        break;
-
-                hctx_for_each_ctx(hctx, ctx, i) {
-                        if (ctx->stat[READ].time == newest) {
-                                blk_stat_sum(&dst[READ], &ctx->stat[READ]);
-                                nr++;
-                        }
-                        if (ctx->stat[WRITE].time == newest) {
-                                blk_stat_sum(&dst[WRITE], &ctx->stat[WRITE]);
-                                nr++;
-                        }
-                }
-                /*
-                 * If we race on finding an entry, just loop back again.
-                 * Should be very rare, as the window is only updated
-                 * occasionally
-                 */
-        } while (!nr);
+        cb->timer_fn(cb);
 }
 
-static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
+                        unsigned int (*bucket_fn)(const struct request *),
+                        unsigned int buckets, void *data)
 {
-        stat->min = -1ULL;
-        stat->max = stat->nr_samples = stat->mean = 0;
-        stat->batch = stat->nr_batch = 0;
-        stat->time = time_now & BLK_STAT_NSEC_MASK;
-}
+        struct blk_stat_callback *cb;
 
-void blk_stat_init(struct blk_rq_stat *stat)
-{
-        __blk_stat_init(stat, ktime_to_ns(ktime_get()));
-}
+        cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+        if (!cb)
+                return NULL;
 
-static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
-{
-        return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
-}
+        cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat),
+                                 GFP_KERNEL);
+        if (!cb->stat) {
+                kfree(cb);
+                return NULL;
+        }
+        cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat),
+                                      __alignof__(struct blk_rq_stat));
+        if (!cb->cpu_stat) {
+                kfree(cb->stat);
+                kfree(cb);
+                return NULL;
+        }
 
-bool blk_stat_is_current(struct blk_rq_stat *stat)
-{
-        return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
+        cb->timer_fn = timer_fn;
+        cb->bucket_fn = bucket_fn;
+        cb->data = data;
+        cb->buckets = buckets;
+        setup_timer(&cb->timer, blk_stat_timer_fn, (unsigned long)cb);
+
+        return cb;
 }
+EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
 
-void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
+void blk_stat_add_callback(struct request_queue *q,
+                           struct blk_stat_callback *cb)
 {
-        s64 now, value;
+        unsigned int bucket;
+        int cpu;
 
-        now = __blk_stat_time(ktime_to_ns(ktime_get()));
-        if (now < blk_stat_time(&rq->issue_stat))
-                return;
+        for_each_possible_cpu(cpu) {
+                struct blk_rq_stat *cpu_stat;
 
-        if (!__blk_stat_is_current(stat, now))
-                __blk_stat_init(stat, now);
+                cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+                for (bucket = 0; bucket < cb->buckets; bucket++)
+                        blk_stat_init(&cpu_stat[bucket]);
+        }
 
-        value = now - blk_stat_time(&rq->issue_stat);
-        if (value > stat->max)
-                stat->max = value;
-        if (value < stat->min)
-                stat->min = value;
+        spin_lock(&q->stats->lock);
+        list_add_tail_rcu(&cb->list, &q->stats->callbacks);
+        set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+        spin_unlock(&q->stats->lock);
+}
+EXPORT_SYMBOL_GPL(blk_stat_add_callback);
 
-        if (stat->batch + value < stat->batch ||
-            stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
-                blk_stat_flush_batch(stat);
+void blk_stat_remove_callback(struct request_queue *q,
+                              struct blk_stat_callback *cb)
+{
+        spin_lock(&q->stats->lock);
+        list_del_rcu(&cb->list);
+        if (list_empty(&q->stats->callbacks))
+                clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+        spin_unlock(&q->stats->lock);
 
-        stat->batch += value;
-        stat->nr_batch++;
+        del_timer_sync(&cb->timer);
 }
+EXPORT_SYMBOL_GPL(blk_stat_remove_callback);
 
-void blk_stat_clear(struct request_queue *q)
+static void blk_stat_free_callback_rcu(struct rcu_head *head)
 {
-        if (q->mq_ops) {
-                struct blk_mq_hw_ctx *hctx;
-                struct blk_mq_ctx *ctx;
-                int i, j;
-
-                queue_for_each_hw_ctx(q, hctx, i) {
-                        hctx_for_each_ctx(hctx, ctx, j) {
-                                blk_stat_init(&ctx->stat[READ]);
-                                blk_stat_init(&ctx->stat[WRITE]);
-                        }
-                }
-        } else {
-                blk_stat_init(&q->rq_stats[READ]);
-                blk_stat_init(&q->rq_stats[WRITE]);
-        }
+        struct blk_stat_callback *cb;
+
+        cb = container_of(head, struct blk_stat_callback, rcu);
+        free_percpu(cb->cpu_stat);
+        kfree(cb->stat);
+        kfree(cb);
 }
 
-void blk_stat_set_issue_time(struct blk_issue_stat *stat)
+void blk_stat_free_callback(struct blk_stat_callback *cb)
 {
-        stat->time = (stat->time & BLK_STAT_MASK) |
-                        (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
+        call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
 }
+EXPORT_SYMBOL_GPL(blk_stat_free_callback);
 
-/*
- * Enable stat tracking, return whether it was enabled
- */
-bool blk_stat_enable(struct request_queue *q)
+struct blk_queue_stats *blk_alloc_queue_stats(void)
 {
-        if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-                set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
-                return false;
-        }
+        struct blk_queue_stats *stats;
+
+        stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+        if (!stats)
+                return NULL;
+
+        INIT_LIST_HEAD(&stats->callbacks);
+        spin_lock_init(&stats->lock);
+
+        return stats;
+}
+
+void blk_free_queue_stats(struct blk_queue_stats *stats)
+{
+        if (!stats)
+                return;
+
+        WARN_ON(!list_empty(&stats->callbacks));
 
-        return true;
+        kfree(stats);
 }
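For context, the sketch below shows how a consumer of the callback API added by this patch might be wired up. It is illustrative only and not part of the commit: the my_stats_* names are invented, and blk_stat_activate_msecs() is assumed to be the timer-arming helper in the accompanying blk-stat.h (it is not visible in this file's diff). Only blk_stat_alloc_callback(), blk_stat_add_callback(), blk_stat_remove_callback(), blk_stat_free_callback(), and the blk_stat_rq_ddir() bucket function come directly from the patch above.

/* Illustrative consumer of the per-queue stats callbacks (not in the patch). */
#include <linux/kernel.h>
#include <linux/blk-mq.h>

#include "blk-stat.h"

static void my_stats_timer_fn(struct blk_stat_callback *cb)
{
        /* cb->stat[] holds the aggregated window, one entry per bucket. */
        pr_debug("blk-stat window: %d read samples, %d write samples\n",
                 cb->stat[READ].nr_samples, cb->stat[WRITE].nr_samples);

        /*
         * Samples are only gathered while the callback's timer is armed, so
         * re-arm the window to keep collecting; blk_stat_activate_msecs() is
         * assumed to come from the accompanying blk-stat.h.
         */
        blk_stat_activate_msecs(cb, 100);
}

static struct blk_stat_callback *my_stats_attach(struct request_queue *q)
{
        struct blk_stat_callback *cb;

        /* Two buckets, indexed by data direction via blk_stat_rq_ddir(). */
        cb = blk_stat_alloc_callback(my_stats_timer_fn, blk_stat_rq_ddir,
                                     2, NULL);
        if (!cb)
                return NULL;

        blk_stat_add_callback(q, cb);           /* start seeing completions */
        blk_stat_activate_msecs(cb, 100);       /* open a 100ms window */
        return cb;
}

static void my_stats_detach(struct request_queue *q,
                            struct blk_stat_callback *cb)
{
        blk_stat_remove_callback(q, cb);        /* also syncs the timer */
        blk_stat_free_callback(cb);             /* freed after an RCU grace period */
}

The bucket function decides which per-CPU blk_rq_stat a completed request is accounted to; blk_stat_rq_ddir() simply splits by data direction, which is why two buckets (READ/WRITE) are allocated in this sketch.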