diff options
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r-- | drivers/md/bcache/alloc.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/bcache.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/closure.c | 17 | ||||
-rw-r--r-- | drivers/md/bcache/closure.h | 4 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/journal.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 33 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 16 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 19 | ||||
-rw-r--r-- | drivers/md/bcache/util.c | 50 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 25 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.h | 21 |
13 files changed, 121 insertions, 79 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ca4abe1ccd8d..cacbe2dbd5c3 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -68,6 +68,8 @@ #include <linux/random.h> #include <trace/events/bcache.h> +#define MAX_OPEN_BUCKETS 128 + /* Bucket heap / gen */ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) @@ -671,7 +673,7 @@ int bch_open_buckets_alloc(struct cache_set *c) spin_lock_init(&c->data_bucket_lock); - for (i = 0; i < 6; i++) { + for (i = 0; i < MAX_OPEN_BUCKETS; i++) { struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL); if (!b) return -ENOMEM; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index dee542fff68e..2ed9bd231d84 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -333,6 +333,7 @@ struct cached_dev { /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; + struct workqueue_struct *writeback_write_wq; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 864e673aec39..1841d0359bac 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -64,27 +64,16 @@ EXPORT_SYMBOL(closure_put); void __closure_wake_up(struct closure_waitlist *wait_list) { struct llist_node *list; - struct closure *cl; + struct closure *cl, *t; struct llist_node *reverse = NULL; list = llist_del_all(&wait_list->list); /* We first reverse the list to preserve FIFO ordering and fairness */ - - while (list) { - struct llist_node *t = list; - list = llist_next(list); - - t->next = reverse; - reverse = t; - } + reverse = llist_reverse_order(list); /* Then do the wakeups */ - - while (reverse) { - cl = container_of(reverse, struct closure, list); - reverse = llist_next(reverse); - + llist_for_each_entry_safe(cl, t, reverse, list) { closure_set_waiting(cl, 0); closure_sub(cl, CLOSURE_WAITING + 1); } diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 1ec84ca81146..295b7e43f92c 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -312,8 +312,6 @@ static inline void closure_wake_up(struct closure_waitlist *list) * been dropped with closure_put()), it will resume execution at @fn running out * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). * - * NOTE: This macro expands to a return in the calling function! - * * This is because after calling continue_at() you no longer have a ref on @cl, * and whatever @cl owns may be freed out from under you - a running closure fn * has a ref on its own closure which continue_at() drops. @@ -340,8 +338,6 @@ do { \ * Causes @fn to be executed out of @cl, in @wq context (or called directly if * @wq is NULL). * - * NOTE: like continue_at(), this macro expands to a return in the caller! - * * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, * thus it's not safe to touch anything protected by @cl after a * continue_at_nobarrier(). diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 35a5a7210e51..61076eda2e6d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -49,7 +49,7 @@ void bch_btree_verify(struct btree *b) v->keys.ops = b->keys.ops; bio = bch_bbio_alloc(b->c); - bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; + bio_set_dev(bio, PTR_CACHE(b->c, &b->key, 0)->bdev); bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9; bio->bi_opf = REQ_OP_READ | REQ_META; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 6a9b85095e7b..7e871bdc0097 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -34,7 +34,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) struct bbio *b = container_of(bio, struct bbio, bio); bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); - bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; + bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev); b->submit_time_us = local_clock_us(); closure_bio_submit(bio, bio->bi_private); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 0352d05e495c..7e1d1c3ba33a 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -53,7 +53,7 @@ reread: left = ca->sb.bucket_size - offset; bio_reset(bio); bio->bi_iter.bi_sector = bucket + offset; - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = len << 9; bio->bi_end_io = journal_read_endio; @@ -452,7 +452,7 @@ static void do_journal_discard(struct cache *ca) bio_set_op_attrs(bio, REQ_OP_DISCARD, 0); bio->bi_iter.bi_sector = bucket_to_sector(ca->set, ca->sb.d[ja->discard_idx]); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = journal_discard_endio; @@ -623,7 +623,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_iter.bi_sector = PTR_OFFSET(k, i); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 019b3df9f1c6..681b4f12b05a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) - wake_up_gc(op->c); - if (op->bypass) return bch_data_invalidate(cl); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) + wake_up_gc(op->c); + /* * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. @@ -400,12 +400,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (!congested && !dc->sequential_cutoff) goto rescale; - if (!congested && - mode == CACHE_MODE_WRITEBACK && - op_is_write(bio->bi_opf) && - op_is_sync(bio->bi_opf)) - goto rescale; - spin_lock(&dc->io_lock); hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash) @@ -607,7 +601,8 @@ static void request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - generic_end_io_acct(bio_data_dir(s->orig_bio), + struct request_queue *q = s->orig_bio->bi_disk->queue; + generic_end_io_acct(q, bio_data_dir(s->orig_bio), &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); @@ -734,7 +729,7 @@ static void cached_dev_read_done(struct closure *cl) if (s->iop.bio) { bio_reset(s->iop.bio); s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector; - s->iop.bio->bi_bdev = s->cache_miss->bi_bdev; + bio_copy_dev(s->iop.bio, s->cache_miss); s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9; bch_bio_map(s->iop.bio, NULL); @@ -793,7 +788,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, !(bio->bi_opf & REQ_META) && s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) reada = min_t(sector_t, dc->readahead >> 9, - bdev_sectors(bio->bi_bdev) - bio_end_sector(bio)); + get_capacity(bio->bi_disk) - bio_end_sector(bio)); s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada); @@ -819,7 +814,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_submit; cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector; - cache_bio->bi_bdev = miss->bi_bdev; + bio_copy_dev(cache_bio, miss); cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9; cache_bio->bi_end_io = request_endio; @@ -918,7 +913,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, dc->disk.bio_split); - flush->bi_bdev = bio->bi_bdev; + bio_copy_dev(flush, bio); flush->bi_end_io = request_endio; flush->bi_private = cl; flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; @@ -955,13 +950,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) { struct search *s; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct bcache_device *d = bio->bi_disk->private_data; struct cached_dev *dc = container_of(d, struct cached_dev, disk); int rw = bio_data_dir(bio); - generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); - bio->bi_bdev = dc->bdev; + bio_set_dev(bio, dc->bdev); bio->bi_iter.bi_sector += dc->sb.data_offset; if (cached_dev_get(dc)) { @@ -1071,10 +1066,10 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, { struct search *s; struct closure *cl; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct bcache_device *d = bio->bi_disk->private_data; int rw = bio_data_dir(bio); - generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); cl = &s->cl; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8352fad765f6..fc0a31b13ac4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -257,7 +257,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_init(cl, parent); bio_reset(bio); - bio->bi_bdev = dc->bdev; + bio_set_dev(bio, dc->bdev); bio->bi_end_io = write_bdev_super_endio; bio->bi_private = dc; @@ -303,7 +303,7 @@ void bcache_write_super(struct cache_set *c) SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); bio_reset(bio); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_end_io = write_super_endio; bio->bi_private = ca; @@ -508,7 +508,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, closure_init_stack(cl); bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size; - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = prio_endio; @@ -1026,7 +1026,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(dc); + bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1059,6 +1059,8 @@ static void cached_dev_free(struct closure *cl) cancel_delayed_work_sync(&dc->writeback_rate_update); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); + if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); mutex_lock(&bch_register_lock); @@ -1228,6 +1230,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err; bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); add_disk(d->disk); @@ -1374,9 +1377,6 @@ static void cache_set_flush(struct closure *cl) struct btree *b; unsigned i; - if (!c) - closure_return(cl); - bch_cache_accounting_destroy(&c->accounting); kobject_put(&c->internal); @@ -1964,6 +1964,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (!IS_ERR(bdev)) + bdput(bdev); if (attr == &ksysfs_register_quiet) goto out; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index f90f13616980..104c57cd666c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -192,7 +192,7 @@ STORE(__cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; + ssize_t v = size; struct cache_set *c; struct kobj_uevent_env *env; @@ -227,7 +227,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); + v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; @@ -615,8 +615,21 @@ STORE(__bch_cache_set) bch_cache_accounting_clear(&c->accounting); } - if (attr == &sysfs_trigger_gc) + if (attr == &sysfs_trigger_gc) { + /* + * Garbage collection thread only works when sectors_to_gc < 0, + * when users write to sysfs entry trigger_gc, most of time + * they want to forcibly triger gargage collection. Here -1 is + * set to c->sectors_to_gc, to make gc_should_run() give a + * chance to permit gc thread to run. "give a chance" means + * before going into gc_should_run(), there is still chance + * that c->sectors_to_gc being set to other positive value. So + * writing sysfs entry trigger_gc won't always make sure gc + * thread takes effect. + */ + atomic_set(&c->sectors_to_gc, -1); wake_up_gc(c); + } if (attr == &sysfs_prune_cache) { struct shrink_control sc; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 8c3a938f4bf0..176d3c2ef5f5 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -74,24 +74,44 @@ STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +/** + * bch_hprint() - formats @v to human readable string for sysfs. + * + * @v - signed 64 bit integer + * @buf - the (at least 8 byte) buffer to format the result into. + * + * Returns the number of bytes used by format. + */ ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; - char dec[4] = ""; - int u, t = 0; - - for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); - v >>= 10; - } - - if (!u) - return sprintf(buf, "%llu", v); - - if (v < 100 && v > -100) - snprintf(dec, sizeof(dec), ".%i", t / 100); - - return sprintf(buf, "%lli%s%c", v, dec, units[u]); + int u = 0, t; + + uint64_t q; + + if (v < 0) + q = -v; + else + q = v; + + /* For as long as the number is more than 3 digits, but at least + * once, shift right / divide by 1024. Keep the remainder for + * a digit after the decimal point. + */ + do { + u++; + + t = q & ~(~0 << 10); + q >>= 10; + } while (q >= 1000); + + if (v < 0) + /* '-', up to 3 digits, '.', 1 digit, 1 character, null; + * yields 8 bytes. + */ + return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]); + else + return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 42c66e76f05e..e663ca082183 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -21,7 +21,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); @@ -181,12 +182,12 @@ static void write_dirty(struct closure *cl) dirty_init(w); bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0); io->bio.bi_iter.bi_sector = KEY_START(&w->key); - io->bio.bi_bdev = io->dc->bdev; + bio_set_dev(&io->bio, io->dc->bdev); io->bio.bi_end_io = dirty_endio; closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -206,7 +207,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -250,8 +251,7 @@ static void read_dirty(struct cached_dev *dc) dirty_init(w); bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); - io->bio.bi_bdev = PTR_CACHE(dc->disk.c, - &w->key, 0)->bdev; + bio_set_dev(&io->bio, PTR_CACHE(dc->disk.c, &w->key, 0)->bdev); io->bio.bi_end_io = read_dirty_endio; if (bio_alloc_pages(&io->bio, GFP_KERNEL)) @@ -482,17 +482,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -516,6 +516,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 629bd1a502fd..e35421d20d2e 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) +{ + uint64_t i, ret = 0; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + + if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) + continue; + ret += bcache_dev_sectors_dirty(d); + } + + mutex_unlock(&bch_register_lock); + + return ret; +} + static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { @@ -84,7 +103,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); void bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); |