Diffstat (limited to 'drivers/md/bcache')
28 files changed, 1076 insertions, 797 deletions
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 17bf109c58e9..f6e0a8b3a61e 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -1,7 +1,8 @@ config BCACHE tristate "Block device as cache" - ---help--- + select CRC64 + help Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. @@ -10,7 +11,7 @@ config BCACHE config BCACHE_DEBUG bool "Bcache debugging" depends on BCACHE - ---help--- + help Don't select this option unless you're a developer Enables extra debugging tools, allows expensive runtime checks to be @@ -20,7 +21,7 @@ config BCACHE_CLOSURES_DEBUG bool "Debug closures" depends on BCACHE select DEBUG_FS - ---help--- + help Keeps all active closures in a linked list and provides a debugfs interface to list them, which makes it possible to see asynchronous operations that get stuck. diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 7fa2631b422c..7a28232d868b 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -87,8 +87,8 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) { struct cache *ca; struct bucket *b; - unsigned next = c->nbuckets * c->sb.bucket_size / 1024; - unsigned i; + unsigned int next = c->nbuckets * c->sb.bucket_size / 1024; + unsigned int i; int r; atomic_sub(sectors, &c->rescale); @@ -169,7 +169,7 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) #define bucket_prio(b) \ ({ \ - unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ + unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ \ (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ }) @@ -244,6 +244,7 @@ static void invalidate_buckets_random(struct cache *ca) while (!fifo_full(&ca->free_inc)) { size_t n; + get_random_bytes(&n, sizeof(n)); n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket); @@ -301,7 +302,7 @@ do { \ static int bch_allocator_push(struct cache *ca, long bucket) { - unsigned i; + unsigned int i; /* Prios/gens are actually the most important reserve */ if (fifo_push(&ca->free[RESERVE_PRIO], bucket)) @@ -385,7 +386,7 @@ out: /* Allocation */ -long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) +long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait) { DEFINE_WAIT(w); struct bucket *b; @@ -421,7 +422,7 @@ out: if (expensive_debug_checks(ca->set)) { size_t iter; long i; - unsigned j; + unsigned int j; for (iter = 0; iter < prio_buckets(ca) * 2; iter++) BUG_ON(ca->prio_buckets[iter] == (uint64_t) r); @@ -470,14 +471,14 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b) void bch_bucket_free(struct cache_set *c, struct bkey *k) { - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) __bch_bucket_free(PTR_CACHE(c, k, i), PTR_BUCKET(c, k, i)); } -int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, +int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, struct bkey *k, int n, bool wait) { int i; @@ -510,10 +511,11 @@ err: return -1; } -int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, +int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, struct bkey *k, int n, bool wait) { int ret; + mutex_lock(&c->bucket_lock); ret = __bch_bucket_alloc_set(c, reserve, k, n, wait); mutex_unlock(&c->bucket_lock); @@ -524,8 +526,8 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, struct open_bucket { struct list_head list; - unsigned last_write_point; - unsigned 
sectors_free; + unsigned int last_write_point; + unsigned int sectors_free; BKEY_PADDED(key); }; @@ -556,7 +558,7 @@ struct open_bucket { */ static struct open_bucket *pick_data_bucket(struct cache_set *c, const struct bkey *search, - unsigned write_point, + unsigned int write_point, struct bkey *alloc) { struct open_bucket *ret, *ret_task = NULL; @@ -595,12 +597,16 @@ found: * * If s->writeback is true, will not fail. */ -bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors, - unsigned write_point, unsigned write_prio, bool wait) +bool bch_alloc_sectors(struct cache_set *c, + struct bkey *k, + unsigned int sectors, + unsigned int write_point, + unsigned int write_prio, + bool wait) { struct open_bucket *b; BKEY_PADDED(key) alloc; - unsigned i; + unsigned int i; /* * We might have to allocate a new bucket, which we can't do with a @@ -613,7 +619,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors, spin_lock(&c->data_bucket_lock); while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) { - unsigned watermark = write_prio + unsigned int watermark = write_prio ? RESERVE_MOVINGGC : RESERVE_NONE; @@ -702,6 +708,7 @@ int bch_open_buckets_alloc(struct cache_set *c) for (i = 0; i < MAX_OPEN_BUCKETS; i++) { struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL); + if (!b) return -ENOMEM; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d6bf294f3907..83504dd8100a 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -252,7 +252,7 @@ struct bcache_device { struct kobject kobj; struct cache_set *c; - unsigned id; + unsigned int id; #define BCACHEDEVNAME_SIZE 12 char name[BCACHEDEVNAME_SIZE]; @@ -264,18 +264,19 @@ struct bcache_device { #define BCACHE_DEV_UNLINK_DONE 2 #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 - unsigned nr_stripes; - unsigned stripe_size; + unsigned int nr_stripes; + unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; struct bio_set bio_split; - unsigned data_csum:1; + unsigned int data_csum:1; - int (*cache_miss)(struct btree *, struct search *, - struct bio *, unsigned); - int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long); + int (*cache_miss)(struct btree *b, struct search *s, + struct bio *bio, unsigned int sectors); + int (*ioctl)(struct bcache_device *d, fmode_t mode, + unsigned int cmd, unsigned long arg); }; struct io { @@ -284,7 +285,7 @@ struct io { struct list_head lru; unsigned long jiffies; - unsigned sequential; + unsigned int sequential; sector_t last; }; @@ -328,13 +329,6 @@ struct cached_dev { */ atomic_t has_dirty; - /* - * Set to zero by things that touch the backing volume-- except - * writeback. Incremented by writeback. Used to determine when to - * accelerate idle writeback. 
- */ - atomic_t backing_idle; - struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; @@ -365,18 +359,18 @@ struct cached_dev { struct cache_accounting accounting; /* The rest of this all shows up in sysfs */ - unsigned sequential_cutoff; - unsigned readahead; + unsigned int sequential_cutoff; + unsigned int readahead; - unsigned io_disable:1; - unsigned verify:1; - unsigned bypass_torture_test:1; + unsigned int io_disable:1; + unsigned int verify:1; + unsigned int bypass_torture_test:1; - unsigned partial_stripes_expensive:1; - unsigned writeback_metadata:1; - unsigned writeback_running:1; + unsigned int partial_stripes_expensive:1; + unsigned int writeback_metadata:1; + unsigned int writeback_running:1; unsigned char writeback_percent; - unsigned writeback_delay; + unsigned int writeback_delay; uint64_t writeback_rate_target; int64_t writeback_rate_proportional; @@ -384,16 +378,16 @@ struct cached_dev { int64_t writeback_rate_integral_scaled; int32_t writeback_rate_change; - unsigned writeback_rate_update_seconds; - unsigned writeback_rate_i_term_inverse; - unsigned writeback_rate_p_term_inverse; - unsigned writeback_rate_minimum; + unsigned int writeback_rate_update_seconds; + unsigned int writeback_rate_i_term_inverse; + unsigned int writeback_rate_p_term_inverse; + unsigned int writeback_rate_minimum; enum stop_on_failure stop_when_cache_set_failed; #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64 atomic_t io_errors; - unsigned error_limit; - unsigned offline_seconds; + unsigned int error_limit; + unsigned int offline_seconds; char backing_dev_name[BDEVNAME_SIZE]; }; @@ -423,9 +417,9 @@ struct cache { /* * When allocating new buckets, prio_write() gets first dibs - since we * may not be allocate at all without writing priorities and gens. - * prio_buckets[] contains the last buckets we wrote priorities to (so - * gc can mark them as metadata), prio_next[] contains the buckets - * allocated for the next prio write. + * prio_last_buckets[] contains the last buckets we wrote priorities to + * (so gc can mark them as metadata), prio_buckets[] contains the + * buckets allocated for the next prio write. */ uint64_t *prio_buckets; uint64_t *prio_last_buckets; @@ -454,7 +448,7 @@ struct cache { * until a gc finishes - otherwise we could pointlessly burn a ton of * cpu */ - unsigned invalidate_needs_gc; + unsigned int invalidate_needs_gc; bool discard; /* Get rid of? 
*/ @@ -474,11 +468,12 @@ struct cache { struct gc_stat { size_t nodes; + size_t nodes_pre; size_t key_bytes; size_t nkeys; uint64_t data; /* sectors */ - unsigned in_use; /* percent */ + unsigned int in_use; /* percent */ }; /* @@ -514,6 +509,8 @@ struct cache_set { struct cache_accounting accounting; unsigned long flags; + atomic_t idle_counter; + atomic_t at_max_writeback_rate; struct cache_sb sb; @@ -522,9 +519,11 @@ struct cache_set { int caches_loaded; struct bcache_device **devices; - unsigned devices_max_used; + unsigned int devices_max_used; + atomic_t attached_dev_nr; struct list_head cached_devs; uint64_t cached_dev_sectors; + atomic_long_t flash_dev_dirty_sectors; struct closure caching; struct closure sb_write; @@ -550,7 +549,7 @@ struct cache_set { * Default number of pages for a new btree node - may be less than a * full bucket */ - unsigned btree_pages; + unsigned int btree_pages; /* * Lists of struct btrees; lru is the list for structs that have memory @@ -573,7 +572,7 @@ struct cache_set { struct list_head btree_cache_freed; /* Number of elements in btree_cache + btree_cache_freeable lists */ - unsigned btree_cache_used; + unsigned int btree_cache_used; /* * If we need to allocate memory for a new btree node and that @@ -603,6 +602,10 @@ struct cache_set { */ atomic_t rescale; /* + * used for GC, identify if any front side I/Os is inflight + */ + atomic_t search_inflight; + /* * When we invalidate buckets, we use both the priority and the amount * of good data to determine which buckets to reuse first - to weight * those together consistently we keep track of the smallest nonzero @@ -611,8 +614,8 @@ struct cache_set { uint16_t min_prio; /* - * max(gen - last_gc) for all buckets. When it gets too big we have to gc - * to keep gens from wrapping around. + * max(gen - last_gc) for all buckets. When it gets too big we have to + * gc to keep gens from wrapping around. 
*/ uint8_t need_gc; struct gc_stat gc_stats; @@ -647,7 +650,7 @@ struct cache_set { struct mutex verify_lock; #endif - unsigned nr_uuids; + unsigned int nr_uuids; struct uuid_entry *uuids; BKEY_PADDED(uuid_bucket); struct closure uuid_write; @@ -668,12 +671,12 @@ struct cache_set { struct journal journal; #define CONGESTED_MAX 1024 - unsigned congested_last_us; + unsigned int congested_last_us; atomic_t congested; /* The rest of this all shows up in sysfs */ - unsigned congested_read_threshold_us; - unsigned congested_write_threshold_us; + unsigned int congested_read_threshold_us; + unsigned int congested_write_threshold_us; struct time_stats btree_gc_time; struct time_stats btree_split_time; @@ -692,16 +695,16 @@ struct cache_set { ON_ERROR_PANIC, } on_error; #define DEFAULT_IO_ERROR_LIMIT 8 - unsigned error_limit; - unsigned error_decay; + unsigned int error_limit; + unsigned int error_decay; unsigned short journal_delay_ms; bool expensive_debug_checks; - unsigned verify:1; - unsigned key_merging_disabled:1; - unsigned gc_always_rewrite:1; - unsigned shrinker_disabled:1; - unsigned copy_gc_enabled:1; + unsigned int verify:1; + unsigned int key_merging_disabled:1; + unsigned int gc_always_rewrite:1; + unsigned int shrinker_disabled:1; + unsigned int copy_gc_enabled:1; #define BUCKET_HASH_BITS 12 struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; @@ -710,7 +713,7 @@ struct cache_set { }; struct bbio { - unsigned submit_time_us; + unsigned int submit_time_us; union { struct bkey key; uint64_t _pad[3]; @@ -727,10 +730,10 @@ struct bbio { #define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE) #define btree_blocks(b) \ - ((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits)) + ((unsigned int) (KEY_SIZE(&b->key) >> (b)->c->block_bits)) #define btree_default_blocks(c) \ - ((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) + ((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) #define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS) #define bucket_bytes(c) ((c)->sb.bucket_size << 9) @@ -759,21 +762,21 @@ static inline sector_t bucket_remainder(struct cache_set *c, sector_t s) static inline struct cache *PTR_CACHE(struct cache_set *c, const struct bkey *k, - unsigned ptr) + unsigned int ptr) { return c->cache[PTR_DEV(k, ptr)]; } static inline size_t PTR_BUCKET_NR(struct cache_set *c, const struct bkey *k, - unsigned ptr) + unsigned int ptr) { return sector_to_bucket(c, PTR_OFFSET(k, ptr)); } static inline struct bucket *PTR_BUCKET(struct cache_set *c, const struct bkey *k, - unsigned ptr) + unsigned int ptr) { return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr); } @@ -781,17 +784,18 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c, static inline uint8_t gen_after(uint8_t a, uint8_t b) { uint8_t r = a - b; + return r > 128U ? 
0 : r; } static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k, - unsigned i) + unsigned int i) { return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i)); } static inline bool ptr_available(struct cache_set *c, const struct bkey *k, - unsigned i) + unsigned int i) { return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i); } @@ -877,16 +881,16 @@ static inline uint8_t bucket_gc_gen(struct bucket *b) #define BUCKET_GC_GEN_MAX 96U #define kobj_attribute_write(n, fn) \ - static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn) + static struct kobj_attribute ksysfs_##n = __ATTR(n, 0200, NULL, fn) #define kobj_attribute_rw(n, show, store) \ static struct kobj_attribute ksysfs_##n = \ - __ATTR(n, S_IWUSR|S_IRUSR, show, store) + __ATTR(n, 0600, show, store) static inline void wake_up_allocators(struct cache_set *c) { struct cache *ca; - unsigned i; + unsigned int i; for_each_cache(ca, c, i) wake_up_process(ca->alloc_thread); @@ -922,40 +926,43 @@ static inline void wait_for_kthread_stop(void) /* Forward declarations */ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio); -void bch_count_io_errors(struct cache *, blk_status_t, int, const char *); -void bch_bbio_count_io_errors(struct cache_set *, struct bio *, - blk_status_t, const char *); -void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t, - const char *); -void bch_bbio_free(struct bio *, struct cache_set *); -struct bio *bch_bbio_alloc(struct cache_set *); - -void __bch_submit_bbio(struct bio *, struct cache_set *); -void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); - -uint8_t bch_inc_gen(struct cache *, struct bucket *); -void bch_rescale_priorities(struct cache_set *, int); - -bool bch_can_invalidate_bucket(struct cache *, struct bucket *); -void __bch_invalidate_one_bucket(struct cache *, struct bucket *); - -void __bch_bucket_free(struct cache *, struct bucket *); -void bch_bucket_free(struct cache_set *, struct bkey *); - -long bch_bucket_alloc(struct cache *, unsigned, bool); -int __bch_bucket_alloc_set(struct cache_set *, unsigned, - struct bkey *, int, bool); -int bch_bucket_alloc_set(struct cache_set *, unsigned, - struct bkey *, int, bool); -bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned, - unsigned, unsigned, bool); +void bch_count_io_errors(struct cache *ca, blk_status_t error, + int is_read, const char *m); +void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, + blk_status_t error, const char *m); +void bch_bbio_endio(struct cache_set *c, struct bio *bio, + blk_status_t error, const char *m); +void bch_bbio_free(struct bio *bio, struct cache_set *c); +struct bio *bch_bbio_alloc(struct cache_set *c); + +void __bch_submit_bbio(struct bio *bio, struct cache_set *c); +void bch_submit_bbio(struct bio *bio, struct cache_set *c, + struct bkey *k, unsigned int ptr); + +uint8_t bch_inc_gen(struct cache *ca, struct bucket *b); +void bch_rescale_priorities(struct cache_set *c, int sectors); + +bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b); +void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b); + +void __bch_bucket_free(struct cache *ca, struct bucket *b); +void bch_bucket_free(struct cache_set *c, struct bkey *k); + +long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait); +int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, + struct bkey *k, int n, bool wait); +int bch_bucket_alloc_set(struct cache_set *c, unsigned int 
reserve, + struct bkey *k, int n, bool wait); +bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, + unsigned int sectors, unsigned int write_point, + unsigned int write_prio, bool wait); bool bch_cached_dev_error(struct cached_dev *dc); __printf(2, 3) -bool bch_cache_set_error(struct cache_set *, const char *, ...); +bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...); -void bch_prio_write(struct cache *); -void bch_write_bdev_super(struct cached_dev *, struct closure *); +void bch_prio_write(struct cache *ca); +void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent); extern struct workqueue_struct *bcache_wq; extern struct mutex bch_register_lock; @@ -967,35 +974,36 @@ extern struct kobj_type bch_cache_set_ktype; extern struct kobj_type bch_cache_set_internal_ktype; extern struct kobj_type bch_cache_ktype; -void bch_cached_dev_release(struct kobject *); -void bch_flash_dev_release(struct kobject *); -void bch_cache_set_release(struct kobject *); -void bch_cache_release(struct kobject *); +void bch_cached_dev_release(struct kobject *kobj); +void bch_flash_dev_release(struct kobject *kobj); +void bch_cache_set_release(struct kobject *kobj); +void bch_cache_release(struct kobject *kobj); -int bch_uuid_write(struct cache_set *); -void bcache_write_super(struct cache_set *); +int bch_uuid_write(struct cache_set *c); +void bcache_write_super(struct cache_set *c); int bch_flash_dev_create(struct cache_set *c, uint64_t size); -int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *); -void bch_cached_dev_detach(struct cached_dev *); -void bch_cached_dev_run(struct cached_dev *); -void bcache_device_stop(struct bcache_device *); +int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + uint8_t *set_uuid); +void bch_cached_dev_detach(struct cached_dev *dc); +void bch_cached_dev_run(struct cached_dev *dc); +void bcache_device_stop(struct bcache_device *d); -void bch_cache_set_unregister(struct cache_set *); -void bch_cache_set_stop(struct cache_set *); +void bch_cache_set_unregister(struct cache_set *c); +void bch_cache_set_stop(struct cache_set *c); -struct cache_set *bch_cache_set_alloc(struct cache_sb *); -void bch_btree_cache_free(struct cache_set *); -int bch_btree_cache_alloc(struct cache_set *); -void bch_moving_init_cache_set(struct cache_set *); -int bch_open_buckets_alloc(struct cache_set *); -void bch_open_buckets_free(struct cache_set *); +struct cache_set *bch_cache_set_alloc(struct cache_sb *sb); +void bch_btree_cache_free(struct cache_set *c); +int bch_btree_cache_alloc(struct cache_set *c); +void bch_moving_init_cache_set(struct cache_set *c); +int bch_open_buckets_alloc(struct cache_set *c); +void bch_open_buckets_free(struct cache_set *c); int bch_cache_allocator_start(struct cache *ca); void bch_debug_exit(void); -int bch_debug_init(struct kobject *); +void bch_debug_init(struct kobject *kobj); void bch_request_exit(void); int bch_request_init(void); diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index f3403b45bc28..8f07fa6e1739 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -18,31 +18,31 @@ #ifdef CONFIG_BCACHE_DEBUG -void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) +void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set) { struct bkey *k, *next; for (k = i->start; k < bset_bkey_last(i); k = next) { next = bkey_next(k); - printk(KERN_ERR "block %u key %u/%u: ", set, - (unsigned) ((u64 *) k - i->d), i->keys); + 
pr_err("block %u key %u/%u: ", set, + (unsigned int) ((u64 *) k - i->d), i->keys); if (b->ops->key_dump) b->ops->key_dump(b, k); else - printk("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); + pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); if (next < bset_bkey_last(i) && bkey_cmp(k, b->ops->is_extents ? &START_KEY(next) : next) > 0) - printk(KERN_ERR "Key skipped backwards\n"); + pr_err("Key skipped backwards\n"); } } void bch_dump_bucket(struct btree_keys *b) { - unsigned i; + unsigned int i; console_lock(); for (i = 0; i <= b->nsets; i++) @@ -53,7 +53,7 @@ void bch_dump_bucket(struct btree_keys *b) int __bch_count_data(struct btree_keys *b) { - unsigned ret = 0; + unsigned int ret = 0; struct btree_iter iter; struct bkey *k; @@ -128,7 +128,7 @@ static inline void bch_btree_iter_next_check(struct btree_iter *iter) {} /* Keylists */ -int __bch_keylist_realloc(struct keylist *l, unsigned u64s) +int __bch_keylist_realloc(struct keylist *l, unsigned int u64s) { size_t oldsize = bch_keylist_nkeys(l); size_t newsize = oldsize + u64s; @@ -180,7 +180,7 @@ void bch_keylist_pop_front(struct keylist *l) /* Key/pointer manipulation */ void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src, - unsigned i) + unsigned int i) { BUG_ON(i > KEY_PTRS(src)); @@ -194,7 +194,7 @@ void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src, bool __bch_cut_front(const struct bkey *where, struct bkey *k) { - unsigned i, len = 0; + unsigned int i, len = 0; if (bkey_cmp(where, &START_KEY(k)) <= 0) return false; @@ -214,7 +214,7 @@ bool __bch_cut_front(const struct bkey *where, struct bkey *k) bool __bch_cut_back(const struct bkey *where, struct bkey *k) { - unsigned len = 0; + unsigned int len = 0; if (bkey_cmp(where, k) >= 0) return false; @@ -240,9 +240,9 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k) #define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1) struct bkey_float { - unsigned exponent:BKEY_EXPONENT_BITS; - unsigned m:BKEY_MID_BITS; - unsigned mantissa:BKEY_MANTISSA_BITS; + unsigned int exponent:BKEY_EXPONENT_BITS; + unsigned int m:BKEY_MID_BITS; + unsigned int mantissa:BKEY_MANTISSA_BITS; } __packed; /* @@ -311,7 +311,9 @@ void bch_btree_keys_free(struct btree_keys *b) } EXPORT_SYMBOL(bch_btree_keys_free); -int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp) +int bch_btree_keys_alloc(struct btree_keys *b, + unsigned int page_order, + gfp_t gfp) { struct bset_tree *t = b->set; @@ -345,7 +347,7 @@ EXPORT_SYMBOL(bch_btree_keys_alloc); void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, bool *expensive_debug_checks) { - unsigned i; + unsigned int i; b->ops = ops; b->expensive_debug_checks = expensive_debug_checks; @@ -366,7 +368,11 @@ EXPORT_SYMBOL(bch_btree_keys_init); /* Binary tree stuff for auxiliary search trees */ -static unsigned inorder_next(unsigned j, unsigned size) +/* + * return array index next to j when does in-order traverse + * of a binary tree which is stored in a linear array + */ +static unsigned int inorder_next(unsigned int j, unsigned int size) { if (j * 2 + 1 < size) { j = j * 2 + 1; @@ -379,7 +385,11 @@ static unsigned inorder_next(unsigned j, unsigned size) return j; } -static unsigned inorder_prev(unsigned j, unsigned size) +/* + * return array index previous to j when does in-order traverse + * of a binary tree which is stored in a linear array + */ +static unsigned int inorder_prev(unsigned int j, unsigned int size) { if (j * 2 < size) { j = j * 2; @@ -392,7 +402,8 @@ 
static unsigned inorder_prev(unsigned j, unsigned size) return j; } -/* I have no idea why this code works... and I'm the one who wrote it +/* + * I have no idea why this code works... and I'm the one who wrote it * * However, I do know what it does: * Given a binary tree constructed in an array (i.e. how you normally implement @@ -405,10 +416,12 @@ static unsigned inorder_prev(unsigned j, unsigned size) * extra is a function of size: * extra = (size - rounddown_pow_of_two(size - 1)) << 1; */ -static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra) +static unsigned int __to_inorder(unsigned int j, + unsigned int size, + unsigned int extra) { - unsigned b = fls(j); - unsigned shift = fls(size - 1) - b; + unsigned int b = fls(j); + unsigned int shift = fls(size - 1) - b; j ^= 1U << (b - 1); j <<= 1; @@ -421,14 +434,20 @@ static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra) return j; } -static unsigned to_inorder(unsigned j, struct bset_tree *t) +/* + * Return the cacheline index in bset_tree->data, where j is index + * from a linear array which stores the auxiliar binary tree + */ +static unsigned int to_inorder(unsigned int j, struct bset_tree *t) { return __to_inorder(j, t->size, t->extra); } -static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra) +static unsigned int __inorder_to_tree(unsigned int j, + unsigned int size, + unsigned int extra) { - unsigned shift; + unsigned int shift; if (j > extra) j += j - extra; @@ -441,7 +460,11 @@ static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra) return j; } -static unsigned inorder_to_tree(unsigned j, struct bset_tree *t) +/* + * Return an index from a linear array which stores the auxiliar binary + * tree, j is the cacheline index of t->data. + */ +static unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t) { return __inorder_to_tree(j, t->size, t->extra); } @@ -452,14 +475,15 @@ void inorder_test(void) unsigned long done = 0; ktime_t start = ktime_get(); - for (unsigned size = 2; + for (unsigned int size = 2; size < 65536000; size++) { - unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1; - unsigned i = 1, j = rounddown_pow_of_two(size - 1); + unsigned int extra = + (size - rounddown_pow_of_two(size - 1)) << 1; + unsigned int i = 1, j = rounddown_pow_of_two(size - 1); if (!(size % 4096)) - printk(KERN_NOTICE "loop %u, %llu per us\n", size, + pr_notice("loop %u, %llu per us\n", size, done / ktime_us_delta(ktime_get(), start)); while (1) { @@ -502,30 +526,31 @@ void inorder_test(void) * of the previous key so we can walk backwards to it from t->tree[j]'s key. 
*/ -static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline, - unsigned offset) +static struct bkey *cacheline_to_bkey(struct bset_tree *t, + unsigned int cacheline, + unsigned int offset) { return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8; } -static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k) +static unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k) { return ((void *) k - (void *) t->data) / BSET_CACHELINE; } -static unsigned bkey_to_cacheline_offset(struct bset_tree *t, - unsigned cacheline, +static unsigned int bkey_to_cacheline_offset(struct bset_tree *t, + unsigned int cacheline, struct bkey *k) { return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0); } -static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j) +static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j) { return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m); } -static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j) +static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j) { return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]); } @@ -534,7 +559,7 @@ static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j) * For the write set - the one we're currently inserting keys into - we don't * maintain a full search tree, we just keep a simple lookup table in t->prev. */ -static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline) +static struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline) { return cacheline_to_bkey(t, cacheline, t->prev[cacheline]); } @@ -546,14 +571,29 @@ static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift) return low; } -static inline unsigned bfloat_mantissa(const struct bkey *k, +/* + * Calculate mantissa value for struct bkey_float. + * If most significant bit of f->exponent is not set, then + * - f->exponent >> 6 is 0 + * - p[0] points to bkey->low + * - p[-1] borrows bits from KEY_INODE() of bkey->high + * if most isgnificant bits of f->exponent is set, then + * - f->exponent >> 6 is 1 + * - p[0] points to bits from KEY_INODE() of bkey->high + * - p[-1] points to other bits from KEY_INODE() of + * bkey->high too. + * See make_bfloat() to check when most significant bit of f->exponent + * is set or not. + */ +static inline unsigned int bfloat_mantissa(const struct bkey *k, struct bkey_float *f) { const uint64_t *p = &k->low - (f->exponent >> 6); + return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK; } -static void make_bfloat(struct bset_tree *t, unsigned j) +static void make_bfloat(struct bset_tree *t, unsigned int j) { struct bkey_float *f = &t->tree[j]; struct bkey *m = tree_to_bkey(t, j); @@ -570,6 +610,16 @@ static void make_bfloat(struct bset_tree *t, unsigned j) BUG_ON(m < l || m > r); BUG_ON(bkey_next(p) != m); + /* + * If l and r have different KEY_INODE values (different backing + * device), f->exponent records how many least significant bits + * are different in KEY_INODE values and sets most significant + * bits to 1 (by +64). + * If l and r have same KEY_INODE value, f->exponent records + * how many different bits in least significant bits of bkey->low. + * See bfloat_mantiss() how the most significant bit of + * f->exponent is used to calculate bfloat mantissa value. 
+ */ if (KEY_INODE(l) != KEY_INODE(r)) f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64; else @@ -591,7 +641,7 @@ static void make_bfloat(struct bset_tree *t, unsigned j) static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t) { if (t != b->set) { - unsigned j = roundup(t[-1].size, + unsigned int j = roundup(t[-1].size, 64 / sizeof(struct bkey_float)); t->tree = t[-1].tree + j; @@ -633,17 +683,26 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic) } EXPORT_SYMBOL(bch_bset_init_next); +/* + * Build auxiliary binary tree 'struct bset_tree *t', this tree is used to + * accelerate bkey search in a btree node (pointed by bset_tree->data in + * memory). After search in the auxiliar tree by calling bset_search_tree(), + * a struct bset_search_iter is returned which indicates range [l, r] from + * bset_tree->data where the searching bkey might be inside. Then a followed + * linear comparison does the exact search, see __bch_bset_search() for how + * the auxiliary tree is used. + */ void bch_bset_build_written_tree(struct btree_keys *b) { struct bset_tree *t = bset_tree_last(b); struct bkey *prev = NULL, *k = t->data->start; - unsigned j, cacheline = 1; + unsigned int j, cacheline = 1; b->last_set_unwritten = 0; bset_alloc_tree(b, t); - t->size = min_t(unsigned, + t->size = min_t(unsigned int, bkey_to_cacheline(t, bset_bkey_last(t->data)), b->set->tree + btree_keys_cachelines(b) - t->tree); @@ -683,7 +742,7 @@ EXPORT_SYMBOL(bch_bset_build_written_tree); void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k) { struct bset_tree *t; - unsigned inorder, j = 1; + unsigned int inorder, j = 1; for (t = b->set; t <= bset_tree_last(b); t++) if (k < bset_bkey_last(t->data)) @@ -730,14 +789,15 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b, struct bset_tree *t, struct bkey *k) { - unsigned shift = bkey_u64s(k); - unsigned j = bkey_to_cacheline(t, k); + unsigned int shift = bkey_u64s(k); + unsigned int j = bkey_to_cacheline(t, k); /* We're getting called from btree_split() or btree_gc, just bail out */ if (!t->size) return; - /* k is the key we just inserted; we need to find the entry in the + /* + * k is the key we just inserted; we need to find the entry in the * lookup table for the first key that is strictly greater than k: * it's either k's cacheline or the next one */ @@ -745,7 +805,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b, table_to_bkey(t, j) <= k) j++; - /* Adjust all the lookup table entries, and find a new key for any that + /* + * Adjust all the lookup table entries, and find a new key for any that * have gotten too big */ for (; j < t->size; j++) { @@ -770,7 +831,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b, k != bset_bkey_last(t->data); k = bkey_next(k)) if (t->size == bkey_to_cacheline(t, k)) { - t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k); + t->prev[t->size] = + bkey_to_cacheline_offset(t, t->size, k); t->size++; } } @@ -818,10 +880,10 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where, } EXPORT_SYMBOL(bch_bset_insert); -unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k, +unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, struct bkey *replace_key) { - unsigned status = BTREE_INSERT_STATUS_NO_INSERT; + unsigned int status = BTREE_INSERT_STATUS_NO_INSERT; struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; struct btree_iter iter; @@ -873,10 +935,10 @@ struct bset_search_iter { static struct 
bset_search_iter bset_search_write_set(struct bset_tree *t, const struct bkey *search) { - unsigned li = 0, ri = t->size; + unsigned int li = 0, ri = t->size; while (li + 1 != ri) { - unsigned m = (li + ri) >> 1; + unsigned int m = (li + ri) >> 1; if (bkey_cmp(table_to_bkey(t, m), search) > 0) ri = m; @@ -895,10 +957,22 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t, { struct bkey *l, *r; struct bkey_float *f; - unsigned inorder, j, n = 1; + unsigned int inorder, j, n = 1; do { - unsigned p = n << 4; + /* + * A bit trick here. + * If p < t->size, (int)(p - t->size) is a minus value and + * the most significant bit is set, right shifting 31 bits + * gets 1. If p >= t->size, the most significant bit is + * not set, right shifting 31 bits gets 0. + * So the following 2 lines equals to + * if (p >= t->size) + * p = 0; + * but a branch instruction is avoided. + */ + unsigned int p = n << 4; + p &= ((int) (p - t->size)) >> 31; prefetch(&t->tree[p]); @@ -907,6 +981,9 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t, f = &t->tree[j]; /* + * Similar bit trick, use subtract operation to avoid a branch + * instruction. + * * n = (f->mantissa > bfloat_mantissa()) * ? j * 2 * : j * 2 + 1; @@ -915,7 +992,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t, * to work - that's done in make_bfloat() */ if (likely(f->exponent != 127)) - n = j * 2 + (((unsigned) + n = j * 2 + (((unsigned int) (f->mantissa - bfloat_mantissa(search, f))) >> 31); else @@ -1046,6 +1123,7 @@ static struct bkey *__bch_btree_iter_init(struct btree_keys *b, struct bset_tree *start) { struct bkey *ret = NULL; + iter->size = ARRAY_SIZE(iter->data); iter->used = 0; @@ -1121,7 +1199,8 @@ void bch_bset_sort_state_free(struct bset_sort_state *state) mempool_exit(&state->pool); } -int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order) +int bch_bset_sort_state_init(struct bset_sort_state *state, + unsigned int page_order) { spin_lock_init(&state->time.lock); @@ -1174,7 +1253,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, } static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, - unsigned start, unsigned order, bool fixup, + unsigned int start, unsigned int order, bool fixup, struct bset_sort_state *state) { uint64_t start_time; @@ -1225,7 +1304,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, bch_time_stats_update(&state->time, start_time); } -void bch_btree_sort_partial(struct btree_keys *b, unsigned start, +void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, struct bset_sort_state *state) { size_t order = b->page_order, keys = 0; @@ -1235,7 +1314,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned start, __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); if (start) { - unsigned i; + unsigned int i; for (i = start; i <= b->nsets; i++) keys += b->set[i].data->keys; @@ -1260,8 +1339,8 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, struct bset_sort_state *state) { uint64_t start_time = local_clock(); - struct btree_iter iter; + bch_btree_iter_init(b, &iter, NULL); btree_mergesort(b, new->set->data, &iter, false, true); @@ -1275,7 +1354,7 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state) { - unsigned crit = SORT_CRIT; + unsigned int crit = SORT_CRIT; int i; /* Don't sort if nothing to do */ @@ -1304,7 +1383,7 @@ 
EXPORT_SYMBOL(bch_btree_sort_lazy); void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats) { - unsigned i; + unsigned int i; for (i = 0; i <= b->nsets; i++) { struct bset_tree *t = &b->set[i]; diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index b867f2200495..bac76aabca6d 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -163,10 +163,10 @@ struct bset_tree { */ /* size of the binary tree and prev array */ - unsigned size; + unsigned int size; /* function of size - precalculated for to_inorder() */ - unsigned extra; + unsigned int extra; /* copy of the last key in the set */ struct bkey end; @@ -187,18 +187,25 @@ struct bset_tree { }; struct btree_keys_ops { - bool (*sort_cmp)(struct btree_iter_set, - struct btree_iter_set); - struct bkey *(*sort_fixup)(struct btree_iter *, struct bkey *); - bool (*insert_fixup)(struct btree_keys *, struct bkey *, - struct btree_iter *, struct bkey *); - bool (*key_invalid)(struct btree_keys *, - const struct bkey *); - bool (*key_bad)(struct btree_keys *, const struct bkey *); - bool (*key_merge)(struct btree_keys *, - struct bkey *, struct bkey *); - void (*key_to_text)(char *, size_t, const struct bkey *); - void (*key_dump)(struct btree_keys *, const struct bkey *); + bool (*sort_cmp)(struct btree_iter_set l, + struct btree_iter_set r); + struct bkey *(*sort_fixup)(struct btree_iter *iter, + struct bkey *tmp); + bool (*insert_fixup)(struct btree_keys *b, + struct bkey *insert, + struct btree_iter *iter, + struct bkey *replace_key); + bool (*key_invalid)(struct btree_keys *bk, + const struct bkey *k); + bool (*key_bad)(struct btree_keys *bk, + const struct bkey *k); + bool (*key_merge)(struct btree_keys *bk, + struct bkey *l, struct bkey *r); + void (*key_to_text)(char *buf, + size_t size, + const struct bkey *k); + void (*key_dump)(struct btree_keys *keys, + const struct bkey *k); /* * Only used for deciding whether to use START_KEY(k) or just the key @@ -211,7 +218,7 @@ struct btree_keys { const struct btree_keys_ops *ops; uint8_t page_order; uint8_t nsets; - unsigned last_set_unwritten:1; + unsigned int last_set_unwritten:1; bool *expensive_debug_checks; /* @@ -239,12 +246,14 @@ static inline bool bkey_written(struct btree_keys *b, struct bkey *k) return !b->last_set_unwritten || k < b->set[b->nsets].data->start; } -static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i) +static inline unsigned int bset_byte_offset(struct btree_keys *b, + struct bset *i) { return ((size_t) i) - ((size_t) b->set->data); } -static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i) +static inline unsigned int bset_sector_offset(struct btree_keys *b, + struct bset *i) { return bset_byte_offset(b, i) >> 9; } @@ -273,25 +282,27 @@ static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b) } static inline struct bset *bset_next_set(struct btree_keys *b, - unsigned block_bytes) + unsigned int block_bytes) { struct bset *i = bset_tree_last(b)->data; return ((void *) i) + roundup(set_bytes(i), block_bytes); } -void bch_btree_keys_free(struct btree_keys *); -int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t); -void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *, - bool *); - -void bch_bset_init_next(struct btree_keys *, struct bset *, uint64_t); -void bch_bset_build_written_tree(struct btree_keys *); -void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *); -bool bch_bkey_try_merge(struct btree_keys *, struct bkey 
*, struct bkey *); -void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *); -unsigned bch_btree_insert_key(struct btree_keys *, struct bkey *, - struct bkey *); +void bch_btree_keys_free(struct btree_keys *b); +int bch_btree_keys_alloc(struct btree_keys *b, unsigned int page_order, + gfp_t gfp); +void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, + bool *expensive_debug_checks); + +void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic); +void bch_bset_build_written_tree(struct btree_keys *b); +void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k); +bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r); +void bch_bset_insert(struct btree_keys *b, struct bkey *where, + struct bkey *insert); +unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, + struct bkey *replace_key); enum { BTREE_INSERT_STATUS_NO_INSERT = 0, @@ -313,18 +324,21 @@ struct btree_iter { } data[MAX_BSETS]; }; -typedef bool (*ptr_filter_fn)(struct btree_keys *, const struct bkey *); +typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); -struct bkey *bch_btree_iter_next(struct btree_iter *); -struct bkey *bch_btree_iter_next_filter(struct btree_iter *, - struct btree_keys *, ptr_filter_fn); +struct bkey *bch_btree_iter_next(struct btree_iter *iter); +struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, + struct btree_keys *b, + ptr_filter_fn fn); -void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *); -struct bkey *bch_btree_iter_init(struct btree_keys *, struct btree_iter *, - struct bkey *); +void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, + struct bkey *end); +struct bkey *bch_btree_iter_init(struct btree_keys *b, + struct btree_iter *iter, + struct bkey *search); -struct bkey *__bch_bset_search(struct btree_keys *, struct bset_tree *, - const struct bkey *); +struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, + const struct bkey *search); /* * Returns the first key that is strictly greater than search @@ -349,21 +363,23 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b, struct bset_sort_state { mempool_t pool; - unsigned page_order; - unsigned crit_factor; + unsigned int page_order; + unsigned int crit_factor; struct time_stats time; }; -void bch_bset_sort_state_free(struct bset_sort_state *); -int bch_bset_sort_state_init(struct bset_sort_state *, unsigned); -void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *); -void bch_btree_sort_into(struct btree_keys *, struct btree_keys *, - struct bset_sort_state *); -void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *, - struct bset_sort_state *); -void bch_btree_sort_partial(struct btree_keys *, unsigned, - struct bset_sort_state *); +void bch_bset_sort_state_free(struct bset_sort_state *state); +int bch_bset_sort_state_init(struct bset_sort_state *state, + unsigned int page_order); +void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state); +void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, + struct bset_sort_state *state); +void bch_btree_sort_and_fix_extents(struct btree_keys *b, + struct btree_iter *iter, + struct bset_sort_state *state); +void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, + struct bset_sort_state *state); static inline void bch_btree_sort(struct btree_keys *b, struct bset_sort_state *state) @@ -377,13 +393,13 @@ 
struct bset_stats { size_t floats, failed; }; -void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *); +void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state); /* Bkey utility code */ #define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) -static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) +static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx) { return bkey_idx(i->start, idx); } @@ -401,10 +417,10 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l, : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); } -void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, - unsigned); -bool __bch_cut_front(const struct bkey *, struct bkey *); -bool __bch_cut_back(const struct bkey *, struct bkey *); +void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src, + unsigned int i); +bool __bch_cut_front(const struct bkey *where, struct bkey *k); +bool __bch_cut_back(const struct bkey *where, struct bkey *k); static inline bool bch_cut_front(const struct bkey *where, struct bkey *k) { @@ -522,18 +538,20 @@ static inline size_t bch_keylist_bytes(struct keylist *l) return bch_keylist_nkeys(l) * sizeof(uint64_t); } -struct bkey *bch_keylist_pop(struct keylist *); -void bch_keylist_pop_front(struct keylist *); -int __bch_keylist_realloc(struct keylist *, unsigned); +struct bkey *bch_keylist_pop(struct keylist *l); +void bch_keylist_pop_front(struct keylist *l); +int __bch_keylist_realloc(struct keylist *l, unsigned int u64s); /* Debug stuff */ #ifdef CONFIG_BCACHE_DEBUG -int __bch_count_data(struct btree_keys *); -void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...); -void bch_dump_bset(struct btree_keys *, struct bset *, unsigned); -void bch_dump_bucket(struct btree_keys *); +int __bch_count_data(struct btree_keys *b); +void __printf(2, 3) __bch_check_keys(struct btree_keys *b, + const char *fmt, + ...); +void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set); +void bch_dump_bucket(struct btree_keys *b); #else @@ -541,7 +559,7 @@ static inline int __bch_count_data(struct btree_keys *b) { return -1; } static inline void __printf(2, 3) __bch_check_keys(struct btree_keys *b, const char *fmt, ...) 
{} static inline void bch_dump_bucket(struct btree_keys *b) {} -void bch_dump_bset(struct btree_keys *, struct bset *, unsigned); +void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set); #endif diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 547c9eedc2f4..e7d4817681f2 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -90,6 +90,9 @@ #define MAX_NEED_GC 64 #define MAX_SAVE_PRIO 72 +#define MAX_GC_TIMES 100 +#define MIN_GC_NODES 100 +#define GC_SLEEP_MS 100 #define PTR_DIRTY_BIT (((uint64_t) 1 << 36)) @@ -180,7 +183,7 @@ static void bch_btree_init_next(struct btree *b) void bkey_put(struct cache_set *c, struct bkey *k) { - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i)) @@ -284,6 +287,7 @@ err: static void btree_node_read_endio(struct bio *bio) { struct closure *cl = bio->bi_private; + closure_put(cl); } @@ -432,7 +436,10 @@ static void do_btree_node_write(struct btree *b) continue_at(cl, btree_node_write_done, NULL); } else { - /* No problem for multipage bvec since the bio is just allocated */ + /* + * No problem for multipage bvec since the bio is + * just allocated + */ b->bio->bi_vcnt = 0; bch_bio_map(b->bio, i); @@ -476,7 +483,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent) void bch_btree_node_write(struct btree *b, struct closure *parent) { - unsigned nsets = b->keys.nsets; + unsigned int nsets = b->keys.nsets; lockdep_assert_held(&b->lock); @@ -578,7 +585,7 @@ static void mca_bucket_free(struct btree *b) list_move(&b->list, &b->c->btree_cache_freeable); } -static unsigned btree_order(struct bkey *k) +static unsigned int btree_order(struct bkey *k) { return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1); } @@ -586,7 +593,7 @@ static unsigned btree_order(struct bkey *k) static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) { if (!bch_btree_keys_alloc(&b->keys, - max_t(unsigned, + max_t(unsigned int, ilog2(b->c->btree_pages), btree_order(k)), gfp)) { @@ -601,6 +608,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, struct bkey *k, gfp_t gfp) { struct btree *b = kzalloc(sizeof(struct btree), gfp); + if (!b) return NULL; @@ -617,7 +625,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, return b; } -static int mca_reap(struct btree *b, unsigned min_order, bool flush) +static int mca_reap(struct btree *b, unsigned int min_order, bool flush) { struct closure cl; @@ -743,6 +751,7 @@ void bch_btree_cache_free(struct cache_set *c) { struct btree *b; struct closure cl; + closure_init_stack(&cl); if (c->shrink.list.next) @@ -783,7 +792,7 @@ void bch_btree_cache_free(struct cache_set *c) int bch_btree_cache_alloc(struct cache_set *c) { - unsigned i; + unsigned int i; for (i = 0; i < mca_reserve(c); i++) if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL)) @@ -1008,6 +1017,13 @@ retry: BUG_ON(b->level != level); } + if (btree_node_io_error(b)) { + rw_unlock(write, b); + return ERR_PTR(-EIO); + } + + BUG_ON(!b->written); + b->parent = parent; b->accessed = 1; @@ -1019,13 +1035,6 @@ retry: for (; i <= b->keys.nsets; i++) prefetch(b->keys.set[i].data); - if (btree_node_io_error(b)) { - rw_unlock(write, b); - return ERR_PTR(-EIO); - } - - BUG_ON(!b->written); - return b; } @@ -1121,6 +1130,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, struct btree_op *op) { struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); + if (!IS_ERR_OR_NULL(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, 
&n->keys, &b->c->sort); @@ -1133,7 +1143,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, static void make_btree_freeing_key(struct btree *b, struct bkey *k) { - unsigned i; + unsigned int i; mutex_lock(&b->c->bucket_lock); @@ -1154,7 +1164,7 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op) { struct cache_set *c = b->c; struct cache *ca; - unsigned i, reserve = (c->root->level - b->level) * 2 + 1; + unsigned int i, reserve = (c->root->level - b->level) * 2 + 1; mutex_lock(&c->bucket_lock); @@ -1178,7 +1188,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) { uint8_t stale = 0; - unsigned i; + unsigned int i; struct bucket *g; /* @@ -1216,7 +1226,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, SET_GC_MARK(g, GC_MARK_RECLAIMABLE); /* guard against overflow */ - SET_GC_SECTORS_USED(g, min_t(unsigned, + SET_GC_SECTORS_USED(g, min_t(unsigned int, GC_SECTORS_USED(g) + KEY_SIZE(k), MAX_GC_SECTORS_USED)); @@ -1230,7 +1240,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k) { - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i) && @@ -1256,7 +1266,7 @@ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats) static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) { uint8_t stale = 0; - unsigned keys = 0, good_keys = 0; + unsigned int keys = 0, good_keys = 0; struct bkey *k; struct btree_iter iter; struct bset_tree *t; @@ -1299,16 +1309,18 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) struct gc_merge_info { struct btree *b; - unsigned keys; + unsigned int keys; }; -static int bch_btree_insert_node(struct btree *, struct btree_op *, - struct keylist *, atomic_t *, struct bkey *); +static int bch_btree_insert_node(struct btree *b, struct btree_op *op, + struct keylist *insert_keys, + atomic_t *journal_ref, + struct bkey *replace_key); static int btree_gc_coalesce(struct btree *b, struct btree_op *op, struct gc_stat *gc, struct gc_merge_info *r) { - unsigned i, nodes = 0, keys = 0, blocks; + unsigned int i, nodes = 0, keys = 0, blocks; struct btree *new_nodes[GC_MERGE_NODES]; struct keylist keylist; struct closure cl; @@ -1508,11 +1520,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, return -EINTR; } -static unsigned btree_gc_count_keys(struct btree *b) +static unsigned int btree_gc_count_keys(struct btree *b) { struct bkey *k; struct btree_iter iter; - unsigned ret = 0; + unsigned int ret = 0; for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ -1520,6 +1532,32 @@ static unsigned btree_gc_count_keys(struct btree *b) return ret; } +static size_t btree_gc_min_nodes(struct cache_set *c) +{ + size_t min_nodes; + + /* + * Since incremental GC would stop 100ms when front + * side I/O comes, so when there are many btree nodes, + * if GC only processes constant (100) nodes each time, + * GC would last a long time, and the front side I/Os + * would run out of the buckets (since no new bucket + * can be allocated during GC), and be blocked again. 
+ * So GC should not process constant nodes, but varied + * nodes according to the number of btree nodes, which + * realized by dividing GC into constant(100) times, + * so when there are many btree nodes, GC can process + * more nodes each time, otherwise, GC will process less + * nodes each time (but no less than MIN_GC_NODES) + */ + min_nodes = c->gc_stats.nodes / MAX_GC_TIMES; + if (min_nodes < MIN_GC_NODES) + min_nodes = MIN_GC_NODES; + + return min_nodes; +} + + static int btree_gc_recurse(struct btree *b, struct btree_op *op, struct closure *writes, struct gc_stat *gc) { @@ -1585,6 +1623,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1)); r->b = NULL; + if (atomic_read(&b->c->search_inflight) && + gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) { + gc->nodes_pre = gc->nodes; + ret = -EAGAIN; + break; + } + if (need_resched()) { ret = -EAGAIN; break; @@ -1642,7 +1687,7 @@ static void btree_gc_start(struct cache_set *c) { struct cache *ca; struct bucket *b; - unsigned i; + unsigned int i; if (!c->gc_mark_valid) return; @@ -1668,7 +1713,7 @@ static void bch_btree_gc_finish(struct cache_set *c) { struct bucket *b; struct cache *ca; - unsigned i; + unsigned int i; mutex_lock(&c->bucket_lock); @@ -1686,7 +1731,7 @@ static void bch_btree_gc_finish(struct cache_set *c) struct bcache_device *d = c->devices[i]; struct cached_dev *dc; struct keybuf_key *w, *n; - unsigned j; + unsigned int j; if (!d || UUID_FLASH_ONLY(&c->uuids[i])) continue; @@ -1753,7 +1798,10 @@ static void bch_btree_gc(struct cache_set *c) closure_sync(&writes); cond_resched(); - if (ret && ret != -EAGAIN) + if (ret == -EAGAIN) + schedule_timeout_interruptible(msecs_to_jiffies + (GC_SLEEP_MS)); + else if (ret) pr_warn("gc failed!"); } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); @@ -1775,7 +1823,7 @@ static void bch_btree_gc(struct cache_set *c) static bool gc_should_run(struct cache_set *c) { struct cache *ca; - unsigned i; + unsigned int i; for_each_cache(ca, c, i) if (ca->invalidate_needs_gc) @@ -1834,8 +1882,14 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) do { k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); - if (k) + if (k) { btree_node_prefetch(b, k); + /* + * initiallize c->gc_stats.nodes + * for incremental GC + */ + b->c->gc_stats.nodes++; + } if (p) ret = btree(check_recurse, p, b, op); @@ -1860,7 +1914,7 @@ void bch_initial_gc_finish(struct cache_set *c) { struct cache *ca; struct bucket *b; - unsigned i; + unsigned int i; bch_btree_gc_finish(c); @@ -1900,7 +1954,7 @@ void bch_initial_gc_finish(struct cache_set *c) static bool btree_insert_key(struct btree *b, struct bkey *k, struct bkey *replace_key) { - unsigned status; + unsigned int status; BUG_ON(bkey_cmp(k, &b->key) > 0); @@ -1999,7 +2053,7 @@ static int btree_split(struct btree *b, struct btree_op *op, block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5; if (split) { - unsigned keys = 0; + unsigned int keys = 0; trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys); @@ -2177,10 +2231,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, rw_lock(true, b, b->level); if (b->key.ptr[0] != btree_ptr || - b->seq != seq + 1) { + b->seq != seq + 1) { op->lock = b->level; goto out; - } + } } SET_KEY_PTRS(check_key, 1); @@ -2255,7 +2309,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys, void bch_btree_set_root(struct btree *b) { - unsigned i; + unsigned int i; struct closure cl; 
closure_init_stack(&cl); @@ -2367,7 +2421,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, struct refill { struct btree_op op; - unsigned nr_found; + unsigned int nr_found; struct keybuf *buf; struct bkey *end; keybuf_pred_fn *pred; @@ -2443,6 +2497,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, if (!RB_EMPTY_ROOT(&buf->keys)) { struct keybuf_key *w; + w = RB_FIRST(&buf->keys, struct keybuf_key, node); buf->start = START_KEY(&w->key); @@ -2474,6 +2529,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start, { bool ret = false; struct keybuf_key *p, *w, s; + s.key = *start; if (bkey_cmp(end, &buf->start) <= 0 || @@ -2500,6 +2556,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start, struct keybuf_key *bch_keybuf_next(struct keybuf *buf) { struct keybuf_key *w; + spin_lock(&buf->lock); w = RB_FIRST(&buf->keys, struct keybuf_key, node); diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index d211e2c25b6b..a68d6c55783b 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -152,7 +152,7 @@ static inline bool btree_node_ ## flag(struct btree *b) \ { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ \ static inline void set_btree_node_ ## flag(struct btree *b) \ -{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ +{ set_bit(BTREE_NODE_ ## flag, &b->flags); } enum btree_flags { BTREE_NODE_io_error, @@ -184,7 +184,7 @@ static inline struct bset *btree_bset_last(struct btree *b) return bset_tree_last(&b->keys)->data; } -static inline unsigned bset_block_offset(struct btree *b, struct bset *i) +static inline unsigned int bset_block_offset(struct btree *b, struct bset *i) { return bset_sector_offset(&b->keys, i) >> b->c->block_bits; } @@ -213,7 +213,7 @@ struct btree_op { /* Btree level at which we start taking write locks */ short lock; - unsigned insert_collision:1; + unsigned int insert_collision:1; }; static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) @@ -238,26 +238,28 @@ static inline void rw_unlock(bool w, struct btree *b) (w ? 
up_write : up_read)(&b->lock); } -void bch_btree_node_read_done(struct btree *); -void __bch_btree_node_write(struct btree *, struct closure *); -void bch_btree_node_write(struct btree *, struct closure *); - -void bch_btree_set_root(struct btree *); -struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *, - int, bool, struct btree *); -struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *, - struct bkey *, int, bool, struct btree *); - -int bch_btree_insert_check_key(struct btree *, struct btree_op *, - struct bkey *); -int bch_btree_insert(struct cache_set *, struct keylist *, - atomic_t *, struct bkey *); - -int bch_gc_thread_start(struct cache_set *); -void bch_initial_gc_finish(struct cache_set *); -void bch_moving_gc(struct cache_set *); -int bch_btree_check(struct cache_set *); -void bch_initial_mark_key(struct cache_set *, int, struct bkey *); +void bch_btree_node_read_done(struct btree *b); +void __bch_btree_node_write(struct btree *b, struct closure *parent); +void bch_btree_node_write(struct btree *b, struct closure *parent); + +void bch_btree_set_root(struct btree *b); +struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + int level, bool wait, + struct btree *parent); +struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, + struct bkey *k, int level, bool write, + struct btree *parent); + +int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, + struct bkey *check_key); +int bch_btree_insert(struct cache_set *c, struct keylist *keys, + atomic_t *journal_ref, struct bkey *replace_key); + +int bch_gc_thread_start(struct cache_set *c); +void bch_initial_gc_finish(struct cache_set *c); +void bch_moving_gc(struct cache_set *c); +int bch_btree_check(struct cache_set *c); +void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k); static inline void wake_up_gc(struct cache_set *c) { @@ -272,9 +274,9 @@ static inline void wake_up_gc(struct cache_set *c) #define MAP_END_KEY 1 -typedef int (btree_map_nodes_fn)(struct btree_op *, struct btree *); -int __bch_btree_map_nodes(struct btree_op *, struct cache_set *, - struct bkey *, btree_map_nodes_fn *, int); +typedef int (btree_map_nodes_fn)(struct btree_op *b_op, struct btree *b); +int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c, + struct bkey *from, btree_map_nodes_fn *fn, int flags); static inline int bch_btree_map_nodes(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_nodes_fn *fn) @@ -290,21 +292,23 @@ static inline int bch_btree_map_leaf_nodes(struct btree_op *op, return __bch_btree_map_nodes(op, c, from, fn, MAP_LEAF_NODES); } -typedef int (btree_map_keys_fn)(struct btree_op *, struct btree *, - struct bkey *); -int bch_btree_map_keys(struct btree_op *, struct cache_set *, - struct bkey *, btree_map_keys_fn *, int); - -typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); - -void bch_keybuf_init(struct keybuf *); -void bch_refill_keybuf(struct cache_set *, struct keybuf *, - struct bkey *, keybuf_pred_fn *); -bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, - struct bkey *); -void bch_keybuf_del(struct keybuf *, struct keybuf_key *); -struct keybuf_key *bch_keybuf_next(struct keybuf *); -struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *, - struct bkey *, keybuf_pred_fn *); +typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b, + struct bkey *k); +int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, + 
struct bkey *from, btree_map_keys_fn *fn, int flags); + +typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k); + +void bch_keybuf_init(struct keybuf *buf); +void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, + struct bkey *end, keybuf_pred_fn *pred); +bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start, + struct bkey *end); +void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w); +struct keybuf_key *bch_keybuf_next(struct keybuf *buf); +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, + struct keybuf *buf, + struct bkey *end, + keybuf_pred_fn *pred); void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats); #endif diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 0e14969182c6..73f5319295bc 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Asynchronous refcounty things * @@ -162,12 +163,13 @@ static struct dentry *closure_debug; static int debug_seq_show(struct seq_file *f, void *data) { struct closure *cl; + spin_lock_irq(&closure_list_lock); list_for_each_entry(cl, &closure_list, all) { int r = atomic_read(&cl->remaining); - seq_printf(f, "%p: %pF -> %pf p %p r %i ", + seq_printf(f, "%p: %pS -> %pS p %p r %i ", cl, (void *) cl->ip, cl->fn, cl->parent, r & CLOSURE_REMAINING_MASK); @@ -177,7 +179,7 @@ static int debug_seq_show(struct seq_file *f, void *data) r & CLOSURE_RUNNING ? "R" : ""); if (r & CLOSURE_WAITING) - seq_printf(f, " W %pF\n", + seq_printf(f, " W %pS\n", (void *) cl->waiting_on); seq_printf(f, "\n"); @@ -199,11 +201,16 @@ static const struct file_operations debug_ops = { .release = single_release }; -int __init closure_debug_init(void) +void __init closure_debug_init(void) { - closure_debug = debugfs_create_file("closures", - 0400, bcache_debug, NULL, &debug_ops); - return IS_ERR_OR_NULL(closure_debug); + if (!IS_ERR_OR_NULL(bcache_debug)) + /* + * it is unnecessary to check return value of + * debugfs_create_file(), we should not care + * about this. + */ + closure_debug = debugfs_create_file( + "closures", 0400, bcache_debug, NULL, &debug_ops); } #endif diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 71427eb5fdae..eca0d496b686 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -159,7 +159,7 @@ struct closure { #define CLOSURE_MAGIC_DEAD 0xc054dead #define CLOSURE_MAGIC_ALIVE 0xc054a11e - unsigned magic; + unsigned int magic; struct list_head all; unsigned long ip; unsigned long waiting_on; @@ -186,13 +186,13 @@ static inline void closure_sync(struct closure *cl) #ifdef CONFIG_BCACHE_CLOSURES_DEBUG -int closure_debug_init(void); +void closure_debug_init(void); void closure_debug_create(struct closure *cl); void closure_debug_destroy(struct closure *cl); #else -static inline int closure_debug_init(void) { return 0; } +static inline void closure_debug_init(void) {} static inline void closure_debug_create(struct closure *cl) {} static inline void closure_debug_destroy(struct closure *cl) {} @@ -289,10 +289,12 @@ static inline void closure_init_stack(struct closure *cl) } /** - * closure_wake_up - wake up all closures on a wait list. 
+ * closure_wake_up - wake up all closures on a wait list, + * with memory barrier */ static inline void closure_wake_up(struct closure_waitlist *list) { + /* Memory barrier for the wait list */ smp_mb(); __closure_wake_up(list); } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index d030ce3025a6..06da66b2488a 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -67,34 +67,35 @@ void bch_btree_verify(struct btree *b) if (inmemory->keys != sorted->keys || memcmp(inmemory->start, sorted->start, - (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) { + (void *) bset_bkey_last(inmemory) - + (void *) inmemory->start)) { struct bset *i; - unsigned j; + unsigned int j; console_lock(); - printk(KERN_ERR "*** in memory:\n"); + pr_err("*** in memory:\n"); bch_dump_bset(&b->keys, inmemory, 0); - printk(KERN_ERR "*** read back in:\n"); + pr_err("*** read back in:\n"); bch_dump_bset(&v->keys, sorted, 0); for_each_written_bset(b, ondisk, i) { - unsigned block = ((void *) i - (void *) ondisk) / + unsigned int block = ((void *) i - (void *) ondisk) / block_bytes(b->c); - printk(KERN_ERR "*** on disk block %u:\n", block); + pr_err("*** on disk block %u:\n", block); bch_dump_bset(&b->keys, i, block); } - printk(KERN_ERR "*** block %zu not written\n", + pr_err("*** block %zu not written\n", ((void *) i - (void *) ondisk) / block_bytes(b->c)); for (j = 0; j < inmemory->keys; j++) if (inmemory->d[j] != sorted->d[j]) break; - printk(KERN_ERR "b->written %u\n", b->written); + pr_err("b->written %u\n", b->written); console_unlock(); panic("verify failed at %u\n", j); @@ -110,11 +111,15 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) struct bio_vec bv, cbv; struct bvec_iter iter, citer = { 0 }; - check = bio_clone_kmalloc(bio, GFP_NOIO); + check = bio_kmalloc(GFP_NOIO, bio_segments(bio)); if (!check) return; + check->bi_disk = bio->bi_disk; check->bi_opf = REQ_OP_READ; + check->bi_iter.bi_sector = bio->bi_iter.bi_sector; + check->bi_iter.bi_size = bio->bi_iter.bi_size; + bch_bio_map(check, NULL); if (bch_bio_alloc_pages(check, GFP_NOIO)) goto out_put; @@ -172,9 +177,9 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, while (size) { struct keybuf_key *w; - unsigned bytes = min(i->bytes, size); - + unsigned int bytes = min(i->bytes, size); int err = copy_to_user(buf, i->buf, bytes); + if (err) return err; @@ -233,8 +238,8 @@ void bch_debug_init_cache_set(struct cache_set *c) { if (!IS_ERR_OR_NULL(bcache_debug)) { char name[50]; - snprintf(name, 50, "bcache-%pU", c->sb.set_uuid); + snprintf(name, 50, "bcache-%pU", c->sb.set_uuid); c->debug = debugfs_create_file(name, 0400, bcache_debug, c, &cache_set_debug_ops); } @@ -248,11 +253,12 @@ void bch_debug_exit(void) debugfs_remove_recursive(bcache_debug); } -int __init bch_debug_init(struct kobject *kobj) +void __init bch_debug_init(struct kobject *kobj) { - if (!IS_ENABLED(CONFIG_DEBUG_FS)) - return 0; - + /* + * it is unnecessary to check return value of + * debugfs_create_file(), we should not care + * about this. 
+ */ bcache_debug = debugfs_create_dir("bcache", NULL); - return IS_ERR_OR_NULL(bcache_debug); } diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h index acc48d3fa274..fb3d4dff4b26 100644 --- a/drivers/md/bcache/debug.h +++ b/drivers/md/bcache/debug.h @@ -8,8 +8,8 @@ struct cache_set; #ifdef CONFIG_BCACHE_DEBUG -void bch_btree_verify(struct btree *); -void bch_data_verify(struct cached_dev *, struct bio *); +void bch_btree_verify(struct btree *b); +void bch_data_verify(struct cached_dev *dc, struct bio *bio); #define expensive_debug_checks(c) ((c)->expensive_debug_checks) #define key_merging_disabled(c) ((c)->key_merging_disabled) @@ -27,7 +27,7 @@ static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {} #endif #ifdef CONFIG_DEBUG_FS -void bch_debug_init_cache_set(struct cache_set *); +void bch_debug_init_cache_set(struct cache_set *c); #else static inline void bch_debug_init_cache_set(struct cache_set *c) {} #endif diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 1d096742eb41..c809724e6571 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -46,7 +46,7 @@ static bool bch_key_sort_cmp(struct btree_iter_set l, static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) { - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i)) { @@ -67,7 +67,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) { - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i)) { @@ -96,7 +96,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) void bch_extent_to_text(char *buf, size_t size, const struct bkey *k) { - unsigned i = 0; + unsigned int i = 0; char *out = buf, *end = buf + size; #define p(...) 
(out += scnprintf(out, end - out, __VA_ARGS__)) @@ -126,22 +126,22 @@ void bch_extent_to_text(char *buf, size_t size, const struct bkey *k) static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k) { struct btree *b = container_of(keys, struct btree, keys); - unsigned j; + unsigned int j; char buf[80]; bch_extent_to_text(buf, sizeof(buf), k); - printk(" %s", buf); + pr_err(" %s", buf); for (j = 0; j < KEY_PTRS(k); j++) { size_t n = PTR_BUCKET_NR(b->c, k, j); - printk(" bucket %zu", n); + pr_err(" bucket %zu", n); if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) - printk(" prio %i", + pr_err(" prio %i", PTR_BUCKET(b->c, k, j)->prio); } - printk(" %s\n", bch_ptr_status(b->c, k)); + pr_err(" %s\n", bch_ptr_status(b->c, k)); } /* Btree ptrs */ @@ -166,12 +166,13 @@ bad: static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k) { struct btree *b = container_of(bk, struct btree, keys); + return __bch_btree_ptr_invalid(b->c, k); } static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k) { - unsigned i; + unsigned int i; char buf[80]; struct bucket *g; @@ -204,7 +205,7 @@ err: static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k) { struct btree *b = container_of(bk, struct btree, keys); - unsigned i; + unsigned int i; if (!bkey_cmp(k, &ZERO_KEY) || !KEY_PTRS(k) || @@ -327,13 +328,14 @@ static bool bch_extent_insert_fixup(struct btree_keys *b, struct cache_set *c = container_of(b, struct btree, keys)->c; uint64_t old_offset; - unsigned old_size, sectors_found = 0; + unsigned int old_size, sectors_found = 0; BUG_ON(!KEY_OFFSET(insert)); BUG_ON(!KEY_SIZE(insert)); while (1) { struct bkey *k = bch_btree_iter_next(iter); + if (!k) break; @@ -363,7 +365,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b, * k might have been split since we inserted/found the * key we're replacing */ - unsigned i; + unsigned int i; uint64_t offset = KEY_START(k) - KEY_START(replace_key); @@ -498,11 +500,12 @@ bad: static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) { struct btree *b = container_of(bk, struct btree, keys); + return __bch_extent_invalid(b->c, k); } static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, - unsigned ptr) + unsigned int ptr) { struct bucket *g = PTR_BUCKET(b->c, k, ptr); char buf[80]; @@ -534,7 +537,7 @@ err: static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) { struct btree *b = container_of(bk, struct btree, keys); - unsigned i, stale; + unsigned int i, stale; if (!KEY_PTRS(k) || bch_extent_invalid(bk, k)) @@ -574,10 +577,12 @@ static uint64_t merge_chksums(struct bkey *l, struct bkey *r) ~((uint64_t)1 << 63); } -static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r) +static bool bch_extent_merge(struct btree_keys *bk, + struct bkey *l, + struct bkey *r) { struct btree *b = container_of(bk, struct btree, keys); - unsigned i; + unsigned int i; if (key_merging_disabled(b->c)) return false; diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h index 0cd3575afa1d..4d667e05bb73 100644 --- a/drivers/md/bcache/extents.h +++ b/drivers/md/bcache/extents.h @@ -8,8 +8,8 @@ extern const struct btree_keys_ops bch_extent_keys_ops; struct bkey; struct cache_set; -void bch_extent_to_text(char *, size_t, const struct bkey *); -bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); -bool __bch_extent_invalid(struct cache_set *, const struct bkey *); +void bch_extent_to_text(char *buf, size_t 
size, const struct bkey *k); +bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k); +bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k); #endif /* _BCACHE_EXTENTS_H */ diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 9612873afee2..c25097968319 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -17,6 +17,7 @@ void bch_bbio_free(struct bio *bio, struct cache_set *c) { struct bbio *b = container_of(bio, struct bbio, bio); + mempool_free(b, &c->bio_meta); } @@ -42,9 +43,10 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) } void bch_submit_bbio(struct bio *bio, struct cache_set *c, - struct bkey *k, unsigned ptr) + struct bkey *k, unsigned int ptr) { struct bbio *b = container_of(bio, struct bbio, bio); + bch_bkey_copy_single_ptr(&b->key, k, ptr); __bch_submit_bbio(bio, c); } @@ -52,7 +54,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, /* IO errors */ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) { - unsigned errors; + unsigned int errors; WARN_ONCE(!dc, "NULL pointer of struct cached_dev"); @@ -75,16 +77,16 @@ void bch_count_io_errors(struct cache *ca, */ if (ca->set->error_decay) { - unsigned count = atomic_inc_return(&ca->io_count); + unsigned int count = atomic_inc_return(&ca->io_count); while (count > ca->set->error_decay) { - unsigned errors; - unsigned old = count; - unsigned new = count - ca->set->error_decay; + unsigned int errors; + unsigned int old = count; + unsigned int new = count - ca->set->error_decay; /* * First we subtract refresh from count; each time we - * succesfully do so, we rescale the errors once: + * successfully do so, we rescale the errors once: */ count = atomic_cmpxchg(&ca->io_count, old, new); @@ -104,7 +106,7 @@ void bch_count_io_errors(struct cache *ca, } if (error) { - unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, + unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT, &ca->io_errors); errors >>= IO_ERROR_SHIFT; @@ -126,18 +128,18 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, struct cache *ca = PTR_CACHE(c, &b->key, 0); int is_read = (bio_data_dir(bio) == READ ? 1 : 0); - unsigned threshold = op_is_write(bio_op(bio)) + unsigned int threshold = op_is_write(bio_op(bio)) ? 
c->congested_write_threshold_us : c->congested_read_threshold_us; if (threshold) { - unsigned t = local_clock_us(); - + unsigned int t = local_clock_us(); int us = t - b->submit_time_us; int congested = atomic_read(&c->congested); if (us > (int) threshold) { int ms = us / 1024; + c->congested_last_us = t; ms = min(ms, CONGESTED_MAX + congested); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 18f1b5239620..6116bbf870d8 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -28,11 +28,12 @@ static void journal_read_endio(struct bio *bio) { struct closure *cl = bio->bi_private; + closure_put(cl); } static int journal_read_bucket(struct cache *ca, struct list_head *list, - unsigned bucket_index) + unsigned int bucket_index) { struct journal_device *ja = &ca->journal; struct bio *bio = &ja->bio; @@ -40,7 +41,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, struct journal_replay *i; struct jset *j, *data = ca->set->journal.w[0].data; struct closure cl; - unsigned len, left, offset = 0; + unsigned int len, left, offset = 0; int ret = 0; sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); @@ -50,7 +51,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, while (offset < ca->sb.bucket_size) { reread: left = ca->sb.bucket_size - offset; - len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS); + len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS); bio_reset(bio); bio->bi_iter.bi_sector = bucket + offset; @@ -154,12 +155,12 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) }) struct cache *ca; - unsigned iter; + unsigned int iter; for_each_cache(ca, c, iter) { struct journal_device *ja = &ca->journal; DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS); - unsigned i, l, r, m; + unsigned int i, l, r, m; uint64_t seq; bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); @@ -192,7 +193,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets); l < ca->sb.njournal_buckets; - l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1)) + l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, + l + 1)) if (read_bucket(l)) goto bsearch; @@ -304,7 +306,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) k < bset_bkey_last(&i->j); k = bkey_next(k)) if (!__bch_extent_invalid(c, k)) { - unsigned j; + unsigned int j; for (j = 0; j < KEY_PTRS(k); j++) if (ptr_available(c, k, j)) @@ -492,7 +494,7 @@ static void journal_reclaim(struct cache_set *c) struct bkey *k = &c->journal.key; struct cache *ca; uint64_t last_seq; - unsigned iter, n = 0; + unsigned int iter, n = 0; atomic_t p __maybe_unused; atomic_long_inc(&c->reclaim); @@ -526,7 +528,7 @@ static void journal_reclaim(struct cache_set *c) for_each_cache(ca, c, iter) { struct journal_device *ja = &ca->journal; - unsigned next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; + unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; /* No space available on this device */ if (next == ja->discard_idx) @@ -580,7 +582,7 @@ static void journal_write_endio(struct bio *bio) closure_put(&w->c->journal.io); } -static void journal_write(struct closure *); +static void journal_write(struct closure *cl); static void journal_write_done(struct closure *cl) { @@ -609,11 +611,12 @@ static void journal_write_unlocked(struct closure *cl) struct cache *ca; struct journal_write *w = c->journal.cur; struct bkey *k = &c->journal.key; - unsigned i, sectors 
= set_blocks(w->data, block_bytes(c)) * + unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) * c->sb.block_size; struct bio *bio; struct bio_list list; + bio_list_init(&list); if (!w->need_write) { @@ -705,7 +708,7 @@ static void journal_try_write(struct cache_set *c) } static struct journal_write *journal_wait_for_write(struct cache_set *c, - unsigned nkeys) + unsigned int nkeys) __acquires(&c->journal.lock) { size_t sectors; @@ -828,6 +831,7 @@ void bch_journal_free(struct cache_set *c) free_pages((unsigned long) c->journal.w[1].data, JSET_BITS); free_pages((unsigned long) c->journal.w[0].data, JSET_BITS); free_fifo(&c->journal.pin); + free_heap(&c->flush_btree); } int bch_journal_alloc(struct cache_set *c) diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index b5788199188f..66f0facff84b 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h @@ -110,7 +110,7 @@ struct journal { struct delayed_work work; /* Number of blocks free in the bucket(s) we're currently writing to */ - unsigned blocks_free; + unsigned int blocks_free; uint64_t seq; DECLARE_FIFO(atomic_t, pin); @@ -131,13 +131,13 @@ struct journal_device { uint64_t seq[SB_JOURNAL_BUCKETS]; /* Journal bucket we're currently writing to */ - unsigned cur_idx; + unsigned int cur_idx; /* Last journal bucket that still contains an open journal entry */ - unsigned last_idx; + unsigned int last_idx; /* Next journal bucket to be discarded */ - unsigned discard_idx; + unsigned int discard_idx; #define DISCARD_READY 0 #define DISCARD_IN_FLIGHT 1 @@ -167,14 +167,16 @@ struct cache_set; struct btree_op; struct keylist; -atomic_t *bch_journal(struct cache_set *, struct keylist *, struct closure *); -void bch_journal_next(struct journal *); -void bch_journal_mark(struct cache_set *, struct list_head *); -void bch_journal_meta(struct cache_set *, struct closure *); -int bch_journal_read(struct cache_set *, struct list_head *); -int bch_journal_replay(struct cache_set *, struct list_head *); - -void bch_journal_free(struct cache_set *); -int bch_journal_alloc(struct cache_set *); +atomic_t *bch_journal(struct cache_set *c, + struct keylist *keys, + struct closure *parent); +void bch_journal_next(struct journal *j); +void bch_journal_mark(struct cache_set *c, struct list_head *list); +void bch_journal_meta(struct cache_set *c, struct closure *cl); +int bch_journal_read(struct cache_set *c, struct list_head *list); +int bch_journal_replay(struct cache_set *c, struct list_head *list); + +void bch_journal_free(struct cache_set *c); +int bch_journal_alloc(struct cache_set *c); #endif /* _BCACHE_JOURNAL_H */ diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index a24c3a95b2c0..7891fb512736 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -23,7 +23,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k) { struct cache_set *c = container_of(buf, struct cache_set, moving_gc_keys); - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i) && @@ -38,6 +38,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k) static void moving_io_destructor(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, cl); + kfree(io); } @@ -186,9 +187,10 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r) return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); } -static unsigned bucket_heap_top(struct cache *ca) +static unsigned int bucket_heap_top(struct cache *ca) { struct bucket *b; + return 
(b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; } @@ -196,7 +198,7 @@ void bch_moving_gc(struct cache_set *c) { struct cache *ca; struct bucket *b; - unsigned i; + unsigned int i; if (!c->copy_gc_enabled) return; @@ -204,9 +206,9 @@ void bch_moving_gc(struct cache_set *c) mutex_lock(&c->bucket_lock); for_each_cache(ca, c, i) { - unsigned sectors_to_move = 0; - unsigned reserve_sectors = ca->sb.bucket_size * - fifo_used(&ca->free[RESERVE_MOVINGGC]); + unsigned int sectors_to_move = 0; + unsigned int reserve_sectors = ca->sb.bucket_size * + fifo_used(&ca->free[RESERVE_MOVINGGC]); ca->heap.used = 0; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ae67f5fa8047..51be355a3309 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -25,9 +25,9 @@ struct kmem_cache *bch_search_cache; -static void bch_data_insert_start(struct closure *); +static void bch_data_insert_start(struct closure *cl); -static unsigned cache_mode(struct cached_dev *dc) +static unsigned int cache_mode(struct cached_dev *dc) { return BDEV_CACHE_MODE(&dc->sb); } @@ -45,6 +45,7 @@ static void bio_csum(struct bio *bio, struct bkey *k) bio_for_each_segment(bv, bio, iter) { void *d = kmap(bv.bv_page) + bv.bv_offset; + csum = bch_crc64_update(csum, d, bv.bv_len); kunmap(bv.bv_page); } @@ -98,7 +99,7 @@ static void bch_data_insert_keys(struct closure *cl) closure_return(cl); } -static int bch_keylist_realloc(struct keylist *l, unsigned u64s, +static int bch_keylist_realloc(struct keylist *l, unsigned int u64s, struct cache_set *c) { size_t oldsize = bch_keylist_nkeys(l); @@ -107,7 +108,7 @@ static int bch_keylist_realloc(struct keylist *l, unsigned u64s, /* * The journalling code doesn't handle the case where the keys to insert * is bigger than an empty write: If we just return -ENOMEM here, - * bio_insert() and bio_invalidate() will insert the keys created so far + * bch_data_insert_keys() will insert the keys created so far * and finish the rest when the keylist is empty. */ if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset)) @@ -125,7 +126,7 @@ static void bch_data_invalidate(struct closure *cl) bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector); while (bio_sectors(bio)) { - unsigned sectors = min(bio_sectors(bio), + unsigned int sectors = min(bio_sectors(bio), 1U << (KEY_SIZE_BITS - 1)); if (bch_keylist_realloc(&op->insert_keys, 2, op->c)) @@ -135,7 +136,9 @@ static void bch_data_invalidate(struct closure *cl) bio->bi_iter.bi_size -= sectors << 9; bch_keylist_add(&op->insert_keys, - &KEY(op->inode, bio->bi_iter.bi_sector, sectors)); + &KEY(op->inode, + bio->bi_iter.bi_sector, + sectors)); } op->insert_data_done = true; @@ -151,7 +154,7 @@ static void bch_data_insert_error(struct closure *cl) /* * Our data write just errored, which means we've got a bunch of keys to - * insert that point to data that wasn't succesfully written. + * insert that point to data that wasn't successfully written. * * We don't have to insert those keys but we still have to invalidate * that region of the cache - so, if we just strip off all the pointers @@ -211,7 +214,7 @@ static void bch_data_insert_start(struct closure *cl) bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA); do { - unsigned i; + unsigned int i; struct bkey *k; struct bio_set *split = &op->c->bio_split; @@ -328,7 +331,7 @@ void bch_data_insert(struct closure *cl) /* Congested? 
*/ -unsigned bch_get_congested(struct cache_set *c) +unsigned int bch_get_congested(struct cache_set *c) { int i; long rand; @@ -372,8 +375,8 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) { struct cache_set *c = dc->disk.c; - unsigned mode = cache_mode(dc); - unsigned sectors, congested = bch_get_congested(c); + unsigned int mode = cache_mode(dc); + unsigned int sectors, congested = bch_get_congested(c); struct task_struct *task = current; struct io *i; @@ -469,11 +472,11 @@ struct search { struct bio *cache_miss; struct bcache_device *d; - unsigned insert_bio_sectors; - unsigned recoverable:1; - unsigned write:1; - unsigned read_dirty_data:1; - unsigned cache_missed:1; + unsigned int insert_bio_sectors; + unsigned int recoverable:1; + unsigned int write:1; + unsigned int read_dirty_data:1; + unsigned int cache_missed:1; unsigned long start_time; @@ -514,20 +517,20 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k) struct search *s = container_of(op, struct search, op); struct bio *n, *bio = &s->bio.bio; struct bkey *bio_key; - unsigned ptr; + unsigned int ptr; if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0) return MAP_CONTINUE; if (KEY_INODE(k) != s->iop.inode || KEY_START(k) > bio->bi_iter.bi_sector) { - unsigned bio_sectors = bio_sectors(bio); - unsigned sectors = KEY_INODE(k) == s->iop.inode + unsigned int bio_sectors = bio_sectors(bio); + unsigned int sectors = KEY_INODE(k) == s->iop.inode ? min_t(uint64_t, INT_MAX, KEY_START(k) - bio->bi_iter.bi_sector) : INT_MAX; - int ret = s->d->cache_miss(b, s, bio, sectors); + if (ret != MAP_CONTINUE) return ret; @@ -623,6 +626,7 @@ static void request_endio(struct bio *bio) if (bio->bi_status) { struct search *s = container_of(cl, struct search, cl); + s->iop.status = bio->bi_status; /* Only cache read errors are recoverable */ s->recoverable = false; @@ -667,8 +671,7 @@ static void backing_request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - generic_end_io_acct(s->d->disk->queue, - bio_data_dir(s->orig_bio), + generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio), &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); @@ -702,6 +705,8 @@ static void search_free(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); + atomic_dec(&s->d->c->search_inflight); + if (s->iop.bio) bio_put(s->iop.bio); @@ -719,6 +724,7 @@ static inline struct search *search_alloc(struct bio *bio, closure_init(&s->cl, NULL); do_bio_hook(s, bio, request_endio); + atomic_inc(&d->c->search_inflight); s->orig_bio = bio; s->cache_miss = NULL; @@ -811,7 +817,8 @@ static void cached_dev_read_done(struct closure *cl) if (s->iop.bio) { bio_reset(s->iop.bio); - s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector; + s->iop.bio->bi_iter.bi_sector = + s->cache_miss->bi_iter.bi_sector; bio_copy_dev(s->iop.bio, s->cache_miss); s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9; bch_bio_map(s->iop.bio, NULL); @@ -854,10 +861,10 @@ static void cached_dev_read_done_bh(struct closure *cl) } static int cached_dev_cache_miss(struct btree *b, struct search *s, - struct bio *bio, unsigned sectors) + struct bio *bio, unsigned int sectors) { int ret = MAP_CONTINUE; - unsigned reada = 0; + unsigned int reada = 0; struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); struct bio *miss, *cache_bio; @@ -1062,8 +1069,7 @@ 
static void detached_dev_end_io(struct bio *bio) bio->bi_end_io = ddip->bi_end_io; bio->bi_private = ddip->bi_private; - generic_end_io_acct(ddip->d->disk->queue, - bio_data_dir(bio), + generic_end_io_acct(ddip->d->disk->queue, bio_op(bio), &ddip->d->disk->part0, ddip->start_time); if (bio->bi_status) { @@ -1102,6 +1108,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) generic_make_request(bio); } +static void quit_max_writeback_rate(struct cache_set *c, + struct cached_dev *this_dc) +{ + int i; + struct bcache_device *d; + struct cached_dev *dc; + + /* + * mutex bch_register_lock may compete with other parallel requesters, + * or attach/detach operations on other backing device. Waiting to + * the mutex lock may increase I/O request latency for seconds or more. + * To avoid such situation, if mutext_trylock() failed, only writeback + * rate of current cached device is set to 1, and __update_write_back() + * will decide writeback rate of other cached devices (remember now + * c->idle_counter is 0 already). + */ + if (mutex_trylock(&bch_register_lock)) { + for (i = 0; i < c->devices_max_used; i++) { + if (!c->devices[i]) + continue; + + if (UUID_FLASH_ONLY(&c->uuids[i])) + continue; + + d = c->devices[i]; + dc = container_of(d, struct cached_dev, disk); + /* + * set writeback rate to default minimum value, + * then let update_writeback_rate() to decide the + * upcoming rate. + */ + atomic_long_set(&dc->writeback_rate.rate, 1); + } + mutex_unlock(&bch_register_lock); + } else + atomic_long_set(&this_dc->writeback_rate.rate, 1); +} + /* Cached devices - read & write stuff */ static blk_qc_t cached_dev_make_request(struct request_queue *q, @@ -1119,8 +1163,25 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, return BLK_QC_T_NONE; } - atomic_set(&dc->backing_idle, 0); - generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); + if (likely(d->c)) { + if (atomic_read(&d->c->idle_counter)) + atomic_set(&d->c->idle_counter, 0); + /* + * If at_max_writeback_rate of cache set is true and new I/O + * comes, quit max writeback rate of all cached devices + * attached to this cache set, and set at_max_writeback_rate + * to false. 
+ */ + if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) { + atomic_set(&d->c->at_max_writeback_rate, 0); + quit_max_writeback_rate(d->c, dc); + } + } + + generic_start_io_acct(q, + bio_op(bio), + bio_sectors(bio), + &d->disk->part0); bio_set_dev(bio, dc->bdev); bio->bi_iter.bi_sector += dc->sb.data_offset; @@ -1156,6 +1217,7 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, unsigned int cmd, unsigned long arg) { struct cached_dev *dc = container_of(d, struct cached_dev, disk); + return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); } @@ -1170,7 +1232,7 @@ static int cached_dev_congested(void *data, int bits) return 1; if (cached_dev_get(dc)) { - unsigned i; + unsigned int i; struct cache *ca; for_each_cache(ca, d->c, i) { @@ -1197,9 +1259,9 @@ void bch_cached_dev_request_init(struct cached_dev *dc) /* Flash backed devices */ static int flash_dev_cache_miss(struct btree *b, struct search *s, - struct bio *bio, unsigned sectors) + struct bio *bio, unsigned int sectors) { - unsigned bytes = min(sectors, bio_sectors(bio)) << 9; + unsigned int bytes = min(sectors, bio_sectors(bio)) << 9; swap(bio->bi_iter.bi_size, bytes); zero_fill_bio(bio); @@ -1229,7 +1291,6 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, struct search *s; struct closure *cl; struct bcache_device *d = bio->bi_disk->private_data; - int rw = bio_data_dir(bio); if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { bio->bi_status = BLK_STS_IOERR; @@ -1237,7 +1298,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, return BLK_QC_T_NONE; } - generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); cl = &s->cl; @@ -1254,7 +1315,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, flash_dev_nodata, bcache_wq); return BLK_QC_T_NONE; - } else if (rw) { + } else if (bio_data_dir(bio)) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), &KEY(d->id, bio_end_sector(bio), 0)); @@ -1283,7 +1344,7 @@ static int flash_dev_congested(void *data, int bits) struct bcache_device *d = data; struct request_queue *q; struct cache *ca; - unsigned i; + unsigned int i; int ret = 0; for_each_cache(ca, d->c, i) { @@ -1306,8 +1367,7 @@ void bch_flash_dev_request_init(struct bcache_device *d) void bch_request_exit(void) { - if (bch_search_cache) - kmem_cache_destroy(bch_search_cache); + kmem_cache_destroy(bch_search_cache); } int __init bch_request_init(void) diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index dea0886b81c1..aa055cfeb099 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -8,7 +8,7 @@ struct data_insert_op { struct bio *bio; struct workqueue_struct *wq; - unsigned inode; + unsigned int inode; uint16_t write_point; uint16_t write_prio; blk_status_t status; @@ -17,15 +17,15 @@ struct data_insert_op { uint16_t flags; struct { - unsigned bypass:1; - unsigned writeback:1; - unsigned flush_journal:1; - unsigned csum:1; + unsigned int bypass:1; + unsigned int writeback:1; + unsigned int flush_journal:1; + unsigned int csum:1; - unsigned replace:1; - unsigned replace_collision:1; + unsigned int replace:1; + unsigned int replace_collision:1; - unsigned insert_data_done:1; + unsigned int insert_data_done:1; }; }; @@ -33,7 +33,7 @@ struct data_insert_op { BKEY_PADDED(replace_key); }; -unsigned bch_get_congested(struct cache_set *); +unsigned int 
bch_get_congested(struct cache_set *c); void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index be119326297b..894410f3f829 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -33,11 +33,11 @@ * stored left shifted by 16, and scaled back in the sysfs show() function. */ -static const unsigned DAY_RESCALE = 288; -static const unsigned HOUR_RESCALE = 12; -static const unsigned FIVE_MINUTE_RESCALE = 1; -static const unsigned accounting_delay = (HZ * 300) / 22; -static const unsigned accounting_weight = 32; +static const unsigned int DAY_RESCALE = 288; +static const unsigned int HOUR_RESCALE = 12; +static const unsigned int FIVE_MINUTE_RESCALE = 1; +static const unsigned int accounting_delay = (HZ * 300) / 22; +static const unsigned int accounting_weight = 32; /* sysfs reading/writing */ @@ -152,7 +152,7 @@ static void scale_accounting(struct timer_list *t) struct cache_accounting *acc = from_timer(acc, t, timer); #define move_stat(name) do { \ - unsigned t = atomic_xchg(&acc->collector.name, 0); \ + unsigned int t = atomic_xchg(&acc->collector.name, 0); \ t <<= 16; \ acc->five_minute.name += t; \ acc->hour.name += t; \ @@ -200,6 +200,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, bool hit, bool bypass) { struct cached_dev *dc = container_of(d, struct cached_dev, disk); + mark_cache_stats(&dc->accounting.collector, hit, bypass); mark_cache_stats(&c->accounting.collector, hit, bypass); } @@ -207,6 +208,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d) { struct cached_dev *dc = container_of(d, struct cached_dev, disk); + atomic_inc(&dc->accounting.collector.cache_readaheads); atomic_inc(&c->accounting.collector.cache_readaheads); } @@ -214,6 +216,7 @@ void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d) void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d) { struct cached_dev *dc = container_of(d, struct cached_dev, disk); + atomic_inc(&dc->accounting.collector.cache_miss_collisions); atomic_inc(&c->accounting.collector.cache_miss_collisions); } diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h index 0b70f9de0c03..abfaabf7e7fc 100644 --- a/drivers/md/bcache/stats.h +++ b/drivers/md/bcache/stats.h @@ -23,7 +23,7 @@ struct cache_stats { unsigned long cache_miss_collisions; unsigned long sectors_bypassed; - unsigned rescale; + unsigned int rescale; }; struct cache_accounting { @@ -53,10 +53,13 @@ void bch_cache_accounting_clear(struct cache_accounting *acc); void bch_cache_accounting_destroy(struct cache_accounting *acc); -void bch_mark_cache_accounting(struct cache_set *, struct bcache_device *, - bool, bool); -void bch_mark_cache_readahead(struct cache_set *, struct bcache_device *); -void bch_mark_cache_miss_collision(struct cache_set *, struct bcache_device *); -void bch_mark_sectors_bypassed(struct cache_set *, struct cached_dev *, int); +void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, + bool hit, bool bypass); +void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d); +void bch_mark_cache_miss_collision(struct cache_set *c, + struct bcache_device *d); +void bch_mark_sectors_bypassed(struct cache_set *c, + struct cached_dev *dc, + int sectors); #endif /* _BCACHE_STATS_H_ */ diff --git 
a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fa4058e43202..94c756c66bd7 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bcache setup/teardown code, and some metadata io - read a superblock and * figure out what to do with it. @@ -61,7 +62,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, const char *err; struct cache_sb *s; struct buffer_head *bh = __bread(bdev, 1, SB_SIZE); - unsigned i; + unsigned int i; if (!bh) return "IO error"; @@ -149,7 +150,8 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto err; err = "Invalid superblock: device too small"; - if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets) + if (get_capacity(bdev->bd_disk) < + sb->bucket_size * sb->nbuckets) goto err; err = "Bad UUID"; @@ -181,7 +183,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto err; } - sb->last_mount = get_seconds(); + sb->last_mount = (u32)ktime_get_real_seconds(); err = NULL; get_page(bh->b_page); @@ -202,7 +204,7 @@ static void write_bdev_super_endio(struct bio *bio) static void __write_super(struct cache_sb *sb, struct bio *bio) { struct cache_sb *out = page_address(bio_first_page_all(bio)); - unsigned i; + unsigned int i; bio->bi_iter.bi_sector = SB_SECTOR; bio->bi_iter.bi_size = SB_SIZE; @@ -282,7 +284,7 @@ void bcache_write_super(struct cache_set *c) { struct closure *cl = &c->sb_write; struct cache *ca; - unsigned i; + unsigned int i; down(&c->sb_write_mutex); closure_init(cl, &c->cl); @@ -334,7 +336,7 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, { struct closure *cl = &c->uuid_write; struct uuid_entry *u; - unsigned i; + unsigned int i; char buf[80]; BUG_ON(!parent); @@ -415,8 +417,8 @@ static int __uuid_write(struct cache_set *c) { BKEY_PADDED(key) k; struct closure cl; - closure_init_stack(&cl); + closure_init_stack(&cl); lockdep_assert_held(&bch_register_lock); if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true)) @@ -456,6 +458,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) static struct uuid_entry *uuid_find_empty(struct cache_set *c) { static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + return uuid_find(c, zero_uuid); } @@ -463,8 +466,8 @@ static struct uuid_entry *uuid_find_empty(struct cache_set *c) * Bucket priorities/gens: * * For each bucket, we store on disk its - * 8 bit gen - * 16 bit priority + * 8 bit gen + * 16 bit priority * * See alloc.c for an explanation of the gen. 
The priority is used to implement * lru (and in the future other) cache replacement policies; for most purposes @@ -587,7 +590,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) struct prio_set *p = ca->disk_buckets; struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; struct bucket *b; - unsigned bucket_nr = 0; + unsigned int bucket_nr = 0; for (b = ca->buckets; b < ca->buckets + ca->sb.nbuckets; @@ -599,7 +602,8 @@ static void prio_read(struct cache *ca, uint64_t bucket) prio_io(ca, bucket, REQ_OP_READ, 0); - if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) + if (p->csum != + bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); if (p->magic != pset_magic(&ca->sb)) @@ -619,6 +623,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) static int open_dev(struct block_device *b, fmode_t mode) { struct bcache_device *d = b->bd_disk->private_data; + if (test_bit(BCACHE_DEV_CLOSING, &d->flags)) return -ENXIO; @@ -629,6 +634,7 @@ static int open_dev(struct block_device *b, fmode_t mode) static void release_dev(struct gendisk *b, fmode_t mode) { struct bcache_device *d = b->private_data; + closure_put(&d->cl); } @@ -662,7 +668,7 @@ static void bcache_device_unlink(struct bcache_device *d) lockdep_assert_held(&bch_register_lock); if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { - unsigned i; + unsigned int i; struct cache *ca; sysfs_remove_link(&d->c->kobj, d->name); @@ -676,7 +682,7 @@ static void bcache_device_unlink(struct bcache_device *d) static void bcache_device_link(struct bcache_device *d, struct cache_set *c, const char *name) { - unsigned i; + unsigned int i; struct cache *ca; for_each_cache(ca, d->c, i) @@ -696,12 +702,14 @@ static void bcache_device_detach(struct bcache_device *d) { lockdep_assert_held(&bch_register_lock); + atomic_dec(&d->c->attached_dev_nr); + if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { struct uuid_entry *u = d->c->uuids + d->id; SET_UUID_FLASH_ONLY(u, 0); memcpy(u->uuid, invalid_uuid, 16); - u->invalidated = cpu_to_le32(get_seconds()); + u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); bch_uuid_write(d->c); } @@ -713,7 +721,7 @@ static void bcache_device_detach(struct bcache_device *d) } static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, - unsigned id) + unsigned int id) { d->id = id; d->c = c; @@ -760,7 +768,7 @@ static void bcache_device_free(struct bcache_device *d) closure_debug_destroy(&d->cl); } -static int bcache_device_init(struct bcache_device *d, unsigned block_size, +static int bcache_device_init(struct bcache_device *d, unsigned int block_size, sector_t sectors) { struct request_queue *q; @@ -776,7 +784,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, if (!d->nr_stripes || d->nr_stripes > max_stripes) { pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", - (unsigned)d->nr_stripes); + (unsigned int)d->nr_stripes); return -ENOMEM; } @@ -796,11 +804,12 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, return idx; if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio), - BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) || - !(d->disk = alloc_disk(BCACHE_MINORS))) { - ida_simple_remove(&bcache_device_idx, idx); - return -ENOMEM; - } + BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) + goto err; + + d->disk = alloc_disk(BCACHE_MINORS); + if (!d->disk) + goto err; set_capacity(d->disk, sectors); snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", 
idx); @@ -834,6 +843,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, blk_queue_write_cache(q, true, true); return 0; + +err: + ida_simple_remove(&bcache_device_idx, idx); + return -ENOMEM; + } /* Cached device */ @@ -911,6 +925,7 @@ void bch_cached_dev_run(struct cached_dev *dc) if (!d->c && BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { struct closure cl; + closure_init_stack(&cl); SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE); @@ -920,8 +935,10 @@ void bch_cached_dev_run(struct cached_dev *dc) add_disk(d->disk); bd_link_disk_holder(dc->bdev, dc->disk.disk); - /* won't show up in the uevent file, use udevadm monitor -e instead - * only class / kset properties are persistent */ + /* + * won't show up in the uevent file, use udevadm monitor -e instead + * only class / kset properties are persistent + */ kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); kfree(env[1]); kfree(env[2]); @@ -968,6 +985,7 @@ static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); struct closure cl; + closure_init_stack(&cl); BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); @@ -1027,7 +1045,7 @@ void bch_cached_dev_detach(struct cached_dev *dc) int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, uint8_t *set_uuid) { - uint32_t rtime = cpu_to_le32(get_seconds()); + uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds()); struct uuid_entry *u; struct cached_dev *exist_dc, *t; @@ -1070,7 +1088,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE || BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) { memcpy(u->uuid, invalid_uuid, 16); - u->invalidated = cpu_to_le32(get_seconds()); + u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); u = NULL; } @@ -1089,12 +1107,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, } } - /* Deadlocks since we're called via sysfs... - sysfs_remove_file(&dc->kobj, &sysfs_attach); + /* + * Deadlocks since we're called via sysfs... 
+ * sysfs_remove_file(&dc->kobj, &sysfs_attach); */ if (bch_is_zero(u->uuid, 16)) { struct closure cl; + closure_init_stack(&cl); memcpy(u->uuid, dc->sb.uuid, 16); @@ -1116,11 +1136,11 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, list_move(&dc->list, &c->cached_devs); calc_cached_dev_sectors(c); - smp_wmb(); /* * dc->c must be set before dc->count != 0 - paired with the mb in * cached_dev_get() */ + smp_wmb(); refcount_set(&dc->count, 1); /* Block writeback thread, but spawn it */ @@ -1138,6 +1158,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); + atomic_inc(&c->attached_dev_nr); /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); @@ -1203,7 +1224,7 @@ static void cached_dev_flush(struct closure *cl) continue_at(cl, cached_dev_free, system_wq); } -static int cached_dev_init(struct cached_dev *dc, unsigned block_size) +static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) { int ret; struct io *io; @@ -1285,6 +1306,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, pr_info("registered backing device %s", dc->backing_dev_name); list_add(&dc->list, &uncached_devices); + /* attach to a matched cache set if it exists */ list_for_each_entry(c, &bch_cache_sets, list) bch_cached_dev_attach(dc, c, NULL); @@ -1310,7 +1332,10 @@ void bch_flash_dev_release(struct kobject *kobj) static void flash_dev_free(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); + mutex_lock(&bch_register_lock); + atomic_long_sub(bcache_dev_sectors_dirty(d), + &d->c->flash_dev_dirty_sectors); bcache_device_free(d); mutex_unlock(&bch_register_lock); kobject_put(&d->kobj); @@ -1390,7 +1415,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) get_random_bytes(u->uuid, 16); memset(u->label, 0, 32); - u->first_reg = u->last_reg = cpu_to_le32(get_seconds()); + u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds()); SET_UUID_FLASH_ONLY(u, 1); u->sectors = size >> 9; @@ -1447,17 +1472,18 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) pr_info("CACHE_SET_IO_DISABLE already set"); - /* XXX: we can be called from atomic context - acquire_console_sem(); - */ + /* + * XXX: we can be called from atomic context + * acquire_console_sem(); + */ - printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid); + pr_err("bcache: error on %pU: ", c->sb.set_uuid); va_start(args, fmt); vprintk(fmt, args); va_end(args); - printk(", disabling caching\n"); + pr_err(", disabling caching\n"); if (c->on_error == ON_ERROR_PANIC) panic("panic forced after error\n"); @@ -1469,6 +1495,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) 
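/*
 * A userspace analogue (not part of the patch) of the ordering rule noted
 * in bch_cached_dev_attach() above: dc->c must be visible before dc->count
 * becomes non-zero. The kernel pairs smp_wmb() before refcount_set() with a
 * read-side barrier in cached_dev_get(); the same publish/consume shape is
 * sketched here with C11 release/acquire atomics and stub types.
 */
#include <stdatomic.h>
#include <stddef.h>

struct cache_set_stub { int id; };

struct cached_dev_stub {
	struct cache_set_stub *c;	/* published field           */
	atomic_int count;		/* 0 until fully initialised */
};

static void attach(struct cached_dev_stub *dc, struct cache_set_stub *set)
{
	dc->c = set;			/* initialise everything first...       */
	atomic_store_explicit(&dc->count, 1, memory_order_release);
}					/* ...then let readers see count != 0   */

static struct cache_set_stub *get(struct cached_dev_stub *dc)
{
	if (atomic_load_explicit(&dc->count, memory_order_acquire) == 0)
		return NULL;		/* not attached yet                     */
	return dc->c;			/* guaranteed to observe the store above */
}

int main(void)
{
	static struct cache_set_stub set = { .id = 1 };
	static struct cached_dev_stub dc;

	attach(&dc, &set);
	return get(&dc) ? 0 : 1;
}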
void bch_cache_set_release(struct kobject *kobj) { struct cache_set *c = container_of(kobj, struct cache_set, kobj); + kfree(c); module_put(THIS_MODULE); } @@ -1477,7 +1504,7 @@ static void cache_set_free(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, cl); struct cache *ca; - unsigned i; + unsigned int i; if (!IS_ERR_OR_NULL(c->debug)) debugfs_remove(c->debug); @@ -1520,7 +1547,7 @@ static void cache_set_flush(struct closure *cl) struct cache_set *c = container_of(cl, struct cache_set, caching); struct cache *ca; struct btree *b; - unsigned i; + unsigned int i; bch_cache_accounting_destroy(&c->accounting); @@ -1659,6 +1686,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) { int iter_size; struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); + if (!c) return NULL; @@ -1687,6 +1715,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->block_bits = ilog2(sb->block_size); c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); c->devices_max_used = 0; + atomic_set(&c->attached_dev_nr, 0); c->btree_pages = bucket_pages(c); if (c->btree_pages > BTREE_MAX_PAGES) c->btree_pages = max_t(int, c->btree_pages / 4, @@ -1718,8 +1747,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL)) || mempool_init_slab_pool(&c->search, 32, bch_search_cache) || mempool_init_kmalloc_pool(&c->bio_meta, 2, - sizeof(struct bbio) + sizeof(struct bio_vec) * - bucket_pages(c)) || + sizeof(struct bbio) + sizeof(struct bio_vec) * + bucket_pages(c)) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) || @@ -1749,7 +1778,7 @@ static void run_cache_set(struct cache_set *c) struct cached_dev *dc, *t; struct cache *ca; struct closure cl; - unsigned i; + unsigned int i; closure_init_stack(&cl); @@ -1791,7 +1820,9 @@ static void run_cache_set(struct cache_set *c) goto err; err = "error reading btree root"; - c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL); + c->root = bch_btree_node_get(c, NULL, k, + j->btree_level, + true, NULL); if (IS_ERR_OR_NULL(c->root)) goto err; @@ -1840,7 +1871,7 @@ static void run_cache_set(struct cache_set *c) pr_notice("invalidating existing data"); for_each_cache(ca, c, i) { - unsigned j; + unsigned int j; ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7, 2, SB_JOURNAL_BUCKETS); @@ -1894,7 +1925,7 @@ static void run_cache_set(struct cache_set *c) goto err; closure_sync(&cl); - c->sb.last_mount = get_seconds(); + c->sb.last_mount = (u32)ktime_get_real_seconds(); bcache_write_super(c); list_for_each_entry_safe(dc, t, &uncached_devices, list) @@ -1985,7 +2016,7 @@ err: void bch_cache_release(struct kobject *kobj) { struct cache *ca = container_of(kobj, struct cache, kobj); - unsigned i; + unsigned int i; if (ca->set) { BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca); @@ -2085,7 +2116,9 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, goto err; } - if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { + if (kobject_add(&ca->kobj, + &part_to_dev(bdev->bd_part)->kobj, + "bcache")) { err = "error calling kobject_add"; ret = -ENOMEM; goto out; @@ -2114,13 +2147,14 @@ err: /* Global interfaces/init */ -static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, - const char *, size_t); +static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + const char 
*buffer, size_t size); kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); -static bool bch_is_open_backing(struct block_device *bdev) { +static bool bch_is_open_backing(struct block_device *bdev) +{ struct cache_set *c, *tc; struct cached_dev *dc, *t; @@ -2134,10 +2168,11 @@ static bool bch_is_open_backing(struct block_device *bdev) { return false; } -static bool bch_is_open_cache(struct block_device *bdev) { +static bool bch_is_open_cache(struct block_device *bdev) +{ struct cache_set *c, *tc; struct cache *ca; - unsigned i; + unsigned int i; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) for_each_cache(ca, c, i) @@ -2146,7 +2181,8 @@ static bool bch_is_open_cache(struct block_device *bdev) { return false; } -static bool bch_is_open(struct block_device *bdev) { +static bool bch_is_open(struct block_device *bdev) +{ return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); } @@ -2163,8 +2199,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!try_module_get(THIS_MODULE)) return -EBUSY; - if (!(path = kstrndup(buffer, size, GFP_KERNEL)) || - !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL))) + path = kstrndup(buffer, size, GFP_KERNEL); + if (!path) + goto err; + + sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL); + if (!sb) goto err; err = "failed to open device"; @@ -2199,6 +2239,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, err = "failed to register device"; if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); + if (!dc) goto err_close; @@ -2207,6 +2248,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, mutex_unlock(&bch_register_lock); } else { struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); + if (!ca) goto err_close; @@ -2324,13 +2366,21 @@ static int __init bcache_init(void) return bcache_major; } - if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) || - !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || - bch_request_init() || - bch_debug_init(bcache_kobj) || closure_debug_init() || + bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0); + if (!bcache_wq) + goto err; + + bcache_kobj = kobject_create_and_add("bcache", fs_kobj); + if (!bcache_kobj) + goto err; + + if (bch_request_init() || sysfs_create_files(bcache_kobj, files)) goto err; + bch_debug_init(bcache_kobj); + closure_debug_init(); + return 0; err: bcache_exit(); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 225b15aa0340..150cf4f4cf74 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -130,8 +130,10 @@ rw_attribute(btree_shrinker_disabled); rw_attribute(copy_gc_enabled); rw_attribute(size); -static ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], - size_t selected) +static ssize_t bch_snprint_string_list(char *buf, + size_t size, + const char * const list[], + size_t selected) { char *out = buf; size_t i; @@ -148,7 +150,8 @@ SHOW(__bch_cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - const char *states[] = { "no cache", "clean", "dirty", "inconsistent" }; + char const *states[] = { "no cache", "clean", "dirty", "inconsistent" }; + int wb = dc->writeback_running; #define var(stat) (dc->stat) @@ -170,7 +173,8 @@ SHOW(__bch_cached_dev) var_printf(writeback_running, "%i"); var_print(writeback_delay); var_print(writeback_percent); - sysfs_hprint(writeback_rate, 
dc->writeback_rate.rate << 9); + sysfs_hprint(writeback_rate, + wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0); sysfs_hprint(io_errors, atomic_read(&dc->io_errors)); sysfs_printf(io_error_limit, "%i", dc->error_limit); sysfs_printf(io_disable, "%i", dc->io_disable); @@ -188,15 +192,22 @@ SHOW(__bch_cached_dev) char change[20]; s64 next_io; - bch_hprint(rate, dc->writeback_rate.rate << 9); - bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9); - bch_hprint(target, dc->writeback_rate_target << 9); - bch_hprint(proportional,dc->writeback_rate_proportional << 9); - bch_hprint(integral, dc->writeback_rate_integral_scaled << 9); - bch_hprint(change, dc->writeback_rate_change << 9); - - next_io = div64_s64(dc->writeback_rate.next - local_clock(), - NSEC_PER_MSEC); + /* + * Except for dirty and target, other values should + * be 0 if writeback is not running. + */ + bch_hprint(rate, + wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 + : 0); + bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9); + bch_hprint(target, dc->writeback_rate_target << 9); + bch_hprint(proportional, + wb ? dc->writeback_rate_proportional << 9 : 0); + bch_hprint(integral, + wb ? dc->writeback_rate_integral_scaled << 9 : 0); + bch_hprint(change, wb ? dc->writeback_rate_change << 9 : 0); + next_io = wb ? div64_s64(dc->writeback_rate.next-local_clock(), + NSEC_PER_MSEC) : 0; return sprintf(buf, "rate:\t\t%s/sec\n" @@ -255,8 +266,19 @@ STORE(__cached_dev) sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); - sysfs_strtoul_clamp(writeback_rate, - dc->writeback_rate.rate, 1, INT_MAX); + if (attr == &sysfs_writeback_rate) { + ssize_t ret; + long int v = atomic_long_read(&dc->writeback_rate.rate); + + ret = strtoul_safe_clamp(buf, v, 1, INT_MAX); + + if (!ret) { + atomic_long_set(&dc->writeback_rate.rate, v); + ret = size; + } + + return ret; + } sysfs_strtoul_clamp(writeback_rate_update_seconds, dc->writeback_rate_update_seconds, @@ -287,7 +309,7 @@ STORE(__cached_dev) if (v < 0) return v; - if ((unsigned) v != BDEV_CACHE_MODE(&dc->sb)) { + if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) { SET_BDEV_CACHE_MODE(&dc->sb, v); bch_write_bdev_super(dc, NULL); } @@ -321,8 +343,9 @@ STORE(__cached_dev) add_uevent_var(env, "DRIVER=bcache"); add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid), add_uevent_var(env, "CACHED_LABEL=%s", buf); - kobject_uevent_env( - &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp); + kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj, + KOBJ_CHANGE, + env->envp); kfree(env); } @@ -338,8 +361,8 @@ STORE(__cached_dev) if (!v) return size; } - - pr_err("Can't attach %s: cache set not found", buf); + if (v == -ENOENT) + pr_err("Can't attach %s: cache set not found", buf); return v; } @@ -439,6 +462,7 @@ STORE(__bch_flash_dev) if (attr == &sysfs_size) { uint64_t v; + strtoi_h_or_return(buf, v); u->sectors = v >> 9; @@ -513,9 +537,9 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf) op.stats.floats, op.stats.failed); } -static unsigned bch_root_usage(struct cache_set *c) +static unsigned int bch_root_usage(struct cache_set *c) { - unsigned bytes = 0; + unsigned int bytes = 0; struct bkey *k; struct btree *b; struct btree_iter iter; @@ -550,9 +574,9 @@ static size_t bch_cache_size(struct cache_set *c) return ret; } -static unsigned bch_cache_max_chain(struct cache_set *c) +static unsigned int bch_cache_max_chain(struct cache_set *c) { - unsigned ret = 0; + unsigned int ret = 0; struct hlist_head *h; mutex_lock(&c->bucket_lock); @@ -560,7 
+584,7 @@ static unsigned bch_cache_max_chain(struct cache_set *c) for (h = c->bucket_hash; h < c->bucket_hash + (1 << BUCKET_HASH_BITS); h++) { - unsigned i = 0; + unsigned int i = 0; struct hlist_node *p; hlist_for_each(p, h) @@ -573,13 +597,13 @@ static unsigned bch_cache_max_chain(struct cache_set *c) return ret; } -static unsigned bch_btree_used(struct cache_set *c) +static unsigned int bch_btree_used(struct cache_set *c) { return div64_u64(c->gc_stats.key_bytes * 100, (c->gc_stats.nodes ?: 1) * btree_bytes(c)); } -static unsigned bch_average_key_size(struct cache_set *c) +static unsigned int bch_average_key_size(struct cache_set *c) { return c->gc_stats.nkeys ? div64_u64(c->gc_stats.data, c->gc_stats.nkeys) @@ -683,6 +707,7 @@ STORE(__bch_cache_set) if (attr == &sysfs_flash_vol_create) { int r; uint64_t v; + strtoi_h_or_return(buf, v); r = bch_flash_dev_create(c, v); @@ -716,6 +741,7 @@ STORE(__bch_cache_set) if (attr == &sysfs_prune_cache) { struct shrink_control sc; + sc.gfp_mask = GFP_KERNEL; sc.nr_to_scan = strtoul_or_return(buf); c->shrink.scan_objects(&c->shrink, &sc); @@ -769,12 +795,14 @@ STORE_LOCKED(bch_cache_set) SHOW(bch_cache_set_internal) { struct cache_set *c = container_of(kobj, struct cache_set, internal); + return bch_cache_set_show(&c->kobj, attr, buf); } STORE(bch_cache_set_internal) { struct cache_set *c = container_of(kobj, struct cache_set, internal); + return bch_cache_set_store(&c->kobj, attr, buf, size); } @@ -976,7 +1004,7 @@ STORE(__bch_cache) if (v < 0) return v; - if ((unsigned) v != CACHE_REPLACEMENT(&ca->sb)) { + if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) { mutex_lock(&ca->set->bucket_lock); SET_CACHE_REPLACEMENT(&ca->sb, v); mutex_unlock(&ca->set->bucket_lock); diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index b54fe9602529..3fe82425859c 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -44,9 +44,9 @@ STORE(fn) \ static struct attribute sysfs_##_name = \ { .name = #_name, .mode = _mode } -#define write_attribute(n) __sysfs_attribute(n, S_IWUSR) -#define read_attribute(n) __sysfs_attribute(n, S_IRUGO) -#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR) +#define write_attribute(n) __sysfs_attribute(n, 0200) +#define read_attribute(n) __sysfs_attribute(n, 0444) +#define rw_attribute(n) __sysfs_attribute(n, 0644) #define sysfs_printf(file, fmt, ...) \ do { \ diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index fc479b026d6d..20eddeac1531 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * random utiility code, for bcache but in theory not specific to bcache * @@ -133,6 +134,7 @@ bool bch_is_zero(const char *p, size_t n) int bch_parse_uuid(const char *s, char *uuid) { size_t i, j, x; + memset(uuid, 0, 16); for (i = 0, j = 0; @@ -200,7 +202,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) { uint64_t now = local_clock(); - d->next += div_u64(done * NSEC_PER_SEC, d->rate); + d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate)); /* Bound the time. Don't let us fall further than 2 seconds behind * (this prevents unnecessary backlog that would make it impossible @@ -279,134 +281,3 @@ int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) return 0; } - -/* - * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any - * use permitted, subject to terms of PostgreSQL license; see.) 
- - * If we have a 64-bit integer type, then a 64-bit CRC looks just like the - * usual sort of implementation. (See Ross Williams' excellent introduction - * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from - * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) - * If we have no working 64-bit type, then fake it with two 32-bit registers. - * - * The present implementation is a normal (not "reflected", in Williams' - * terms) 64-bit CRC, using initial all-ones register contents and a final - * bit inversion. The chosen polynomial is borrowed from the DLT1 spec - * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): - * - * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + - * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + - * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + - * x^7 + x^4 + x + 1 -*/ - -static const uint64_t crc_table[256] = { - 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL, - 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL, - 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL, - 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL, - 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL, - 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL, - 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL, - 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL, - 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL, - 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL, - 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL, - 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL, - 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL, - 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL, - 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL, - 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL, - 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL, - 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL, - 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL, - 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL, - 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL, - 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL, - 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL, - 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL, - 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL, - 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL, - 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL, - 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL, - 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL, - 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL, - 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL, - 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL, - 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL, - 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL, - 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL, - 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL, - 
0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL, - 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL, - 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL, - 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL, - 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL, - 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL, - 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL, - 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL, - 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL, - 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL, - 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL, - 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL, - 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL, - 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL, - 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL, - 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL, - 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL, - 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL, - 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL, - 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL, - 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL, - 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL, - 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL, - 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL, - 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL, - 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL, - 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL, - 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL, - 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL, - 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL, - 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL, - 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL, - 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL, - 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL, - 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL, - 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL, - 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL, - 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL, - 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL, - 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL, - 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL, - 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL, - 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL, - 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL, - 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL, - 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL, - 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL, - 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL, - 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL, - 0x9AFCE626CE85B507ULL, -}; - -uint64_t bch_crc64_update(uint64_t crc, const 
void *_data, size_t len) -{ - const unsigned char *data = _data; - - while (len--) { - int i = ((int) (crc >> 56) ^ *data++) & 0xFF; - crc = crc_table[i] ^ (crc << 8); - } - - return crc; -} - -uint64_t bch_crc64(const void *data, size_t len) -{ - uint64_t crc = 0xffffffffffffffffULL; - - crc = bch_crc64_update(crc, data, len); - - return crc ^ 0xffffffffffffffffULL; -} diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cced87f8eb27..00aab6abcfe4 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -11,6 +11,7 @@ #include <linux/ratelimit.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> +#include <linux/crc64.h> #include "closure.h" @@ -288,10 +289,10 @@ do { \ #define ANYSINT_MAX(t) \ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) -int bch_strtoint_h(const char *, int *); -int bch_strtouint_h(const char *, unsigned int *); -int bch_strtoll_h(const char *, long long *); -int bch_strtoull_h(const char *, unsigned long long *); +int bch_strtoint_h(const char *cp, int *res); +int bch_strtouint_h(const char *cp, unsigned int *res); +int bch_strtoll_h(const char *cp, long long *res); +int bch_strtoull_h(const char *cp, unsigned long long *res); static inline int bch_strtol_h(const char *cp, long *res) { @@ -347,7 +348,7 @@ static inline int bch_strtoul_h(const char *cp, long *res) snprintf(buf, size, \ __builtin_types_compatible_p(typeof(var), int) \ ? "%i\n" : \ - __builtin_types_compatible_p(typeof(var), unsigned) \ + __builtin_types_compatible_p(typeof(var), unsigned int) \ ? "%u\n" : \ __builtin_types_compatible_p(typeof(var), long) \ ? "%li\n" : \ @@ -379,7 +380,7 @@ struct time_stats { void bch_time_stats_update(struct time_stats *stats, uint64_t time); -static inline unsigned local_clock_us(void) +static inline unsigned int local_clock_us(void) { return local_clock() >> 10; } @@ -402,7 +403,8 @@ do { \ __print_time_stat(stats, name, \ average_duration, duration_units); \ sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \ - div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\ + div_u64((stats)->max_duration, \ + NSEC_PER_ ## duration_units)); \ \ sysfs_print(name ## _last_ ## frequency_units, (stats)->last \ ? 
div_s64(local_clock() - (stats)->last, \ @@ -442,7 +444,7 @@ struct bch_ratelimit { * Rate at which we want to do work, in units per second * The units here correspond to the units passed to bch_next_delay() */ - uint32_t rate; + atomic_long_t rate; }; static inline void bch_ratelimit_reset(struct bch_ratelimit *d) @@ -542,10 +544,27 @@ dup: \ #define RB_PREV(ptr, member) \ container_of_or_null(rb_prev(&(ptr)->member), typeof(*ptr), member) +static inline uint64_t bch_crc64(const void *p, size_t len) +{ + uint64_t crc = 0xffffffffffffffffULL; + + crc = crc64_be(crc, p, len); + return crc ^ 0xffffffffffffffffULL; +} + +static inline uint64_t bch_crc64_update(uint64_t crc, + const void *p, + size_t len) +{ + crc = crc64_be(crc, p, len); + return crc; +} + /* Does linear interpolation between powers of two */ -static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) +static inline unsigned int fract_exp_two(unsigned int x, + unsigned int fract_bits) { - unsigned fract = x & ~(~0 << fract_bits); + unsigned int fract = x & ~(~0 << fract_bits); x >>= fract_bits; x = 1 << x; @@ -561,8 +580,4 @@ static inline sector_t bdev_sectors(struct block_device *bdev) { return bdev->bd_inode->i_size >> 9; } - -uint64_t bch_crc64_update(uint64_t, const void *, size_t); -uint64_t bch_crc64(const void *, size_t); - #endif /* _BCACHE_UTIL_H */ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index ad45ebe1a74b..6be05bd7ca67 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -27,7 +27,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc) * flash-only devices */ uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - - bcache_flash_devs_sectors_dirty(c); + atomic_long_read(&c->flash_dev_dirty_sectors); /* * Unfortunately there is no control of global dirty data. If the @@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc) dc->writeback_rate_proportional = proportional_scaled; dc->writeback_rate_integral_scaled = integral_scaled; - dc->writeback_rate_change = new_rate - dc->writeback_rate.rate; - dc->writeback_rate.rate = new_rate; + dc->writeback_rate_change = new_rate - + atomic_long_read(&dc->writeback_rate.rate); + atomic_long_set(&dc->writeback_rate.rate, new_rate); dc->writeback_rate_target = target; } +static bool set_at_max_writeback_rate(struct cache_set *c, + struct cached_dev *dc) +{ + /* + * Idle_counter is increased every time update_writeback_rate() is + * called. If all backing devices attached to the same cache set have + * identical dc->writeback_rate_update_seconds values, it is about 6 + * rounds of update_writeback_rate() on each backing device before + * c->at_max_writeback_rate is set to 1, and then the max writeback rate is set + * to each dc->writeback_rate.rate. + * To avoid extra locking cost for counting the exact number of dirty cached + * devices, c->attached_dev_nr is used to calculate the idle + * threshold. It might be bigger if not all cached devices are in write- + * back mode, but it still works well with limited extra rounds of + * update_writeback_rate().
+ */ + if (atomic_inc_return(&c->idle_counter) < + atomic_read(&c->attached_dev_nr) * 6) + return false; + + if (atomic_read(&c->at_max_writeback_rate) != 1) + atomic_set(&c->at_max_writeback_rate, 1); + + atomic_long_set(&dc->writeback_rate.rate, INT_MAX); + + /* keep writeback_rate_target as existing value */ + dc->writeback_rate_proportional = 0; + dc->writeback_rate_integral_scaled = 0; + dc->writeback_rate_change = 0; + + /* + * Check c->idle_counter and c->at_max_writeback_rate again in case + * new I/O arrives before set_at_max_writeback_rate() returns. + * Then the writeback rate is set to 1, and its new value should be + * decided via __update_writeback_rate(). + */ + if ((atomic_read(&c->idle_counter) < + atomic_read(&c->attached_dev_nr) * 6) || + !atomic_read(&c->at_max_writeback_rate)) + return false; + + return true; +} + static void update_writeback_rate(struct work_struct *work) { struct cached_dev *dc = container_of(to_delayed_work(work), @@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work) return; } - down_read(&dc->writeback_lock); - - if (atomic_read(&dc->has_dirty) && - dc->writeback_percent) - __update_writeback_rate(dc); + if (atomic_read(&dc->has_dirty) && dc->writeback_percent) { + /* + * If the whole cache set is idle, set_at_max_writeback_rate() + * will set the writeback rate to a maximum value. Then it is + * unnecessary to update the writeback rate for an idle cache set + * which is already at the maximum writeback rate. + */ + if (!set_at_max_writeback_rate(c, dc)) { + down_read(&dc->writeback_lock); + __update_writeback_rate(dc); + up_read(&dc->writeback_lock); + } + } - up_read(&dc->writeback_lock); /* * CACHE_SET_IO_DISABLE might be set via sysfs interface, @@ -163,7 +215,8 @@ static void update_writeback_rate(struct work_struct *work) smp_mb(); } -static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) +static unsigned int writeback_delay(struct cached_dev *dc, + unsigned int sectors) { if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || !dc->writeback_percent) @@ -197,6 +250,7 @@ static void dirty_init(struct keybuf_key *w) static void dirty_io_destructor(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); + kfree(io); } @@ -211,7 +265,7 @@ static void write_dirty_finish(struct closure *cl) /* This is kind of a dumb way of signalling errors. */ if (KEY_DIRTY(&w->key)) { int ret; - unsigned i; + unsigned int i; struct keylist keys; bch_keylist_init(&keys); @@ -325,7 +379,7 @@ static void read_dirty_submit(struct closure *cl) static void read_dirty(struct cached_dev *dc) { - unsigned delay = 0; + unsigned int delay = 0; struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w; size_t size; int nk, i; @@ -390,7 +444,8 @@ static void read_dirty(struct cached_dev *dc) io = kzalloc(sizeof(struct dirty_io) + sizeof(struct bio_vec) * - DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), + DIV_ROUND_UP(KEY_SIZE(&w->key), + PAGE_SECTORS), GFP_KERNEL); if (!io) goto err; @@ -413,7 +468,8 @@ static void read_dirty(struct cached_dev *dc) down(&dc->in_flight); - /* We've acquired a semaphore for the maximum + /* + * We've acquired a semaphore for the maximum * simultaneous number of writebacks; from here * everything happens asynchronously.
*/ @@ -422,27 +478,6 @@ static void read_dirty(struct cached_dev *dc) delay = writeback_delay(dc, size); - /* If the control system would wait for at least half a - * second, and there's been no reqs hitting the backing disk - * for awhile: use an alternate mode where we have at most - * one contiguous set of writebacks in flight at a time. If - * someone wants to do IO it will be quick, as it will only - * have to contend with one operation in flight, and we'll - * be round-tripping data to the backing disk as quickly as - * it can accept it. - */ - if (delay >= HZ / 2) { - /* 3 means at least 1.5 seconds, up to 7.5 if we - * have slowed way down. - */ - if (atomic_inc_return(&dc->backing_idle) >= 3) { - /* Wait for current I/Os to finish */ - closure_sync(&cl); - /* And immediately launch a new set. */ - delay = 0; - } - } - while (!kthread_should_stop() && !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && delay) { @@ -467,20 +502,23 @@ err: /* Scan for dirty data */ -void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, +void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode, uint64_t offset, int nr_sectors) { struct bcache_device *d = c->devices[inode]; - unsigned stripe_offset, stripe, sectors_dirty; + unsigned int stripe_offset, stripe, sectors_dirty; if (!d) return; + if (UUID_FLASH_ONLY(&c->uuids[inode])) + atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors); + stripe = offset_to_stripe(d, offset); stripe_offset = offset & (d->stripe_size - 1); while (nr_sectors) { - int s = min_t(unsigned, abs(nr_sectors), + int s = min_t(unsigned int, abs(nr_sectors), d->stripe_size - stripe_offset); if (nr_sectors < 0) @@ -504,7 +542,9 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, static bool dirty_pred(struct keybuf *buf, struct bkey *k) { - struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys); + struct cached_dev *dc = container_of(buf, + struct cached_dev, + writeback_keys); BUG_ON(KEY_INODE(k) != dc->disk.id); @@ -514,7 +554,7 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) static void refill_full_stripes(struct cached_dev *dc) { struct keybuf *buf = &dc->writeback_keys; - unsigned start_stripe, stripe, next_stripe; + unsigned int start_stripe, stripe, next_stripe; bool wrapped = false; stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned)); @@ -654,7 +694,7 @@ static int bch_writeback_thread(void *arg) read_dirty(dc); if (searched_full_index) { - unsigned delay = dc->writeback_delay * HZ; + unsigned int delay = dc->writeback_delay * HZ; while (delay && !kthread_should_stop() && @@ -673,10 +713,14 @@ static int bch_writeback_thread(void *arg) } /* Init */ +#define INIT_KEYS_EACH_TIME 500000 +#define INIT_KEYS_SLEEP_MS 100 struct sectors_dirty_init { struct btree_op op; - unsigned inode; + unsigned int inode; + size_t count; + struct bkey start; }; static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, @@ -691,18 +735,37 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), KEY_START(k), KEY_SIZE(k)); + op->count++; + if (atomic_read(&b->c->search_inflight) && + !(op->count % INIT_KEYS_EACH_TIME)) { + bkey_copy_key(&op->start, k); + return -EAGAIN; + } + return MAP_CONTINUE; } void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; + int ret; bch_btree_op_init(&op.op, -1); op.inode = d->id; - - bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), - 
sectors_dirty_init_fn, 0); + op.count = 0; + op.start = KEY(op.inode, 0, 0); + + do { + ret = bch_btree_map_keys(&op.op, d->c, &op.start, + sectors_dirty_init_fn, 0); + if (ret == -EAGAIN) + schedule_timeout_interruptible( + msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); + else if (ret < 0) { + pr_warn("sectors dirty init failed, ret=%d!", ret); + break; + } + } while (ret == -EAGAIN); } void bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -715,7 +778,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_running = true; dc->writeback_percent = 10; dc->writeback_delay = 30; - dc->writeback_rate.rate = 1024; + atomic_long_set(&dc->writeback_rate.rate, 1024); dc->writeback_rate_minimum = 8; dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 610fb01de629..d2b9fdbc8994 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -28,26 +28,7 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } -static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) -{ - uint64_t i, ret = 0; - - mutex_lock(&bch_register_lock); - - for (i = 0; i < c->devices_max_used; i++) { - struct bcache_device *d = c->devices[i]; - - if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) - continue; - ret += bcache_dev_sectors_dirty(d); - } - - mutex_unlock(&bch_register_lock); - - return ret; -} - -static inline unsigned offset_to_stripe(struct bcache_device *d, +static inline unsigned int offset_to_stripe(struct bcache_device *d, uint64_t offset) { do_div(offset, d->stripe_size); @@ -56,9 +37,9 @@ static inline unsigned offset_to_stripe(struct bcache_device *d, static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, uint64_t offset, - unsigned nr_sectors) + unsigned int nr_sectors) { - unsigned stripe = offset_to_stripe(&dc->disk, offset); + unsigned int stripe = offset_to_stripe(&dc->disk, offset); while (1) { if (atomic_read(dc->disk.stripe_sectors_dirty + stripe)) @@ -73,9 +54,9 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, } static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, - unsigned cache_mode, bool would_skip) + unsigned int cache_mode, bool would_skip) { - unsigned in_use = dc->disk.c->gc_stats.in_use; + unsigned int in_use = dc->disk.c->gc_stats.in_use; if (cache_mode != CACHE_MODE_WRITEBACK || test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || @@ -115,10 +96,11 @@ static inline void bch_writeback_add(struct cached_dev *dc) } } -void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); +void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode, + uint64_t offset, int nr_sectors); -void bch_sectors_dirty_init(struct bcache_device *); -void bch_cached_dev_writeback_init(struct cached_dev *); -int bch_cached_dev_writeback_start(struct cached_dev *); +void bch_sectors_dirty_init(struct bcache_device *d); +void bch_cached_dev_writeback_init(struct cached_dev *dc); +int bch_cached_dev_writeback_start(struct cached_dev *dc); #endif |
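
Reviewer note (not part of the patch): the util.c/util.h hunks above remove the local 256-entry CRC table and the out-of-line bch_crc64()/bch_crc64_update() functions, keeping only thin inline wrappers around crc64_be(). bch_crc64() still uses the old convention of an all-ones initial register and a final inversion, while bch_crc64_update() passes the running value straight through. The sketch below is a minimal userspace illustration of that wrapper behaviour, assuming crc64_be() computes a normal-form (non-reflected) CRC-64 over the ECMA-182 polynomial; crc64_be_bitwise() is a hypothetical stand-in written only for this example and is not a kernel function.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* ECMA-182 polynomial, normal (non-reflected) form. */
#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL

/* Bit-at-a-time stand-in for the kernel's table-driven crc64_be(). */
static uint64_t crc64_be_bitwise(uint64_t crc, const void *_p, size_t len)
{
    const unsigned char *p = _p;

    while (len--) {
        crc ^= (uint64_t)*p++ << 56;
        for (int i = 0; i < 8; i++)
            crc = (crc & (1ULL << 63))
                ? (crc << 1) ^ CRC64_ECMA182_POLY
                : crc << 1;
    }
    return crc;
}

/* Same shape as the new inline wrapper in util.h: all-ones seed, final invert. */
static uint64_t bch_crc64(const void *p, size_t len)
{
    uint64_t crc = 0xffffffffffffffffULL;

    crc = crc64_be_bitwise(crc, p, len);
    return crc ^ 0xffffffffffffffffULL;
}

int main(void)
{
    const char buf[] = "bcache";

    printf("bch_crc64(\"%s\") = 0x%016llx\n", buf,
           (unsigned long long)bch_crc64(buf, strlen(buf)));
    return 0;
}

Feeding the same buffer through the removed table-driven bch_crc64() should produce the same value, since the deleted table was generated from the same polynomial (its entry for index 1 is exactly 0x42F0E1EBA9EA3693).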
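
Reviewer note (not part of the patch): the writeback.c changes turn dc->writeback_rate.rate into an atomic_long_t and add set_at_max_writeback_rate(), which drives the rate to INT_MAX once update_writeback_rate() has run roughly six times per attached backing device (c->attached_dev_nr) without the idle counter being reset by new I/O. The following is a minimal, self-contained userspace model of just that counter logic, assuming C11 <stdatomic.h>; maybe_set_max_rate(), note_io() and the file-scope counters are local to this sketch, not kernel symbols.

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_long idle_counter;     /* models c->idle_counter */
static atomic_long attached_dev_nr;  /* models c->attached_dev_nr */
static atomic_long writeback_rate;   /* models dc->writeback_rate.rate */

/* Called from the per-device update worker; returns true if rate was maxed. */
static bool maybe_set_max_rate(void)
{
    /* Mirror of the "6 rounds per attached device" idle threshold. */
    if (atomic_fetch_add(&idle_counter, 1) + 1 <
        atomic_load(&attached_dev_nr) * 6)
        return false;

    atomic_store(&writeback_rate, INT_MAX);

    /* Re-check: I/O arriving meanwhile would have reset the counter. */
    if (atomic_load(&idle_counter) < atomic_load(&attached_dev_nr) * 6)
        return false;

    return true;
}

/* Stand-in for the I/O submission path leaving "idle" mode. */
static void note_io(void)
{
    atomic_store(&idle_counter, 0);
    /* Resetting to a baseline here stands in for __update_writeback_rate(). */
    atomic_store(&writeback_rate, 1024);
}

int main(void)
{
    atomic_store(&attached_dev_nr, 3);  /* pretend three backing devices */
    atomic_store(&writeback_rate, 1024);

    for (int round = 1; round <= 24; round++) {
        bool maxed = maybe_set_max_rate();

        printf("round %2d: idle_counter=%ld maxed=%d rate=%ld\n",
               round, atomic_load(&idle_counter), maxed,
               atomic_load(&writeback_rate));

        if (round == 20)
            note_io();  /* simulate new front-end I/O arriving */
    }
    return 0;
}

In the kernel, the equivalent of note_io() is expected to happen on the request path (not shown in this hunk), which is what keeps a busy cache set from ever crossing the idle threshold; the second comparison in maybe_set_max_rate() mirrors the re-test of c->idle_counter after the rate has been raised.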