summaryrefslogtreecommitdiff
path: root/drivers/md/bcache
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r-- drivers/md/bcache/Kconfig 7
-rw-r--r-- drivers/md/bcache/alloc.c 39
-rw-r--r-- drivers/md/bcache/bcache.h 230
-rw-r--r-- drivers/md/bcache/bset.c 205
-rw-r--r-- drivers/md/bcache/bset.h 146
-rw-r--r-- drivers/md/bcache/btree.c 135
-rw-r--r-- drivers/md/bcache/btree.h 88
-rw-r--r-- drivers/md/bcache/closure.c 19
-rw-r--r-- drivers/md/bcache/closure.h 10
-rw-r--r-- drivers/md/bcache/debug.c 40
-rw-r--r-- drivers/md/bcache/debug.h 6
-rw-r--r-- drivers/md/bcache/extents.c 37
-rw-r--r-- drivers/md/bcache/extents.h 6
-rw-r--r-- drivers/md/bcache/io.c 24
-rw-r--r-- drivers/md/bcache/journal.c 28
-rw-r--r-- drivers/md/bcache/journal.h 28
-rw-r--r-- drivers/md/bcache/movinggc.c 14
-rw-r--r-- drivers/md/bcache/request.c 136
-rw-r--r-- drivers/md/bcache/request.h 18
-rw-r--r-- drivers/md/bcache/stats.c 15
-rw-r--r-- drivers/md/bcache/stats.h 15
-rw-r--r-- drivers/md/bcache/super.c 166
-rw-r--r-- drivers/md/bcache/sysfs.c 84
-rw-r--r-- drivers/md/bcache/sysfs.h 6
-rw-r--r-- drivers/md/bcache/util.c 135
-rw-r--r-- drivers/md/bcache/util.h 43
-rw-r--r-- drivers/md/bcache/writeback.c 155
-rw-r--r-- drivers/md/bcache/writeback.h 38
28 files changed, 1076 insertions, 797 deletions
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 17bf109c58e9..f6e0a8b3a61e 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -1,7 +1,8 @@
config BCACHE
tristate "Block device as cache"
- ---help---
+ select CRC64
+ help
Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs.
@@ -10,7 +11,7 @@ config BCACHE
config BCACHE_DEBUG
bool "Bcache debugging"
depends on BCACHE
- ---help---
+ help
Don't select this option unless you're a developer
Enables extra debugging tools, allows expensive runtime checks to be
@@ -20,7 +21,7 @@ config BCACHE_CLOSURES_DEBUG
bool "Debug closures"
depends on BCACHE
select DEBUG_FS
- ---help---
+ help
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 7fa2631b422c..7a28232d868b 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -87,8 +87,8 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
{
struct cache *ca;
struct bucket *b;
- unsigned next = c->nbuckets * c->sb.bucket_size / 1024;
- unsigned i;
+ unsigned int next = c->nbuckets * c->sb.bucket_size / 1024;
+ unsigned int i;
int r;
atomic_sub(sectors, &c->rescale);
@@ -169,7 +169,7 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
#define bucket_prio(b) \
({ \
- unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
+ unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
\
(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
})
@@ -244,6 +244,7 @@ static void invalidate_buckets_random(struct cache *ca)
while (!fifo_full(&ca->free_inc)) {
size_t n;
+
get_random_bytes(&n, sizeof(n));
n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket);
@@ -301,7 +302,7 @@ do { \
static int bch_allocator_push(struct cache *ca, long bucket)
{
- unsigned i;
+ unsigned int i;
/* Prios/gens are actually the most important reserve */
if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
@@ -385,7 +386,7 @@ out:
/* Allocation */
-long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
+long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
{
DEFINE_WAIT(w);
struct bucket *b;
@@ -421,7 +422,7 @@ out:
if (expensive_debug_checks(ca->set)) {
size_t iter;
long i;
- unsigned j;
+ unsigned int j;
for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
@@ -470,14 +471,14 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b)
void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
__bch_bucket_free(PTR_CACHE(c, k, i),
PTR_BUCKET(c, k, i));
}
-int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait)
{
int i;
@@ -510,10 +511,11 @@ err:
return -1;
}
-int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
+int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait)
{
int ret;
+
mutex_lock(&c->bucket_lock);
ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
mutex_unlock(&c->bucket_lock);
@@ -524,8 +526,8 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
struct open_bucket {
struct list_head list;
- unsigned last_write_point;
- unsigned sectors_free;
+ unsigned int last_write_point;
+ unsigned int sectors_free;
BKEY_PADDED(key);
};
@@ -556,7 +558,7 @@ struct open_bucket {
*/
static struct open_bucket *pick_data_bucket(struct cache_set *c,
const struct bkey *search,
- unsigned write_point,
+ unsigned int write_point,
struct bkey *alloc)
{
struct open_bucket *ret, *ret_task = NULL;
@@ -595,12 +597,16 @@ found:
*
* If s->writeback is true, will not fail.
*/
-bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
- unsigned write_point, unsigned write_prio, bool wait)
+bool bch_alloc_sectors(struct cache_set *c,
+ struct bkey *k,
+ unsigned int sectors,
+ unsigned int write_point,
+ unsigned int write_prio,
+ bool wait)
{
struct open_bucket *b;
BKEY_PADDED(key) alloc;
- unsigned i;
+ unsigned int i;
/*
* We might have to allocate a new bucket, which we can't do with a
@@ -613,7 +619,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
spin_lock(&c->data_bucket_lock);
while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
- unsigned watermark = write_prio
+ unsigned int watermark = write_prio
? RESERVE_MOVINGGC
: RESERVE_NONE;
@@ -702,6 +708,7 @@ int bch_open_buckets_alloc(struct cache_set *c)
for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+
if (!b)
return -ENOMEM;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d6bf294f3907..83504dd8100a 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -252,7 +252,7 @@ struct bcache_device {
struct kobject kobj;
struct cache_set *c;
- unsigned id;
+ unsigned int id;
#define BCACHEDEVNAME_SIZE 12
char name[BCACHEDEVNAME_SIZE];
@@ -264,18 +264,19 @@ struct bcache_device {
#define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
- unsigned nr_stripes;
- unsigned stripe_size;
+ unsigned int nr_stripes;
+ unsigned int stripe_size;
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
struct bio_set bio_split;
- unsigned data_csum:1;
+ unsigned int data_csum:1;
- int (*cache_miss)(struct btree *, struct search *,
- struct bio *, unsigned);
- int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long);
+ int (*cache_miss)(struct btree *b, struct search *s,
+ struct bio *bio, unsigned int sectors);
+ int (*ioctl)(struct bcache_device *d, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
};
struct io {
@@ -284,7 +285,7 @@ struct io {
struct list_head lru;
unsigned long jiffies;
- unsigned sequential;
+ unsigned int sequential;
sector_t last;
};
@@ -328,13 +329,6 @@ struct cached_dev {
*/
atomic_t has_dirty;
- /*
- * Set to zero by things that touch the backing volume-- except
- * writeback. Incremented by writeback. Used to determine when to
- * accelerate idle writeback.
- */
- atomic_t backing_idle;
-
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
@@ -365,18 +359,18 @@ struct cached_dev {
struct cache_accounting accounting;
/* The rest of this all shows up in sysfs */
- unsigned sequential_cutoff;
- unsigned readahead;
+ unsigned int sequential_cutoff;
+ unsigned int readahead;
- unsigned io_disable:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
+ unsigned int io_disable:1;
+ unsigned int verify:1;
+ unsigned int bypass_torture_test:1;
- unsigned partial_stripes_expensive:1;
- unsigned writeback_metadata:1;
- unsigned writeback_running:1;
+ unsigned int partial_stripes_expensive:1;
+ unsigned int writeback_metadata:1;
+ unsigned int writeback_running:1;
unsigned char writeback_percent;
- unsigned writeback_delay;
+ unsigned int writeback_delay;
uint64_t writeback_rate_target;
int64_t writeback_rate_proportional;
@@ -384,16 +378,16 @@ struct cached_dev {
int64_t writeback_rate_integral_scaled;
int32_t writeback_rate_change;
- unsigned writeback_rate_update_seconds;
- unsigned writeback_rate_i_term_inverse;
- unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_minimum;
+ unsigned int writeback_rate_update_seconds;
+ unsigned int writeback_rate_i_term_inverse;
+ unsigned int writeback_rate_p_term_inverse;
+ unsigned int writeback_rate_minimum;
enum stop_on_failure stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
atomic_t io_errors;
- unsigned error_limit;
- unsigned offline_seconds;
+ unsigned int error_limit;
+ unsigned int offline_seconds;
char backing_dev_name[BDEVNAME_SIZE];
};
@@ -423,9 +417,9 @@ struct cache {
/*
* When allocating new buckets, prio_write() gets first dibs - since we
* may not be allocate at all without writing priorities and gens.
- * prio_buckets[] contains the last buckets we wrote priorities to (so
- * gc can mark them as metadata), prio_next[] contains the buckets
- * allocated for the next prio write.
+ * prio_last_buckets[] contains the last buckets we wrote priorities to
+ * (so gc can mark them as metadata), prio_buckets[] contains the
+ * buckets allocated for the next prio write.
*/
uint64_t *prio_buckets;
uint64_t *prio_last_buckets;
@@ -454,7 +448,7 @@ struct cache {
* until a gc finishes - otherwise we could pointlessly burn a ton of
* cpu
*/
- unsigned invalidate_needs_gc;
+ unsigned int invalidate_needs_gc;
bool discard; /* Get rid of? */
@@ -474,11 +468,12 @@ struct cache {
struct gc_stat {
size_t nodes;
+ size_t nodes_pre;
size_t key_bytes;
size_t nkeys;
uint64_t data; /* sectors */
- unsigned in_use; /* percent */
+ unsigned int in_use; /* percent */
};
/*
@@ -514,6 +509,8 @@ struct cache_set {
struct cache_accounting accounting;
unsigned long flags;
+ atomic_t idle_counter;
+ atomic_t at_max_writeback_rate;
struct cache_sb sb;
@@ -522,9 +519,11 @@ struct cache_set {
int caches_loaded;
struct bcache_device **devices;
- unsigned devices_max_used;
+ unsigned int devices_max_used;
+ atomic_t attached_dev_nr;
struct list_head cached_devs;
uint64_t cached_dev_sectors;
+ atomic_long_t flash_dev_dirty_sectors;
struct closure caching;
struct closure sb_write;
@@ -550,7 +549,7 @@ struct cache_set {
* Default number of pages for a new btree node - may be less than a
* full bucket
*/
- unsigned btree_pages;
+ unsigned int btree_pages;
/*
* Lists of struct btrees; lru is the list for structs that have memory
@@ -573,7 +572,7 @@ struct cache_set {
struct list_head btree_cache_freed;
/* Number of elements in btree_cache + btree_cache_freeable lists */
- unsigned btree_cache_used;
+ unsigned int btree_cache_used;
/*
* If we need to allocate memory for a new btree node and that
@@ -603,6 +602,10 @@ struct cache_set {
*/
atomic_t rescale;
/*
+ * Used for GC, to identify whether any front-side I/Os are in flight
+ */
+ atomic_t search_inflight;
+ /*
* When we invalidate buckets, we use both the priority and the amount
* of good data to determine which buckets to reuse first - to weight
* those together consistently we keep track of the smallest nonzero
@@ -611,8 +614,8 @@ struct cache_set {
uint16_t min_prio;
/*
- * max(gen - last_gc) for all buckets. When it gets too big we have to gc
- * to keep gens from wrapping around.
+ * max(gen - last_gc) for all buckets. When it gets too big we have to
+ * gc to keep gens from wrapping around.
*/
uint8_t need_gc;
struct gc_stat gc_stats;
@@ -647,7 +650,7 @@ struct cache_set {
struct mutex verify_lock;
#endif
- unsigned nr_uuids;
+ unsigned int nr_uuids;
struct uuid_entry *uuids;
BKEY_PADDED(uuid_bucket);
struct closure uuid_write;
@@ -668,12 +671,12 @@ struct cache_set {
struct journal journal;
#define CONGESTED_MAX 1024
- unsigned congested_last_us;
+ unsigned int congested_last_us;
atomic_t congested;
/* The rest of this all shows up in sysfs */
- unsigned congested_read_threshold_us;
- unsigned congested_write_threshold_us;
+ unsigned int congested_read_threshold_us;
+ unsigned int congested_write_threshold_us;
struct time_stats btree_gc_time;
struct time_stats btree_split_time;
@@ -692,16 +695,16 @@ struct cache_set {
ON_ERROR_PANIC,
} on_error;
#define DEFAULT_IO_ERROR_LIMIT 8
- unsigned error_limit;
- unsigned error_decay;
+ unsigned int error_limit;
+ unsigned int error_decay;
unsigned short journal_delay_ms;
bool expensive_debug_checks;
- unsigned verify:1;
- unsigned key_merging_disabled:1;
- unsigned gc_always_rewrite:1;
- unsigned shrinker_disabled:1;
- unsigned copy_gc_enabled:1;
+ unsigned int verify:1;
+ unsigned int key_merging_disabled:1;
+ unsigned int gc_always_rewrite:1;
+ unsigned int shrinker_disabled:1;
+ unsigned int copy_gc_enabled:1;
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
@@ -710,7 +713,7 @@ struct cache_set {
};
struct bbio {
- unsigned submit_time_us;
+ unsigned int submit_time_us;
union {
struct bkey key;
uint64_t _pad[3];
@@ -727,10 +730,10 @@ struct bbio {
#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b) \
- ((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
+ ((unsigned int) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
#define btree_default_blocks(c) \
- ((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
+ ((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
@@ -759,21 +762,21 @@ static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
static inline struct cache *PTR_CACHE(struct cache_set *c,
const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
return c->cache[PTR_DEV(k, ptr)];
}
static inline size_t PTR_BUCKET_NR(struct cache_set *c,
const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}
static inline struct bucket *PTR_BUCKET(struct cache_set *c,
const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}
@@ -781,17 +784,18 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
uint8_t r = a - b;
+
return r > 128U ? 0 : r;
}
static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
- unsigned i)
+ unsigned int i)
{
return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}
static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
- unsigned i)
+ unsigned int i)
{
return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}
@@ -877,16 +881,16 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
#define BUCKET_GC_GEN_MAX 96U
#define kobj_attribute_write(n, fn) \
- static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
+ static struct kobj_attribute ksysfs_##n = __ATTR(n, 0200, NULL, fn)
#define kobj_attribute_rw(n, show, store) \
static struct kobj_attribute ksysfs_##n = \
- __ATTR(n, S_IWUSR|S_IRUSR, show, store)
+ __ATTR(n, 0600, show, store)
static inline void wake_up_allocators(struct cache_set *c)
{
struct cache *ca;
- unsigned i;
+ unsigned int i;
for_each_cache(ca, c, i)
wake_up_process(ca->alloc_thread);
@@ -922,40 +926,43 @@ static inline void wait_for_kthread_stop(void)
/* Forward declarations */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
-void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
- const char *);
-void bch_bbio_free(struct bio *, struct cache_set *);
-struct bio *bch_bbio_alloc(struct cache_set *);
-
-void __bch_submit_bbio(struct bio *, struct cache_set *);
-void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
-
-uint8_t bch_inc_gen(struct cache *, struct bucket *);
-void bch_rescale_priorities(struct cache_set *, int);
-
-bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
-void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
-
-void __bch_bucket_free(struct cache *, struct bucket *);
-void bch_bucket_free(struct cache_set *, struct bkey *);
-
-long bch_bucket_alloc(struct cache *, unsigned, bool);
-int __bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
-int bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
-bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
- unsigned, unsigned, bool);
+void bch_count_io_errors(struct cache *ca, blk_status_t error,
+ int is_read, const char *m);
+void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
+ blk_status_t error, const char *m);
+void bch_bbio_endio(struct cache_set *c, struct bio *bio,
+ blk_status_t error, const char *m);
+void bch_bbio_free(struct bio *bio, struct cache_set *c);
+struct bio *bch_bbio_alloc(struct cache_set *c);
+
+void __bch_submit_bbio(struct bio *bio, struct cache_set *c);
+void bch_submit_bbio(struct bio *bio, struct cache_set *c,
+ struct bkey *k, unsigned int ptr);
+
+uint8_t bch_inc_gen(struct cache *ca, struct bucket *b);
+void bch_rescale_priorities(struct cache_set *c, int sectors);
+
+bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b);
+void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b);
+
+void __bch_bucket_free(struct cache *ca, struct bucket *b);
+void bch_bucket_free(struct cache_set *c, struct bkey *k);
+
+long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait);
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
+ struct bkey *k, int n, bool wait);
+int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
+ struct bkey *k, int n, bool wait);
+bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
+ unsigned int sectors, unsigned int write_point,
+ unsigned int write_prio, bool wait);
bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
-bool bch_cache_set_error(struct cache_set *, const char *, ...);
+bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
-void bch_prio_write(struct cache *);
-void bch_write_bdev_super(struct cached_dev *, struct closure *);
+void bch_prio_write(struct cache *ca);
+void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
extern struct workqueue_struct *bcache_wq;
extern struct mutex bch_register_lock;
@@ -967,35 +974,36 @@ extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;
-void bch_cached_dev_release(struct kobject *);
-void bch_flash_dev_release(struct kobject *);
-void bch_cache_set_release(struct kobject *);
-void bch_cache_release(struct kobject *);
+void bch_cached_dev_release(struct kobject *kobj);
+void bch_flash_dev_release(struct kobject *kobj);
+void bch_cache_set_release(struct kobject *kobj);
+void bch_cache_release(struct kobject *kobj);
-int bch_uuid_write(struct cache_set *);
-void bcache_write_super(struct cache_set *);
+int bch_uuid_write(struct cache_set *c);
+void bcache_write_super(struct cache_set *c);
int bch_flash_dev_create(struct cache_set *c, uint64_t size);
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
-void bch_cached_dev_detach(struct cached_dev *);
-void bch_cached_dev_run(struct cached_dev *);
-void bcache_device_stop(struct bcache_device *);
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+ uint8_t *set_uuid);
+void bch_cached_dev_detach(struct cached_dev *dc);
+void bch_cached_dev_run(struct cached_dev *dc);
+void bcache_device_stop(struct bcache_device *d);
-void bch_cache_set_unregister(struct cache_set *);
-void bch_cache_set_stop(struct cache_set *);
+void bch_cache_set_unregister(struct cache_set *c);
+void bch_cache_set_stop(struct cache_set *c);
-struct cache_set *bch_cache_set_alloc(struct cache_sb *);
-void bch_btree_cache_free(struct cache_set *);
-int bch_btree_cache_alloc(struct cache_set *);
-void bch_moving_init_cache_set(struct cache_set *);
-int bch_open_buckets_alloc(struct cache_set *);
-void bch_open_buckets_free(struct cache_set *);
+struct cache_set *bch_cache_set_alloc(struct cache_sb *sb);
+void bch_btree_cache_free(struct cache_set *c);
+int bch_btree_cache_alloc(struct cache_set *c);
+void bch_moving_init_cache_set(struct cache_set *c);
+int bch_open_buckets_alloc(struct cache_set *c);
+void bch_open_buckets_free(struct cache_set *c);
int bch_cache_allocator_start(struct cache *ca);
void bch_debug_exit(void);
-int bch_debug_init(struct kobject *);
+void bch_debug_init(struct kobject *kobj);
void bch_request_exit(void);
int bch_request_init(void);
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index f3403b45bc28..8f07fa6e1739 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -18,31 +18,31 @@
#ifdef CONFIG_BCACHE_DEBUG
-void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
{
struct bkey *k, *next;
for (k = i->start; k < bset_bkey_last(i); k = next) {
next = bkey_next(k);
- printk(KERN_ERR "block %u key %u/%u: ", set,
- (unsigned) ((u64 *) k - i->d), i->keys);
+ pr_err("block %u key %u/%u: ", set,
+ (unsigned int) ((u64 *) k - i->d), i->keys);
if (b->ops->key_dump)
b->ops->key_dump(b, k);
else
- printk("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
+ pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
if (next < bset_bkey_last(i) &&
bkey_cmp(k, b->ops->is_extents ?
&START_KEY(next) : next) > 0)
- printk(KERN_ERR "Key skipped backwards\n");
+ pr_err("Key skipped backwards\n");
}
}
void bch_dump_bucket(struct btree_keys *b)
{
- unsigned i;
+ unsigned int i;
console_lock();
for (i = 0; i <= b->nsets; i++)
@@ -53,7 +53,7 @@ void bch_dump_bucket(struct btree_keys *b)
int __bch_count_data(struct btree_keys *b)
{
- unsigned ret = 0;
+ unsigned int ret = 0;
struct btree_iter iter;
struct bkey *k;
@@ -128,7 +128,7 @@ static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
/* Keylists */
-int __bch_keylist_realloc(struct keylist *l, unsigned u64s)
+int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
{
size_t oldsize = bch_keylist_nkeys(l);
size_t newsize = oldsize + u64s;
@@ -180,7 +180,7 @@ void bch_keylist_pop_front(struct keylist *l)
/* Key/pointer manipulation */
void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
- unsigned i)
+ unsigned int i)
{
BUG_ON(i > KEY_PTRS(src));
@@ -194,7 +194,7 @@ void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
bool __bch_cut_front(const struct bkey *where, struct bkey *k)
{
- unsigned i, len = 0;
+ unsigned int i, len = 0;
if (bkey_cmp(where, &START_KEY(k)) <= 0)
return false;
@@ -214,7 +214,7 @@ bool __bch_cut_front(const struct bkey *where, struct bkey *k)
bool __bch_cut_back(const struct bkey *where, struct bkey *k)
{
- unsigned len = 0;
+ unsigned int len = 0;
if (bkey_cmp(where, k) >= 0)
return false;
@@ -240,9 +240,9 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
struct bkey_float {
- unsigned exponent:BKEY_EXPONENT_BITS;
- unsigned m:BKEY_MID_BITS;
- unsigned mantissa:BKEY_MANTISSA_BITS;
+ unsigned int exponent:BKEY_EXPONENT_BITS;
+ unsigned int m:BKEY_MID_BITS;
+ unsigned int mantissa:BKEY_MANTISSA_BITS;
} __packed;
/*
@@ -311,7 +311,9 @@ void bch_btree_keys_free(struct btree_keys *b)
}
EXPORT_SYMBOL(bch_btree_keys_free);
-int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
+int bch_btree_keys_alloc(struct btree_keys *b,
+ unsigned int page_order,
+ gfp_t gfp)
{
struct bset_tree *t = b->set;
@@ -345,7 +347,7 @@ EXPORT_SYMBOL(bch_btree_keys_alloc);
void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
bool *expensive_debug_checks)
{
- unsigned i;
+ unsigned int i;
b->ops = ops;
b->expensive_debug_checks = expensive_debug_checks;
@@ -366,7 +368,11 @@ EXPORT_SYMBOL(bch_btree_keys_init);
/* Binary tree stuff for auxiliary search trees */
-static unsigned inorder_next(unsigned j, unsigned size)
+/*
+ * Return the array index that follows j in an in-order traversal
+ * of a binary tree which is stored in a linear array
+ */
+static unsigned int inorder_next(unsigned int j, unsigned int size)
{
if (j * 2 + 1 < size) {
j = j * 2 + 1;
@@ -379,7 +385,11 @@ static unsigned inorder_next(unsigned j, unsigned size)
return j;
}
-static unsigned inorder_prev(unsigned j, unsigned size)
+/*
+ * Return the array index that precedes j in an in-order traversal
+ * of a binary tree which is stored in a linear array
+ */
+static unsigned int inorder_prev(unsigned int j, unsigned int size)
{
if (j * 2 < size) {
j = j * 2;
@@ -392,7 +402,8 @@ static unsigned inorder_prev(unsigned j, unsigned size)
return j;
}
-/* I have no idea why this code works... and I'm the one who wrote it
+/*
+ * I have no idea why this code works... and I'm the one who wrote it
*
* However, I do know what it does:
* Given a binary tree constructed in an array (i.e. how you normally implement
@@ -405,10 +416,12 @@ static unsigned inorder_prev(unsigned j, unsigned size)
* extra is a function of size:
* extra = (size - rounddown_pow_of_two(size - 1)) << 1;
*/
-static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
+static unsigned int __to_inorder(unsigned int j,
+ unsigned int size,
+ unsigned int extra)
{
- unsigned b = fls(j);
- unsigned shift = fls(size - 1) - b;
+ unsigned int b = fls(j);
+ unsigned int shift = fls(size - 1) - b;
j ^= 1U << (b - 1);
j <<= 1;
@@ -421,14 +434,20 @@ static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
return j;
}
-static unsigned to_inorder(unsigned j, struct bset_tree *t)
+/*
+ * Return the cacheline index in bset_tree->data, where j is the index
+ * in the linear array which stores the auxiliary binary tree
+ */
+static unsigned int to_inorder(unsigned int j, struct bset_tree *t)
{
return __to_inorder(j, t->size, t->extra);
}
-static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
+static unsigned int __inorder_to_tree(unsigned int j,
+ unsigned int size,
+ unsigned int extra)
{
- unsigned shift;
+ unsigned int shift;
if (j > extra)
j += j - extra;
@@ -441,7 +460,11 @@ static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
return j;
}
-static unsigned inorder_to_tree(unsigned j, struct bset_tree *t)
+/*
+ * Return an index into the linear array which stores the auxiliary binary
+ * tree; j is the cacheline index of t->data.
+ */
+static unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t)
{
return __inorder_to_tree(j, t->size, t->extra);
}
@@ -452,14 +475,15 @@ void inorder_test(void)
unsigned long done = 0;
ktime_t start = ktime_get();
- for (unsigned size = 2;
+ for (unsigned int size = 2;
size < 65536000;
size++) {
- unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
- unsigned i = 1, j = rounddown_pow_of_two(size - 1);
+ unsigned int extra =
+ (size - rounddown_pow_of_two(size - 1)) << 1;
+ unsigned int i = 1, j = rounddown_pow_of_two(size - 1);
if (!(size % 4096))
- printk(KERN_NOTICE "loop %u, %llu per us\n", size,
+ pr_notice("loop %u, %llu per us\n", size,
done / ktime_us_delta(ktime_get(), start));
while (1) {
@@ -502,30 +526,31 @@ void inorder_test(void)
* of the previous key so we can walk backwards to it from t->tree[j]'s key.
*/
-static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline,
- unsigned offset)
+static struct bkey *cacheline_to_bkey(struct bset_tree *t,
+ unsigned int cacheline,
+ unsigned int offset)
{
return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
}
-static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
+static unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
{
return ((void *) k - (void *) t->data) / BSET_CACHELINE;
}
-static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
- unsigned cacheline,
+static unsigned int bkey_to_cacheline_offset(struct bset_tree *t,
+ unsigned int cacheline,
struct bkey *k)
{
return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
}
-static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j)
{
return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
}
-static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j)
{
return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
}
@@ -534,7 +559,7 @@ static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
* For the write set - the one we're currently inserting keys into - we don't
* maintain a full search tree, we just keep a simple lookup table in t->prev.
*/
-static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
+static struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline)
{
return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
}
@@ -546,14 +571,29 @@ static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
return low;
}
-static inline unsigned bfloat_mantissa(const struct bkey *k,
+/*
+ * Calculate mantissa value for struct bkey_float.
+ * If most significant bit of f->exponent is not set, then
+ * - f->exponent >> 6 is 0
+ * - p[0] points to bkey->low
+ * - p[-1] borrows bits from KEY_INODE() of bkey->high
+ * If the most significant bit of f->exponent is set, then
+ * - f->exponent >> 6 is 1
+ * - p[0] points to bits from KEY_INODE() of bkey->high
+ * - p[-1] points to other bits from KEY_INODE() of
+ * bkey->high too.
+ * See make_bfloat() to check when most significant bit of f->exponent
+ * is set or not.
+ */
+static inline unsigned int bfloat_mantissa(const struct bkey *k,
struct bkey_float *f)
{
const uint64_t *p = &k->low - (f->exponent >> 6);
+
return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
}
-static void make_bfloat(struct bset_tree *t, unsigned j)
+static void make_bfloat(struct bset_tree *t, unsigned int j)
{
struct bkey_float *f = &t->tree[j];
struct bkey *m = tree_to_bkey(t, j);
@@ -570,6 +610,16 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
BUG_ON(m < l || m > r);
BUG_ON(bkey_next(p) != m);
+ /*
+ * If l and r have different KEY_INODE values (different backing
+ * device), f->exponent records how many least significant bits
+ * are different in KEY_INODE values and sets most significant
+ * bits to 1 (by +64).
+ * If l and r have same KEY_INODE value, f->exponent records
+ * how many different bits in least significant bits of bkey->low.
+ * See bfloat_mantissa() for how the most significant bit of
+ * f->exponent is used to calculate the bfloat mantissa value.
+ */
if (KEY_INODE(l) != KEY_INODE(r))
f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64;
else
@@ -591,7 +641,7 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
{
if (t != b->set) {
- unsigned j = roundup(t[-1].size,
+ unsigned int j = roundup(t[-1].size,
64 / sizeof(struct bkey_float));
t->tree = t[-1].tree + j;
@@ -633,17 +683,26 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
}
EXPORT_SYMBOL(bch_bset_init_next);
+/*
+ * Build auxiliary binary tree 'struct bset_tree *t', this tree is used to
+ * accelerate bkey search in a btree node (pointed by bset_tree->data in
+ * memory). After search in the auxiliary tree by calling bset_search_tree(),
+ * a struct bset_search_iter is returned which indicates range [l, r] from
+ * bset_tree->data where the searching bkey might be inside. Then a followed
+ * linear comparison does the exact search, see __bch_bset_search() for how
+ * the auxiliary tree is used.
+ */
void bch_bset_build_written_tree(struct btree_keys *b)
{
struct bset_tree *t = bset_tree_last(b);
struct bkey *prev = NULL, *k = t->data->start;
- unsigned j, cacheline = 1;
+ unsigned int j, cacheline = 1;
b->last_set_unwritten = 0;
bset_alloc_tree(b, t);
- t->size = min_t(unsigned,
+ t->size = min_t(unsigned int,
bkey_to_cacheline(t, bset_bkey_last(t->data)),
b->set->tree + btree_keys_cachelines(b) - t->tree);
@@ -683,7 +742,7 @@ EXPORT_SYMBOL(bch_bset_build_written_tree);
void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
{
struct bset_tree *t;
- unsigned inorder, j = 1;
+ unsigned int inorder, j = 1;
for (t = b->set; t <= bset_tree_last(b); t++)
if (k < bset_bkey_last(t->data))
@@ -730,14 +789,15 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
struct bset_tree *t,
struct bkey *k)
{
- unsigned shift = bkey_u64s(k);
- unsigned j = bkey_to_cacheline(t, k);
+ unsigned int shift = bkey_u64s(k);
+ unsigned int j = bkey_to_cacheline(t, k);
/* We're getting called from btree_split() or btree_gc, just bail out */
if (!t->size)
return;
- /* k is the key we just inserted; we need to find the entry in the
+ /*
+ * k is the key we just inserted; we need to find the entry in the
* lookup table for the first key that is strictly greater than k:
* it's either k's cacheline or the next one
*/
@@ -745,7 +805,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
table_to_bkey(t, j) <= k)
j++;
- /* Adjust all the lookup table entries, and find a new key for any that
+ /*
+ * Adjust all the lookup table entries, and find a new key for any that
* have gotten too big
*/
for (; j < t->size; j++) {
@@ -770,7 +831,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
k != bset_bkey_last(t->data);
k = bkey_next(k))
if (t->size == bkey_to_cacheline(t, k)) {
- t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k);
+ t->prev[t->size] =
+ bkey_to_cacheline_offset(t, t->size, k);
t->size++;
}
}
@@ -818,10 +880,10 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
}
EXPORT_SYMBOL(bch_bset_insert);
-unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bkey *replace_key)
{
- unsigned status = BTREE_INSERT_STATUS_NO_INSERT;
+ unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
struct btree_iter iter;
@@ -873,10 +935,10 @@ struct bset_search_iter {
static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
const struct bkey *search)
{
- unsigned li = 0, ri = t->size;
+ unsigned int li = 0, ri = t->size;
while (li + 1 != ri) {
- unsigned m = (li + ri) >> 1;
+ unsigned int m = (li + ri) >> 1;
if (bkey_cmp(table_to_bkey(t, m), search) > 0)
ri = m;
@@ -895,10 +957,22 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
{
struct bkey *l, *r;
struct bkey_float *f;
- unsigned inorder, j, n = 1;
+ unsigned int inorder, j, n = 1;
do {
- unsigned p = n << 4;
+ /*
+ * A bit trick here.
+ * If p < t->size, (int)(p - t->size) is negative, so its most
+ * significant bit is set and an arithmetic right shift by 31
+ * bits yields -1 (all bits set). If p >= t->size, the most
+ * significant bit is not set and the shift yields 0.
+ * So the following 2 lines are equivalent to
+ * if (p >= t->size)
+ * p = 0;
+ * but a branch instruction is avoided.
+ */
+ unsigned int p = n << 4;
+
p &= ((int) (p - t->size)) >> 31;
prefetch(&t->tree[p]);
@@ -907,6 +981,9 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
f = &t->tree[j];
/*
+ * Similar bit trick, use subtract operation to avoid a branch
+ * instruction.
+ *
* n = (f->mantissa > bfloat_mantissa())
* ? j * 2
* : j * 2 + 1;
@@ -915,7 +992,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
* to work - that's done in make_bfloat()
*/
if (likely(f->exponent != 127))
- n = j * 2 + (((unsigned)
+ n = j * 2 + (((unsigned int)
(f->mantissa -
bfloat_mantissa(search, f))) >> 31);
else
@@ -1046,6 +1123,7 @@ static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
struct bset_tree *start)
{
struct bkey *ret = NULL;
+
iter->size = ARRAY_SIZE(iter->data);
iter->used = 0;
@@ -1121,7 +1199,8 @@ void bch_bset_sort_state_free(struct bset_sort_state *state)
mempool_exit(&state->pool);
}
-int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
+int bch_bset_sort_state_init(struct bset_sort_state *state,
+ unsigned int page_order)
{
spin_lock_init(&state->time.lock);
@@ -1174,7 +1253,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
}
static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
- unsigned start, unsigned order, bool fixup,
+ unsigned int start, unsigned int order, bool fixup,
struct bset_sort_state *state)
{
uint64_t start_time;
@@ -1225,7 +1304,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
bch_time_stats_update(&state->time, start_time);
}
-void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
+void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
@@ -1235,7 +1314,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
if (start) {
- unsigned i;
+ unsigned int i;
for (i = start; i <= b->nsets; i++)
keys += b->set[i].data->keys;
@@ -1260,8 +1339,8 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
-
struct btree_iter iter;
+
bch_btree_iter_init(b, &iter, NULL);
btree_mergesort(b, new->set->data, &iter, false, true);
@@ -1275,7 +1354,7 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
{
- unsigned crit = SORT_CRIT;
+ unsigned int crit = SORT_CRIT;
int i;
/* Don't sort if nothing to do */
@@ -1304,7 +1383,7 @@ EXPORT_SYMBOL(bch_btree_sort_lazy);
void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i <= b->nsets; i++) {
struct bset_tree *t = &b->set[i];
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index b867f2200495..bac76aabca6d 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -163,10 +163,10 @@ struct bset_tree {
*/
/* size of the binary tree and prev array */
- unsigned size;
+ unsigned int size;
/* function of size - precalculated for to_inorder() */
- unsigned extra;
+ unsigned int extra;
/* copy of the last key in the set */
struct bkey end;
@@ -187,18 +187,25 @@ struct bset_tree {
};
struct btree_keys_ops {
- bool (*sort_cmp)(struct btree_iter_set,
- struct btree_iter_set);
- struct bkey *(*sort_fixup)(struct btree_iter *, struct bkey *);
- bool (*insert_fixup)(struct btree_keys *, struct bkey *,
- struct btree_iter *, struct bkey *);
- bool (*key_invalid)(struct btree_keys *,
- const struct bkey *);
- bool (*key_bad)(struct btree_keys *, const struct bkey *);
- bool (*key_merge)(struct btree_keys *,
- struct bkey *, struct bkey *);
- void (*key_to_text)(char *, size_t, const struct bkey *);
- void (*key_dump)(struct btree_keys *, const struct bkey *);
+ bool (*sort_cmp)(struct btree_iter_set l,
+ struct btree_iter_set r);
+ struct bkey *(*sort_fixup)(struct btree_iter *iter,
+ struct bkey *tmp);
+ bool (*insert_fixup)(struct btree_keys *b,
+ struct bkey *insert,
+ struct btree_iter *iter,
+ struct bkey *replace_key);
+ bool (*key_invalid)(struct btree_keys *bk,
+ const struct bkey *k);
+ bool (*key_bad)(struct btree_keys *bk,
+ const struct bkey *k);
+ bool (*key_merge)(struct btree_keys *bk,
+ struct bkey *l, struct bkey *r);
+ void (*key_to_text)(char *buf,
+ size_t size,
+ const struct bkey *k);
+ void (*key_dump)(struct btree_keys *keys,
+ const struct bkey *k);
/*
* Only used for deciding whether to use START_KEY(k) or just the key
@@ -211,7 +218,7 @@ struct btree_keys {
const struct btree_keys_ops *ops;
uint8_t page_order;
uint8_t nsets;
- unsigned last_set_unwritten:1;
+ unsigned int last_set_unwritten:1;
bool *expensive_debug_checks;
/*
@@ -239,12 +246,14 @@ static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
}
-static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i)
+static inline unsigned int bset_byte_offset(struct btree_keys *b,
+ struct bset *i)
{
return ((size_t) i) - ((size_t) b->set->data);
}
-static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i)
+static inline unsigned int bset_sector_offset(struct btree_keys *b,
+ struct bset *i)
{
return bset_byte_offset(b, i) >> 9;
}
@@ -273,25 +282,27 @@ static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b)
}
static inline struct bset *bset_next_set(struct btree_keys *b,
- unsigned block_bytes)
+ unsigned int block_bytes)
{
struct bset *i = bset_tree_last(b)->data;
return ((void *) i) + roundup(set_bytes(i), block_bytes);
}
-void bch_btree_keys_free(struct btree_keys *);
-int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t);
-void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
- bool *);
-
-void bch_bset_init_next(struct btree_keys *, struct bset *, uint64_t);
-void bch_bset_build_written_tree(struct btree_keys *);
-void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *);
-bool bch_bkey_try_merge(struct btree_keys *, struct bkey *, struct bkey *);
-void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *);
-unsigned bch_btree_insert_key(struct btree_keys *, struct bkey *,
- struct bkey *);
+void bch_btree_keys_free(struct btree_keys *b);
+int bch_btree_keys_alloc(struct btree_keys *b, unsigned int page_order,
+ gfp_t gfp);
+void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
+ bool *expensive_debug_checks);
+
+void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic);
+void bch_bset_build_written_tree(struct btree_keys *b);
+void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k);
+bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r);
+void bch_bset_insert(struct btree_keys *b, struct bkey *where,
+ struct bkey *insert);
+unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+ struct bkey *replace_key);
enum {
BTREE_INSERT_STATUS_NO_INSERT = 0,
@@ -313,18 +324,21 @@ struct btree_iter {
} data[MAX_BSETS];
};
-typedef bool (*ptr_filter_fn)(struct btree_keys *, const struct bkey *);
+typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
-struct bkey *bch_btree_iter_next(struct btree_iter *);
-struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
- struct btree_keys *, ptr_filter_fn);
+struct bkey *bch_btree_iter_next(struct btree_iter *iter);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
+ struct btree_keys *b,
+ ptr_filter_fn fn);
-void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
-struct bkey *bch_btree_iter_init(struct btree_keys *, struct btree_iter *,
- struct bkey *);
+void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
+ struct bkey *end);
+struct bkey *bch_btree_iter_init(struct btree_keys *b,
+ struct btree_iter *iter,
+ struct bkey *search);
-struct bkey *__bch_bset_search(struct btree_keys *, struct bset_tree *,
- const struct bkey *);
+struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
+ const struct bkey *search);
/*
* Returns the first key that is strictly greater than search
@@ -349,21 +363,23 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
struct bset_sort_state {
mempool_t pool;
- unsigned page_order;
- unsigned crit_factor;
+ unsigned int page_order;
+ unsigned int crit_factor;
struct time_stats time;
};
-void bch_bset_sort_state_free(struct bset_sort_state *);
-int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
-void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
-void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
- struct bset_sort_state *);
-void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *,
- struct bset_sort_state *);
-void bch_btree_sort_partial(struct btree_keys *, unsigned,
- struct bset_sort_state *);
+void bch_bset_sort_state_free(struct bset_sort_state *state);
+int bch_bset_sort_state_init(struct bset_sort_state *state,
+ unsigned int page_order);
+void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state);
+void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
+ struct bset_sort_state *state);
+void bch_btree_sort_and_fix_extents(struct btree_keys *b,
+ struct btree_iter *iter,
+ struct bset_sort_state *state);
+void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
+ struct bset_sort_state *state);
static inline void bch_btree_sort(struct btree_keys *b,
struct bset_sort_state *state)
@@ -377,13 +393,13 @@ struct bset_stats {
size_t floats, failed;
};
-void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
+void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state);
/* Bkey utility code */
#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
-static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
+static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx)
{
return bkey_idx(i->start, idx);
}
@@ -401,10 +417,10 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l,
: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}
-void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
- unsigned);
-bool __bch_cut_front(const struct bkey *, struct bkey *);
-bool __bch_cut_back(const struct bkey *, struct bkey *);
+void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
+ unsigned int i);
+bool __bch_cut_front(const struct bkey *where, struct bkey *k);
+bool __bch_cut_back(const struct bkey *where, struct bkey *k);
static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
{
@@ -522,18 +538,20 @@ static inline size_t bch_keylist_bytes(struct keylist *l)
return bch_keylist_nkeys(l) * sizeof(uint64_t);
}
-struct bkey *bch_keylist_pop(struct keylist *);
-void bch_keylist_pop_front(struct keylist *);
-int __bch_keylist_realloc(struct keylist *, unsigned);
+struct bkey *bch_keylist_pop(struct keylist *l);
+void bch_keylist_pop_front(struct keylist *l);
+int __bch_keylist_realloc(struct keylist *l, unsigned int u64s);
/* Debug stuff */
#ifdef CONFIG_BCACHE_DEBUG
-int __bch_count_data(struct btree_keys *);
-void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
-void bch_dump_bucket(struct btree_keys *);
+int __bch_count_data(struct btree_keys *b);
+void __printf(2, 3) __bch_check_keys(struct btree_keys *b,
+ const char *fmt,
+ ...);
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set);
+void bch_dump_bucket(struct btree_keys *b);
#else
@@ -541,7 +559,7 @@ static inline int __bch_count_data(struct btree_keys *b) { return -1; }
static inline void __printf(2, 3)
__bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set);
#endif
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 547c9eedc2f4..e7d4817681f2 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -90,6 +90,9 @@
#define MAX_NEED_GC 64
#define MAX_SAVE_PRIO 72
+#define MAX_GC_TIMES 100
+#define MIN_GC_NODES 100
+#define GC_SLEEP_MS 100
#define PTR_DIRTY_BIT (((uint64_t) 1 << 36))
@@ -180,7 +183,7 @@ static void bch_btree_init_next(struct btree *b)
void bkey_put(struct cache_set *c, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i))
@@ -284,6 +287,7 @@ err:
static void btree_node_read_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
+
closure_put(cl);
}
@@ -432,7 +436,10 @@ static void do_btree_node_write(struct btree *b)
continue_at(cl, btree_node_write_done, NULL);
} else {
- /* No problem for multipage bvec since the bio is just allocated */
+ /*
+ * No problem for multipage bvec since the bio is
+ * just allocated
+ */
b->bio->bi_vcnt = 0;
bch_bio_map(b->bio, i);
@@ -476,7 +483,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
void bch_btree_node_write(struct btree *b, struct closure *parent)
{
- unsigned nsets = b->keys.nsets;
+ unsigned int nsets = b->keys.nsets;
lockdep_assert_held(&b->lock);
@@ -578,7 +585,7 @@ static void mca_bucket_free(struct btree *b)
list_move(&b->list, &b->c->btree_cache_freeable);
}
-static unsigned btree_order(struct bkey *k)
+static unsigned int btree_order(struct bkey *k)
{
return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1);
}
@@ -586,7 +593,7 @@ static unsigned btree_order(struct bkey *k)
static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
{
if (!bch_btree_keys_alloc(&b->keys,
- max_t(unsigned,
+ max_t(unsigned int,
ilog2(b->c->btree_pages),
btree_order(k)),
gfp)) {
@@ -601,6 +608,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
struct bkey *k, gfp_t gfp)
{
struct btree *b = kzalloc(sizeof(struct btree), gfp);
+
if (!b)
return NULL;
@@ -617,7 +625,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
return b;
}
-static int mca_reap(struct btree *b, unsigned min_order, bool flush)
+static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
{
struct closure cl;
@@ -743,6 +751,7 @@ void bch_btree_cache_free(struct cache_set *c)
{
struct btree *b;
struct closure cl;
+
closure_init_stack(&cl);
if (c->shrink.list.next)
@@ -783,7 +792,7 @@ void bch_btree_cache_free(struct cache_set *c)
int bch_btree_cache_alloc(struct cache_set *c)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < mca_reserve(c); i++)
if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL))
@@ -1008,6 +1017,13 @@ retry:
BUG_ON(b->level != level);
}
+ if (btree_node_io_error(b)) {
+ rw_unlock(write, b);
+ return ERR_PTR(-EIO);
+ }
+
+ BUG_ON(!b->written);
+
b->parent = parent;
b->accessed = 1;
@@ -1019,13 +1035,6 @@ retry:
for (; i <= b->keys.nsets; i++)
prefetch(b->keys.set[i].data);
- if (btree_node_io_error(b)) {
- rw_unlock(write, b);
- return ERR_PTR(-EIO);
- }
-
- BUG_ON(!b->written);
-
return b;
}
@@ -1121,6 +1130,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
struct btree_op *op)
{
struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
+
if (!IS_ERR_OR_NULL(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1133,7 +1143,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
static void make_btree_freeing_key(struct btree *b, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
mutex_lock(&b->c->bucket_lock);
@@ -1154,7 +1164,7 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op)
{
struct cache_set *c = b->c;
struct cache *ca;
- unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
+ unsigned int i, reserve = (c->root->level - b->level) * 2 + 1;
mutex_lock(&c->bucket_lock);
@@ -1178,7 +1188,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
struct bkey *k)
{
uint8_t stale = 0;
- unsigned i;
+ unsigned int i;
struct bucket *g;
/*
@@ -1216,7 +1226,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
/* guard against overflow */
- SET_GC_SECTORS_USED(g, min_t(unsigned,
+ SET_GC_SECTORS_USED(g, min_t(unsigned int,
GC_SECTORS_USED(g) + KEY_SIZE(k),
MAX_GC_SECTORS_USED));
@@ -1230,7 +1240,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i) &&
@@ -1256,7 +1266,7 @@ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
{
uint8_t stale = 0;
- unsigned keys = 0, good_keys = 0;
+ unsigned int keys = 0, good_keys = 0;
struct bkey *k;
struct btree_iter iter;
struct bset_tree *t;
@@ -1299,16 +1309,18 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
struct gc_merge_info {
struct btree *b;
- unsigned keys;
+ unsigned int keys;
};
-static int bch_btree_insert_node(struct btree *, struct btree_op *,
- struct keylist *, atomic_t *, struct bkey *);
+static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
+ struct keylist *insert_keys,
+ atomic_t *journal_ref,
+ struct bkey *replace_key);
static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
struct gc_stat *gc, struct gc_merge_info *r)
{
- unsigned i, nodes = 0, keys = 0, blocks;
+ unsigned int i, nodes = 0, keys = 0, blocks;
struct btree *new_nodes[GC_MERGE_NODES];
struct keylist keylist;
struct closure cl;
@@ -1508,11 +1520,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
return -EINTR;
}
-static unsigned btree_gc_count_keys(struct btree *b)
+static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
struct btree_iter iter;
- unsigned ret = 0;
+ unsigned int ret = 0;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
ret += bkey_u64s(k);
@@ -1520,6 +1532,32 @@ static unsigned btree_gc_count_keys(struct btree *b)
return ret;
}
+static size_t btree_gc_min_nodes(struct cache_set *c)
+{
+ size_t min_nodes;
+
+ /*
+ * Incremental GC pauses for 100ms whenever front-side I/O
+ * arrives. If GC only processed a constant number (100) of
+ * nodes each time, then with many btree nodes GC would last
+ * a long time, and front-side I/O would run out of buckets
+ * (no new bucket can be allocated during GC) and be blocked
+ * again. So instead of a constant, GC processes a number of
+ * nodes that varies with the total number of btree nodes,
+ * by dividing GC into a constant number (100) of rounds:
+ * with many btree nodes GC processes more nodes each time,
+ * otherwise fewer (but no fewer than MIN_GC_NODES).
+ */
+ min_nodes = c->gc_stats.nodes / MAX_GC_TIMES;
+ if (min_nodes < MIN_GC_NODES)
+ min_nodes = MIN_GC_NODES;
+
+ return min_nodes;
+}
+
+
static int btree_gc_recurse(struct btree *b, struct btree_op *op,
struct closure *writes, struct gc_stat *gc)
{
@@ -1585,6 +1623,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
r->b = NULL;
+ if (atomic_read(&b->c->search_inflight) &&
+ gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) {
+ gc->nodes_pre = gc->nodes;
+ ret = -EAGAIN;
+ break;
+ }
+
if (need_resched()) {
ret = -EAGAIN;
break;
@@ -1642,7 +1687,7 @@ static void btree_gc_start(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
if (!c->gc_mark_valid)
return;
@@ -1668,7 +1713,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
{
struct bucket *b;
struct cache *ca;
- unsigned i;
+ unsigned int i;
mutex_lock(&c->bucket_lock);
@@ -1686,7 +1731,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
struct bcache_device *d = c->devices[i];
struct cached_dev *dc;
struct keybuf_key *w, *n;
- unsigned j;
+ unsigned int j;
if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
continue;
@@ -1753,7 +1798,10 @@ static void bch_btree_gc(struct cache_set *c)
closure_sync(&writes);
cond_resched();
- if (ret && ret != -EAGAIN)
+ if (ret == -EAGAIN)
+ schedule_timeout_interruptible(msecs_to_jiffies
+ (GC_SLEEP_MS));
+ else if (ret)
pr_warn("gc failed!");
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -1775,7 +1823,7 @@ static void bch_btree_gc(struct cache_set *c)
static bool gc_should_run(struct cache_set *c)
{
struct cache *ca;
- unsigned i;
+ unsigned int i;
for_each_cache(ca, c, i)
if (ca->invalidate_needs_gc)
@@ -1834,8 +1882,14 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
do {
k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad);
- if (k)
+ if (k) {
btree_node_prefetch(b, k);
+ /*
+ * initialize c->gc_stats.nodes
+ * for incremental GC
+ */
+ b->c->gc_stats.nodes++;
+ }
if (p)
ret = btree(check_recurse, p, b, op);
@@ -1860,7 +1914,7 @@ void bch_initial_gc_finish(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
bch_btree_gc_finish(c);
@@ -1900,7 +1954,7 @@ void bch_initial_gc_finish(struct cache_set *c)
static bool btree_insert_key(struct btree *b, struct bkey *k,
struct bkey *replace_key)
{
- unsigned status;
+ unsigned int status;
BUG_ON(bkey_cmp(k, &b->key) > 0);
@@ -1999,7 +2053,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
if (split) {
- unsigned keys = 0;
+ unsigned int keys = 0;
trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
@@ -2177,10 +2231,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
rw_lock(true, b, b->level);
if (b->key.ptr[0] != btree_ptr ||
- b->seq != seq + 1) {
+ b->seq != seq + 1) {
op->lock = b->level;
goto out;
- }
+ }
}
SET_KEY_PTRS(check_key, 1);
@@ -2255,7 +2309,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
void bch_btree_set_root(struct btree *b)
{
- unsigned i;
+ unsigned int i;
struct closure cl;
closure_init_stack(&cl);
@@ -2367,7 +2421,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
struct refill {
struct btree_op op;
- unsigned nr_found;
+ unsigned int nr_found;
struct keybuf *buf;
struct bkey *end;
keybuf_pred_fn *pred;
@@ -2443,6 +2497,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
if (!RB_EMPTY_ROOT(&buf->keys)) {
struct keybuf_key *w;
+
w = RB_FIRST(&buf->keys, struct keybuf_key, node);
buf->start = START_KEY(&w->key);
@@ -2474,6 +2529,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
{
bool ret = false;
struct keybuf_key *p, *w, s;
+
s.key = *start;
if (bkey_cmp(end, &buf->start) <= 0 ||
@@ -2500,6 +2556,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
{
struct keybuf_key *w;
+
spin_lock(&buf->lock);
w = RB_FIRST(&buf->keys, struct keybuf_key, node);
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index d211e2c25b6b..a68d6c55783b 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -152,7 +152,7 @@ static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
\
static inline void set_btree_node_ ## flag(struct btree *b) \
-{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \
+{ set_bit(BTREE_NODE_ ## flag, &b->flags); }
enum btree_flags {
BTREE_NODE_io_error,
@@ -184,7 +184,7 @@ static inline struct bset *btree_bset_last(struct btree *b)
return bset_tree_last(&b->keys)->data;
}
-static inline unsigned bset_block_offset(struct btree *b, struct bset *i)
+static inline unsigned int bset_block_offset(struct btree *b, struct bset *i)
{
return bset_sector_offset(&b->keys, i) >> b->c->block_bits;
}
@@ -213,7 +213,7 @@ struct btree_op {
/* Btree level at which we start taking write locks */
short lock;
- unsigned insert_collision:1;
+ unsigned int insert_collision:1;
};
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
@@ -238,26 +238,28 @@ static inline void rw_unlock(bool w, struct btree *b)
(w ? up_write : up_read)(&b->lock);
}
-void bch_btree_node_read_done(struct btree *);
-void __bch_btree_node_write(struct btree *, struct closure *);
-void bch_btree_node_write(struct btree *, struct closure *);
-
-void bch_btree_set_root(struct btree *);
-struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
- int, bool, struct btree *);
-struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
- struct bkey *, int, bool, struct btree *);
-
-int bch_btree_insert_check_key(struct btree *, struct btree_op *,
- struct bkey *);
-int bch_btree_insert(struct cache_set *, struct keylist *,
- atomic_t *, struct bkey *);
-
-int bch_gc_thread_start(struct cache_set *);
-void bch_initial_gc_finish(struct cache_set *);
-void bch_moving_gc(struct cache_set *);
-int bch_btree_check(struct cache_set *);
-void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
+void bch_btree_node_read_done(struct btree *b);
+void __bch_btree_node_write(struct btree *b, struct closure *parent);
+void bch_btree_node_write(struct btree *b, struct closure *parent);
+
+void bch_btree_set_root(struct btree *b);
+struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+ int level, bool wait,
+ struct btree *parent);
+struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
+ struct bkey *k, int level, bool write,
+ struct btree *parent);
+
+int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
+ struct bkey *check_key);
+int bch_btree_insert(struct cache_set *c, struct keylist *keys,
+ atomic_t *journal_ref, struct bkey *replace_key);
+
+int bch_gc_thread_start(struct cache_set *c);
+void bch_initial_gc_finish(struct cache_set *c);
+void bch_moving_gc(struct cache_set *c);
+int bch_btree_check(struct cache_set *c);
+void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k);
static inline void wake_up_gc(struct cache_set *c)
{
@@ -272,9 +274,9 @@ static inline void wake_up_gc(struct cache_set *c)
#define MAP_END_KEY 1
-typedef int (btree_map_nodes_fn)(struct btree_op *, struct btree *);
-int __bch_btree_map_nodes(struct btree_op *, struct cache_set *,
- struct bkey *, btree_map_nodes_fn *, int);
+typedef int (btree_map_nodes_fn)(struct btree_op *b_op, struct btree *b);
+int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
+ struct bkey *from, btree_map_nodes_fn *fn, int flags);
static inline int bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_nodes_fn *fn)
@@ -290,21 +292,23 @@ static inline int bch_btree_map_leaf_nodes(struct btree_op *op,
return __bch_btree_map_nodes(op, c, from, fn, MAP_LEAF_NODES);
}
-typedef int (btree_map_keys_fn)(struct btree_op *, struct btree *,
- struct bkey *);
-int bch_btree_map_keys(struct btree_op *, struct cache_set *,
- struct bkey *, btree_map_keys_fn *, int);
-
-typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
-
-void bch_keybuf_init(struct keybuf *);
-void bch_refill_keybuf(struct cache_set *, struct keybuf *,
- struct bkey *, keybuf_pred_fn *);
-bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
- struct bkey *);
-void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
-struct keybuf_key *bch_keybuf_next(struct keybuf *);
-struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
- struct bkey *, keybuf_pred_fn *);
+typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b,
+ struct bkey *k);
+int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
+ struct bkey *from, btree_map_keys_fn *fn, int flags);
+
+typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k);
+
+void bch_keybuf_init(struct keybuf *buf);
+void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
+ struct bkey *end, keybuf_pred_fn *pred);
+bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
+ struct bkey *end);
+void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w);
+struct keybuf_key *bch_keybuf_next(struct keybuf *buf);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
+ struct keybuf *buf,
+ struct bkey *end,
+ keybuf_pred_fn *pred);
void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats);
#endif
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 0e14969182c6..73f5319295bc 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Asynchronous refcounty things
*
@@ -162,12 +163,13 @@ static struct dentry *closure_debug;
static int debug_seq_show(struct seq_file *f, void *data)
{
struct closure *cl;
+
spin_lock_irq(&closure_list_lock);
list_for_each_entry(cl, &closure_list, all) {
int r = atomic_read(&cl->remaining);
- seq_printf(f, "%p: %pF -> %pf p %p r %i ",
+ seq_printf(f, "%p: %pS -> %pS p %p r %i ",
cl, (void *) cl->ip, cl->fn, cl->parent,
r & CLOSURE_REMAINING_MASK);
@@ -177,7 +179,7 @@ static int debug_seq_show(struct seq_file *f, void *data)
r & CLOSURE_RUNNING ? "R" : "");
if (r & CLOSURE_WAITING)
- seq_printf(f, " W %pF\n",
+ seq_printf(f, " W %pS\n",
(void *) cl->waiting_on);
seq_printf(f, "\n");
@@ -199,11 +201,16 @@ static const struct file_operations debug_ops = {
.release = single_release
};
-int __init closure_debug_init(void)
+void __init closure_debug_init(void)
{
- closure_debug = debugfs_create_file("closures",
- 0400, bcache_debug, NULL, &debug_ops);
- return IS_ERR_OR_NULL(closure_debug);
+ if (!IS_ERR_OR_NULL(bcache_debug))
+ /*
+ * it is unnecessary to check return value of
+ * debugfs_create_file(), we should not care
+ * about this.
+ */
+ closure_debug = debugfs_create_file(
+ "closures", 0400, bcache_debug, NULL, &debug_ops);
}
#endif
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 71427eb5fdae..eca0d496b686 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -159,7 +159,7 @@ struct closure {
#define CLOSURE_MAGIC_DEAD 0xc054dead
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
- unsigned magic;
+ unsigned int magic;
struct list_head all;
unsigned long ip;
unsigned long waiting_on;
@@ -186,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-int closure_debug_init(void);
+void closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);
#else
-static inline int closure_debug_init(void) { return 0; }
+static inline void closure_debug_init(void) {}
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}
@@ -289,10 +289,12 @@ static inline void closure_init_stack(struct closure *cl)
}
/**
- * closure_wake_up - wake up all closures on a wait list.
+ * closure_wake_up - wake up all closures on a wait list,
+ * with memory barrier
*/
static inline void closure_wake_up(struct closure_waitlist *list)
{
+ /* Memory barrier for the wait list */
smp_mb();
__closure_wake_up(list);
}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index d030ce3025a6..06da66b2488a 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -67,34 +67,35 @@ void bch_btree_verify(struct btree *b)
if (inmemory->keys != sorted->keys ||
memcmp(inmemory->start,
sorted->start,
- (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
+ (void *) bset_bkey_last(inmemory) -
+ (void *) inmemory->start)) {
struct bset *i;
- unsigned j;
+ unsigned int j;
console_lock();
- printk(KERN_ERR "*** in memory:\n");
+ pr_err("*** in memory:\n");
bch_dump_bset(&b->keys, inmemory, 0);
- printk(KERN_ERR "*** read back in:\n");
+ pr_err("*** read back in:\n");
bch_dump_bset(&v->keys, sorted, 0);
for_each_written_bset(b, ondisk, i) {
- unsigned block = ((void *) i - (void *) ondisk) /
+ unsigned int block = ((void *) i - (void *) ondisk) /
block_bytes(b->c);
- printk(KERN_ERR "*** on disk block %u:\n", block);
+ pr_err("*** on disk block %u:\n", block);
bch_dump_bset(&b->keys, i, block);
}
- printk(KERN_ERR "*** block %zu not written\n",
+ pr_err("*** block %zu not written\n",
((void *) i - (void *) ondisk) / block_bytes(b->c));
for (j = 0; j < inmemory->keys; j++)
if (inmemory->d[j] != sorted->d[j])
break;
- printk(KERN_ERR "b->written %u\n", b->written);
+ pr_err("b->written %u\n", b->written);
console_unlock();
panic("verify failed at %u\n", j);
@@ -110,11 +111,15 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
- check = bio_clone_kmalloc(bio, GFP_NOIO);
+ check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
if (!check)
return;
+ check->bi_disk = bio->bi_disk;
check->bi_opf = REQ_OP_READ;
+ check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
+ check->bi_iter.bi_size = bio->bi_iter.bi_size;
+ bch_bio_map(check, NULL);
if (bch_bio_alloc_pages(check, GFP_NOIO))
goto out_put;
@@ -172,9 +177,9 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
while (size) {
struct keybuf_key *w;
- unsigned bytes = min(i->bytes, size);
-
+ unsigned int bytes = min(i->bytes, size);
int err = copy_to_user(buf, i->buf, bytes);
+
if (err)
return err;
@@ -233,8 +238,8 @@ void bch_debug_init_cache_set(struct cache_set *c)
{
if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
- snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
+ snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
@@ -248,11 +253,12 @@ void bch_debug_exit(void)
debugfs_remove_recursive(bcache_debug);
}
-int __init bch_debug_init(struct kobject *kobj)
+void __init bch_debug_init(struct kobject *kobj)
{
- if (!IS_ENABLED(CONFIG_DEBUG_FS))
- return 0;
-
+ /*
+ * it is unnecessary to check return value of
+ * debugfs_create_file(), we should not care
+ * about this.
+ */
bcache_debug = debugfs_create_dir("bcache", NULL);
- return IS_ERR_OR_NULL(bcache_debug);
}
diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h
index acc48d3fa274..fb3d4dff4b26 100644
--- a/drivers/md/bcache/debug.h
+++ b/drivers/md/bcache/debug.h
@@ -8,8 +8,8 @@ struct cache_set;
#ifdef CONFIG_BCACHE_DEBUG
-void bch_btree_verify(struct btree *);
-void bch_data_verify(struct cached_dev *, struct bio *);
+void bch_btree_verify(struct btree *b);
+void bch_data_verify(struct cached_dev *dc, struct bio *bio);
#define expensive_debug_checks(c) ((c)->expensive_debug_checks)
#define key_merging_disabled(c) ((c)->key_merging_disabled)
@@ -27,7 +27,7 @@ static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
#endif
#ifdef CONFIG_DEBUG_FS
-void bch_debug_init_cache_set(struct cache_set *);
+void bch_debug_init_cache_set(struct cache_set *c);
#else
static inline void bch_debug_init_cache_set(struct cache_set *c) {}
#endif
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 1d096742eb41..c809724e6571 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -46,7 +46,7 @@ static bool bch_key_sort_cmp(struct btree_iter_set l,
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
@@ -67,7 +67,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
@@ -96,7 +96,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
{
- unsigned i = 0;
+ unsigned int i = 0;
char *out = buf, *end = buf + size;
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
@@ -126,22 +126,22 @@ void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
{
struct btree *b = container_of(keys, struct btree, keys);
- unsigned j;
+ unsigned int j;
char buf[80];
bch_extent_to_text(buf, sizeof(buf), k);
- printk(" %s", buf);
+ pr_err(" %s", buf);
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
- printk(" bucket %zu", n);
+ pr_err(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
- printk(" prio %i",
+ pr_err(" prio %i",
PTR_BUCKET(b->c, k, j)->prio);
}
- printk(" %s\n", bch_ptr_status(b->c, k));
+ pr_err(" %s\n", bch_ptr_status(b->c, k));
}
/* Btree ptrs */
@@ -166,12 +166,13 @@ bad:
static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
+
return __bch_btree_ptr_invalid(b->c, k);
}
static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
{
- unsigned i;
+ unsigned int i;
char buf[80];
struct bucket *g;
@@ -204,7 +205,7 @@ err:
static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
- unsigned i;
+ unsigned int i;
if (!bkey_cmp(k, &ZERO_KEY) ||
!KEY_PTRS(k) ||
@@ -327,13 +328,14 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
struct cache_set *c = container_of(b, struct btree, keys)->c;
uint64_t old_offset;
- unsigned old_size, sectors_found = 0;
+ unsigned int old_size, sectors_found = 0;
BUG_ON(!KEY_OFFSET(insert));
BUG_ON(!KEY_SIZE(insert));
while (1) {
struct bkey *k = bch_btree_iter_next(iter);
+
if (!k)
break;
@@ -363,7 +365,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
* k might have been split since we inserted/found the
* key we're replacing
*/
- unsigned i;
+ unsigned int i;
uint64_t offset = KEY_START(k) -
KEY_START(replace_key);
@@ -498,11 +500,12 @@ bad:
static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
+
return __bch_extent_invalid(b->c, k);
}
static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
struct bucket *g = PTR_BUCKET(b->c, k, ptr);
char buf[80];
@@ -534,7 +537,7 @@ err:
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
- unsigned i, stale;
+ unsigned int i, stale;
if (!KEY_PTRS(k) ||
bch_extent_invalid(bk, k))
@@ -574,10 +577,12 @@ static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
~((uint64_t)1 << 63);
}
-static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r)
+static bool bch_extent_merge(struct btree_keys *bk,
+ struct bkey *l,
+ struct bkey *r)
{
struct btree *b = container_of(bk, struct btree, keys);
- unsigned i;
+ unsigned int i;
if (key_merging_disabled(b->c))
return false;
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index 0cd3575afa1d..4d667e05bb73 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -8,8 +8,8 @@ extern const struct btree_keys_ops bch_extent_keys_ops;
struct bkey;
struct cache_set;
-void bch_extent_to_text(char *, size_t, const struct bkey *);
-bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
-bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
+void bch_extent_to_text(char *buf, size_t size, const struct bkey *k);
+bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k);
+bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k);
#endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 9612873afee2..c25097968319 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -17,6 +17,7 @@
void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
struct bbio *b = container_of(bio, struct bbio, bio);
+
mempool_free(b, &c->bio_meta);
}
@@ -42,9 +43,10 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
}
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- struct bkey *k, unsigned ptr)
+ struct bkey *k, unsigned int ptr)
{
struct bbio *b = container_of(bio, struct bbio, bio);
+
bch_bkey_copy_single_ptr(&b->key, k, ptr);
__bch_submit_bbio(bio, c);
}
@@ -52,7 +54,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
/* IO errors */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
- unsigned errors;
+ unsigned int errors;
WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
@@ -75,16 +77,16 @@ void bch_count_io_errors(struct cache *ca,
*/
if (ca->set->error_decay) {
- unsigned count = atomic_inc_return(&ca->io_count);
+ unsigned int count = atomic_inc_return(&ca->io_count);
while (count > ca->set->error_decay) {
- unsigned errors;
- unsigned old = count;
- unsigned new = count - ca->set->error_decay;
+ unsigned int errors;
+ unsigned int old = count;
+ unsigned int new = count - ca->set->error_decay;
/*
* First we subtract refresh from count; each time we
- * succesfully do so, we rescale the errors once:
+ * successfully do so, we rescale the errors once:
*/
count = atomic_cmpxchg(&ca->io_count, old, new);
@@ -104,7 +106,7 @@ void bch_count_io_errors(struct cache *ca,
}
if (error) {
- unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
+ unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT,
&ca->io_errors);
errors >>= IO_ERROR_SHIFT;
@@ -126,18 +128,18 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
struct cache *ca = PTR_CACHE(c, &b->key, 0);
int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
- unsigned threshold = op_is_write(bio_op(bio))
+ unsigned int threshold = op_is_write(bio_op(bio))
? c->congested_write_threshold_us
: c->congested_read_threshold_us;
if (threshold) {
- unsigned t = local_clock_us();
-
+ unsigned int t = local_clock_us();
int us = t - b->submit_time_us;
int congested = atomic_read(&c->congested);
if (us > (int) threshold) {
int ms = us / 1024;
+
c->congested_last_us = t;
ms = min(ms, CONGESTED_MAX + congested);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 18f1b5239620..6116bbf870d8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -28,11 +28,12 @@
static void journal_read_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
+
closure_put(cl);
}
static int journal_read_bucket(struct cache *ca, struct list_head *list,
- unsigned bucket_index)
+ unsigned int bucket_index)
{
struct journal_device *ja = &ca->journal;
struct bio *bio = &ja->bio;
@@ -40,7 +41,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
struct journal_replay *i;
struct jset *j, *data = ca->set->journal.w[0].data;
struct closure cl;
- unsigned len, left, offset = 0;
+ unsigned int len, left, offset = 0;
int ret = 0;
sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]);
@@ -50,7 +51,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
- len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS);
+ len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
bio_reset(bio);
bio->bi_iter.bi_sector = bucket + offset;
@@ -154,12 +155,12 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
})
struct cache *ca;
- unsigned iter;
+ unsigned int iter;
for_each_cache(ca, c, iter) {
struct journal_device *ja = &ca->journal;
DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
- unsigned i, l, r, m;
+ unsigned int i, l, r, m;
uint64_t seq;
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
@@ -192,7 +193,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
l < ca->sb.njournal_buckets;
- l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
+ l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets,
+ l + 1))
if (read_bucket(l))
goto bsearch;
@@ -304,7 +306,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
k < bset_bkey_last(&i->j);
k = bkey_next(k))
if (!__bch_extent_invalid(c, k)) {
- unsigned j;
+ unsigned int j;
for (j = 0; j < KEY_PTRS(k); j++)
if (ptr_available(c, k, j))
@@ -492,7 +494,7 @@ static void journal_reclaim(struct cache_set *c)
struct bkey *k = &c->journal.key;
struct cache *ca;
uint64_t last_seq;
- unsigned iter, n = 0;
+ unsigned int iter, n = 0;
atomic_t p __maybe_unused;
atomic_long_inc(&c->reclaim);
@@ -526,7 +528,7 @@ static void journal_reclaim(struct cache_set *c)
for_each_cache(ca, c, iter) {
struct journal_device *ja = &ca->journal;
- unsigned next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+ unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
/* No space available on this device */
if (next == ja->discard_idx)
@@ -580,7 +582,7 @@ static void journal_write_endio(struct bio *bio)
closure_put(&w->c->journal.io);
}
-static void journal_write(struct closure *);
+static void journal_write(struct closure *cl);
static void journal_write_done(struct closure *cl)
{
@@ -609,11 +611,12 @@ static void journal_write_unlocked(struct closure *cl)
struct cache *ca;
struct journal_write *w = c->journal.cur;
struct bkey *k = &c->journal.key;
- unsigned i, sectors = set_blocks(w->data, block_bytes(c)) *
+ unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) *
c->sb.block_size;
struct bio *bio;
struct bio_list list;
+
bio_list_init(&list);
if (!w->need_write) {
@@ -705,7 +708,7 @@ static void journal_try_write(struct cache_set *c)
}
static struct journal_write *journal_wait_for_write(struct cache_set *c,
- unsigned nkeys)
+ unsigned int nkeys)
__acquires(&c->journal.lock)
{
size_t sectors;
@@ -828,6 +831,7 @@ void bch_journal_free(struct cache_set *c)
free_pages((unsigned long) c->journal.w[1].data, JSET_BITS);
free_pages((unsigned long) c->journal.w[0].data, JSET_BITS);
free_fifo(&c->journal.pin);
+ free_heap(&c->flush_btree);
}
int bch_journal_alloc(struct cache_set *c)
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index b5788199188f..66f0facff84b 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -110,7 +110,7 @@ struct journal {
struct delayed_work work;
/* Number of blocks free in the bucket(s) we're currently writing to */
- unsigned blocks_free;
+ unsigned int blocks_free;
uint64_t seq;
DECLARE_FIFO(atomic_t, pin);
@@ -131,13 +131,13 @@ struct journal_device {
uint64_t seq[SB_JOURNAL_BUCKETS];
/* Journal bucket we're currently writing to */
- unsigned cur_idx;
+ unsigned int cur_idx;
/* Last journal bucket that still contains an open journal entry */
- unsigned last_idx;
+ unsigned int last_idx;
/* Next journal bucket to be discarded */
- unsigned discard_idx;
+ unsigned int discard_idx;
#define DISCARD_READY 0
#define DISCARD_IN_FLIGHT 1
@@ -167,14 +167,16 @@ struct cache_set;
struct btree_op;
struct keylist;
-atomic_t *bch_journal(struct cache_set *, struct keylist *, struct closure *);
-void bch_journal_next(struct journal *);
-void bch_journal_mark(struct cache_set *, struct list_head *);
-void bch_journal_meta(struct cache_set *, struct closure *);
-int bch_journal_read(struct cache_set *, struct list_head *);
-int bch_journal_replay(struct cache_set *, struct list_head *);
-
-void bch_journal_free(struct cache_set *);
-int bch_journal_alloc(struct cache_set *);
+atomic_t *bch_journal(struct cache_set *c,
+ struct keylist *keys,
+ struct closure *parent);
+void bch_journal_next(struct journal *j);
+void bch_journal_mark(struct cache_set *c, struct list_head *list);
+void bch_journal_meta(struct cache_set *c, struct closure *cl);
+int bch_journal_read(struct cache_set *c, struct list_head *list);
+int bch_journal_replay(struct cache_set *c, struct list_head *list);
+
+void bch_journal_free(struct cache_set *c);
+int bch_journal_alloc(struct cache_set *c);
#endif /* _BCACHE_JOURNAL_H */
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index a24c3a95b2c0..7891fb512736 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -23,7 +23,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
{
struct cache_set *c = container_of(buf, struct cache_set,
moving_gc_keys);
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i) &&
@@ -38,6 +38,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
static void moving_io_destructor(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, cl);
+
kfree(io);
}
@@ -186,9 +187,10 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r)
return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
}
-static unsigned bucket_heap_top(struct cache *ca)
+static unsigned int bucket_heap_top(struct cache *ca)
{
struct bucket *b;
+
return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
}
@@ -196,7 +198,7 @@ void bch_moving_gc(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
if (!c->copy_gc_enabled)
return;
@@ -204,9 +206,9 @@ void bch_moving_gc(struct cache_set *c)
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i) {
- unsigned sectors_to_move = 0;
- unsigned reserve_sectors = ca->sb.bucket_size *
- fifo_used(&ca->free[RESERVE_MOVINGGC]);
+ unsigned int sectors_to_move = 0;
+ unsigned int reserve_sectors = ca->sb.bucket_size *
+ fifo_used(&ca->free[RESERVE_MOVINGGC]);
ca->heap.used = 0;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ae67f5fa8047..51be355a3309 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -25,9 +25,9 @@
struct kmem_cache *bch_search_cache;
-static void bch_data_insert_start(struct closure *);
+static void bch_data_insert_start(struct closure *cl);
-static unsigned cache_mode(struct cached_dev *dc)
+static unsigned int cache_mode(struct cached_dev *dc)
{
return BDEV_CACHE_MODE(&dc->sb);
}
@@ -45,6 +45,7 @@ static void bio_csum(struct bio *bio, struct bkey *k)
bio_for_each_segment(bv, bio, iter) {
void *d = kmap(bv.bv_page) + bv.bv_offset;
+
csum = bch_crc64_update(csum, d, bv.bv_len);
kunmap(bv.bv_page);
}
@@ -98,7 +99,7 @@ static void bch_data_insert_keys(struct closure *cl)
closure_return(cl);
}
-static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
+static int bch_keylist_realloc(struct keylist *l, unsigned int u64s,
struct cache_set *c)
{
size_t oldsize = bch_keylist_nkeys(l);
@@ -107,7 +108,7 @@ static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
/*
* The journalling code doesn't handle the case where the keys to insert
* is bigger than an empty write: If we just return -ENOMEM here,
- * bio_insert() and bio_invalidate() will insert the keys created so far
+ * bch_data_insert_keys() will insert the keys created so far
* and finish the rest when the keylist is empty.
*/
if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
@@ -125,7 +126,7 @@ static void bch_data_invalidate(struct closure *cl)
bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
while (bio_sectors(bio)) {
- unsigned sectors = min(bio_sectors(bio),
+ unsigned int sectors = min(bio_sectors(bio),
1U << (KEY_SIZE_BITS - 1));
if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
@@ -135,7 +136,9 @@ static void bch_data_invalidate(struct closure *cl)
bio->bi_iter.bi_size -= sectors << 9;
bch_keylist_add(&op->insert_keys,
- &KEY(op->inode, bio->bi_iter.bi_sector, sectors));
+ &KEY(op->inode,
+ bio->bi_iter.bi_sector,
+ sectors));
}
op->insert_data_done = true;
@@ -151,7 +154,7 @@ static void bch_data_insert_error(struct closure *cl)
/*
* Our data write just errored, which means we've got a bunch of keys to
- * insert that point to data that wasn't succesfully written.
+ * insert that point to data that wasn't successfully written.
*
* We don't have to insert those keys but we still have to invalidate
* that region of the cache - so, if we just strip off all the pointers
@@ -211,7 +214,7 @@ static void bch_data_insert_start(struct closure *cl)
bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);
do {
- unsigned i;
+ unsigned int i;
struct bkey *k;
struct bio_set *split = &op->c->bio_split;
@@ -328,7 +331,7 @@ void bch_data_insert(struct closure *cl)
/* Congested? */
-unsigned bch_get_congested(struct cache_set *c)
+unsigned int bch_get_congested(struct cache_set *c)
{
int i;
long rand;
@@ -372,8 +375,8 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
{
struct cache_set *c = dc->disk.c;
- unsigned mode = cache_mode(dc);
- unsigned sectors, congested = bch_get_congested(c);
+ unsigned int mode = cache_mode(dc);
+ unsigned int sectors, congested = bch_get_congested(c);
struct task_struct *task = current;
struct io *i;
@@ -469,11 +472,11 @@ struct search {
struct bio *cache_miss;
struct bcache_device *d;
- unsigned insert_bio_sectors;
- unsigned recoverable:1;
- unsigned write:1;
- unsigned read_dirty_data:1;
- unsigned cache_missed:1;
+ unsigned int insert_bio_sectors;
+ unsigned int recoverable:1;
+ unsigned int write:1;
+ unsigned int read_dirty_data:1;
+ unsigned int cache_missed:1;
unsigned long start_time;
@@ -514,20 +517,20 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
struct search *s = container_of(op, struct search, op);
struct bio *n, *bio = &s->bio.bio;
struct bkey *bio_key;
- unsigned ptr;
+ unsigned int ptr;
if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
return MAP_CONTINUE;
if (KEY_INODE(k) != s->iop.inode ||
KEY_START(k) > bio->bi_iter.bi_sector) {
- unsigned bio_sectors = bio_sectors(bio);
- unsigned sectors = KEY_INODE(k) == s->iop.inode
+ unsigned int bio_sectors = bio_sectors(bio);
+ unsigned int sectors = KEY_INODE(k) == s->iop.inode
? min_t(uint64_t, INT_MAX,
KEY_START(k) - bio->bi_iter.bi_sector)
: INT_MAX;
-
int ret = s->d->cache_miss(b, s, bio, sectors);
+
if (ret != MAP_CONTINUE)
return ret;
@@ -623,6 +626,7 @@ static void request_endio(struct bio *bio)
if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
+
s->iop.status = bio->bi_status;
/* Only cache read errors are recoverable */
s->recoverable = false;
@@ -667,8 +671,7 @@ static void backing_request_endio(struct bio *bio)
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
- generic_end_io_acct(s->d->disk->queue,
- bio_data_dir(s->orig_bio),
+ generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio),
&s->d->disk->part0, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
@@ -702,6 +705,8 @@ static void search_free(struct closure *cl)
{
struct search *s = container_of(cl, struct search, cl);
+ atomic_dec(&s->d->c->search_inflight);
+
if (s->iop.bio)
bio_put(s->iop.bio);
@@ -719,6 +724,7 @@ static inline struct search *search_alloc(struct bio *bio,
closure_init(&s->cl, NULL);
do_bio_hook(s, bio, request_endio);
+ atomic_inc(&d->c->search_inflight);
s->orig_bio = bio;
s->cache_miss = NULL;
@@ -811,7 +817,8 @@ static void cached_dev_read_done(struct closure *cl)
if (s->iop.bio) {
bio_reset(s->iop.bio);
- s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
+ s->iop.bio->bi_iter.bi_sector =
+ s->cache_miss->bi_iter.bi_sector;
bio_copy_dev(s->iop.bio, s->cache_miss);
s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
bch_bio_map(s->iop.bio, NULL);
@@ -854,10 +861,10 @@ static void cached_dev_read_done_bh(struct closure *cl)
}
static int cached_dev_cache_miss(struct btree *b, struct search *s,
- struct bio *bio, unsigned sectors)
+ struct bio *bio, unsigned int sectors)
{
int ret = MAP_CONTINUE;
- unsigned reada = 0;
+ unsigned int reada = 0;
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct bio *miss, *cache_bio;
@@ -1062,8 +1069,7 @@ static void detached_dev_end_io(struct bio *bio)
bio->bi_end_io = ddip->bi_end_io;
bio->bi_private = ddip->bi_private;
- generic_end_io_acct(ddip->d->disk->queue,
- bio_data_dir(bio),
+ generic_end_io_acct(ddip->d->disk->queue, bio_op(bio),
&ddip->d->disk->part0, ddip->start_time);
if (bio->bi_status) {
@@ -1102,6 +1108,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
generic_make_request(bio);
}
+static void quit_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *this_dc)
+{
+ int i;
+ struct bcache_device *d;
+ struct cached_dev *dc;
+
+ /*
+ * mutex bch_register_lock may compete with other parallel requesters,
+ * or attach/detach operations on other backing device. Waiting to
+ * the mutex lock may increase I/O request latency for seconds or more.
+ * To avoid such situation, if mutext_trylock() failed, only writeback
+ * rate of current cached device is set to 1, and __update_write_back()
+ * will decide writeback rate of other cached devices (remember now
+ * c->idle_counter is 0 already).
+ */
+ if (mutex_trylock(&bch_register_lock)) {
+ for (i = 0; i < c->devices_max_used; i++) {
+ if (!c->devices[i])
+ continue;
+
+ if (UUID_FLASH_ONLY(&c->uuids[i]))
+ continue;
+
+ d = c->devices[i];
+ dc = container_of(d, struct cached_dev, disk);
+ /*
+ * set writeback rate to default minimum value,
+ * then let update_writeback_rate() to decide the
+ * upcoming rate.
+ */
+ atomic_long_set(&dc->writeback_rate.rate, 1);
+ }
+ mutex_unlock(&bch_register_lock);
+ } else
+ atomic_long_set(&this_dc->writeback_rate.rate, 1);
+}
+
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -1119,8 +1163,25 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
return BLK_QC_T_NONE;
}
- atomic_set(&dc->backing_idle, 0);
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
+ if (likely(d->c)) {
+ if (atomic_read(&d->c->idle_counter))
+ atomic_set(&d->c->idle_counter, 0);
+ /*
+ * If at_max_writeback_rate of cache set is true and new I/O
+ * comes, quit max writeback rate of all cached devices
+ * attached to this cache set, and set at_max_writeback_rate
+ * to false.
+ */
+ if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
+ atomic_set(&d->c->at_max_writeback_rate, 0);
+ quit_max_writeback_rate(d->c, dc);
+ }
+ }
+
+ generic_start_io_acct(q,
+ bio_op(bio),
+ bio_sectors(bio),
+ &d->disk->part0);
bio_set_dev(bio, dc->bdev);
bio->bi_iter.bi_sector += dc->sb.data_offset;
@@ -1156,6 +1217,7 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
}
@@ -1170,7 +1232,7 @@ static int cached_dev_congested(void *data, int bits)
return 1;
if (cached_dev_get(dc)) {
- unsigned i;
+ unsigned int i;
struct cache *ca;
for_each_cache(ca, d->c, i) {
@@ -1197,9 +1259,9 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
/* Flash backed devices */
static int flash_dev_cache_miss(struct btree *b, struct search *s,
- struct bio *bio, unsigned sectors)
+ struct bio *bio, unsigned int sectors)
{
- unsigned bytes = min(sectors, bio_sectors(bio)) << 9;
+ unsigned int bytes = min(sectors, bio_sectors(bio)) << 9;
swap(bio->bi_iter.bi_size, bytes);
zero_fill_bio(bio);
@@ -1229,7 +1291,6 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
struct search *s;
struct closure *cl;
struct bcache_device *d = bio->bi_disk->private_data;
- int rw = bio_data_dir(bio);
if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
bio->bi_status = BLK_STS_IOERR;
@@ -1237,7 +1298,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
return BLK_QC_T_NONE;
}
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
+ generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
s = search_alloc(bio, d);
cl = &s->cl;
@@ -1254,7 +1315,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
flash_dev_nodata,
bcache_wq);
return BLK_QC_T_NONE;
- } else if (rw) {
+ } else if (bio_data_dir(bio)) {
bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
&KEY(d->id, bio->bi_iter.bi_sector, 0),
&KEY(d->id, bio_end_sector(bio), 0));
@@ -1283,7 +1344,7 @@ static int flash_dev_congested(void *data, int bits)
struct bcache_device *d = data;
struct request_queue *q;
struct cache *ca;
- unsigned i;
+ unsigned int i;
int ret = 0;
for_each_cache(ca, d->c, i) {
@@ -1306,8 +1367,7 @@ void bch_flash_dev_request_init(struct bcache_device *d)
void bch_request_exit(void)
{
- if (bch_search_cache)
- kmem_cache_destroy(bch_search_cache);
+ kmem_cache_destroy(bch_search_cache);
}
int __init bch_request_init(void)
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index dea0886b81c1..aa055cfeb099 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -8,7 +8,7 @@ struct data_insert_op {
struct bio *bio;
struct workqueue_struct *wq;
- unsigned inode;
+ unsigned int inode;
uint16_t write_point;
uint16_t write_prio;
blk_status_t status;
@@ -17,15 +17,15 @@ struct data_insert_op {
uint16_t flags;
struct {
- unsigned bypass:1;
- unsigned writeback:1;
- unsigned flush_journal:1;
- unsigned csum:1;
+ unsigned int bypass:1;
+ unsigned int writeback:1;
+ unsigned int flush_journal:1;
+ unsigned int csum:1;
- unsigned replace:1;
- unsigned replace_collision:1;
+ unsigned int replace:1;
+ unsigned int replace_collision:1;
- unsigned insert_data_done:1;
+ unsigned int insert_data_done:1;
};
};
@@ -33,7 +33,7 @@ struct data_insert_op {
BKEY_PADDED(replace_key);
};
-unsigned bch_get_congested(struct cache_set *);
+unsigned int bch_get_congested(struct cache_set *c);
void bch_data_insert(struct closure *cl);
void bch_cached_dev_request_init(struct cached_dev *dc);
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index be119326297b..894410f3f829 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -33,11 +33,11 @@
* stored left shifted by 16, and scaled back in the sysfs show() function.
*/
-static const unsigned DAY_RESCALE = 288;
-static const unsigned HOUR_RESCALE = 12;
-static const unsigned FIVE_MINUTE_RESCALE = 1;
-static const unsigned accounting_delay = (HZ * 300) / 22;
-static const unsigned accounting_weight = 32;
+static const unsigned int DAY_RESCALE = 288;
+static const unsigned int HOUR_RESCALE = 12;
+static const unsigned int FIVE_MINUTE_RESCALE = 1;
+static const unsigned int accounting_delay = (HZ * 300) / 22;
+static const unsigned int accounting_weight = 32;
/* sysfs reading/writing */
@@ -152,7 +152,7 @@ static void scale_accounting(struct timer_list *t)
struct cache_accounting *acc = from_timer(acc, t, timer);
#define move_stat(name) do { \
- unsigned t = atomic_xchg(&acc->collector.name, 0); \
+ unsigned int t = atomic_xchg(&acc->collector.name, 0); \
t <<= 16; \
acc->five_minute.name += t; \
acc->hour.name += t; \
@@ -200,6 +200,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
bool hit, bool bypass)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
mark_cache_stats(&dc->accounting.collector, hit, bypass);
mark_cache_stats(&c->accounting.collector, hit, bypass);
}
@@ -207,6 +208,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
atomic_inc(&dc->accounting.collector.cache_readaheads);
atomic_inc(&c->accounting.collector.cache_readaheads);
}
@@ -214,6 +216,7 @@ void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
atomic_inc(&dc->accounting.collector.cache_miss_collisions);
atomic_inc(&c->accounting.collector.cache_miss_collisions);
}
diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h
index 0b70f9de0c03..abfaabf7e7fc 100644
--- a/drivers/md/bcache/stats.h
+++ b/drivers/md/bcache/stats.h
@@ -23,7 +23,7 @@ struct cache_stats {
unsigned long cache_miss_collisions;
unsigned long sectors_bypassed;
- unsigned rescale;
+ unsigned int rescale;
};
struct cache_accounting {
@@ -53,10 +53,13 @@ void bch_cache_accounting_clear(struct cache_accounting *acc);
void bch_cache_accounting_destroy(struct cache_accounting *acc);
-void bch_mark_cache_accounting(struct cache_set *, struct bcache_device *,
- bool, bool);
-void bch_mark_cache_readahead(struct cache_set *, struct bcache_device *);
-void bch_mark_cache_miss_collision(struct cache_set *, struct bcache_device *);
-void bch_mark_sectors_bypassed(struct cache_set *, struct cached_dev *, int);
+void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
+ bool hit, bool bypass);
+void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d);
+void bch_mark_cache_miss_collision(struct cache_set *c,
+ struct bcache_device *d);
+void bch_mark_sectors_bypassed(struct cache_set *c,
+ struct cached_dev *dc,
+ int sectors);
#endif /* _BCACHE_STATS_H_ */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index fa4058e43202..94c756c66bd7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bcache setup/teardown code, and some metadata io - read a superblock and
* figure out what to do with it.
@@ -61,7 +62,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
const char *err;
struct cache_sb *s;
struct buffer_head *bh = __bread(bdev, 1, SB_SIZE);
- unsigned i;
+ unsigned int i;
if (!bh)
return "IO error";
@@ -149,7 +150,8 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
goto err;
err = "Invalid superblock: device too small";
- if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets)
+ if (get_capacity(bdev->bd_disk) <
+ sb->bucket_size * sb->nbuckets)
goto err;
err = "Bad UUID";
@@ -181,7 +183,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
goto err;
}
- sb->last_mount = get_seconds();
+ sb->last_mount = (u32)ktime_get_real_seconds();
err = NULL;
get_page(bh->b_page);
@@ -202,7 +204,7 @@ static void write_bdev_super_endio(struct bio *bio)
static void __write_super(struct cache_sb *sb, struct bio *bio)
{
struct cache_sb *out = page_address(bio_first_page_all(bio));
- unsigned i;
+ unsigned int i;
bio->bi_iter.bi_sector = SB_SECTOR;
bio->bi_iter.bi_size = SB_SIZE;
@@ -282,7 +284,7 @@ void bcache_write_super(struct cache_set *c)
{
struct closure *cl = &c->sb_write;
struct cache *ca;
- unsigned i;
+ unsigned int i;
down(&c->sb_write_mutex);
closure_init(cl, &c->cl);
@@ -334,7 +336,7 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
{
struct closure *cl = &c->uuid_write;
struct uuid_entry *u;
- unsigned i;
+ unsigned int i;
char buf[80];
BUG_ON(!parent);
@@ -415,8 +417,8 @@ static int __uuid_write(struct cache_set *c)
{
BKEY_PADDED(key) k;
struct closure cl;
- closure_init_stack(&cl);
+ closure_init_stack(&cl);
lockdep_assert_held(&bch_register_lock);
if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true))
@@ -456,6 +458,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
static struct uuid_entry *uuid_find_empty(struct cache_set *c)
{
static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
return uuid_find(c, zero_uuid);
}
@@ -463,8 +466,8 @@ static struct uuid_entry *uuid_find_empty(struct cache_set *c)
* Bucket priorities/gens:
*
* For each bucket, we store on disk its
- * 8 bit gen
- * 16 bit priority
+ * 8 bit gen
+ * 16 bit priority
*
* See alloc.c for an explanation of the gen. The priority is used to implement
* lru (and in the future other) cache replacement policies; for most purposes
@@ -587,7 +590,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
struct prio_set *p = ca->disk_buckets;
struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
struct bucket *b;
- unsigned bucket_nr = 0;
+ unsigned int bucket_nr = 0;
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets;
@@ -599,7 +602,8 @@ static void prio_read(struct cache *ca, uint64_t bucket)
prio_io(ca, bucket, REQ_OP_READ, 0);
- if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8))
+ if (p->csum !=
+ bch_crc64(&p->magic, bucket_bytes(ca) - 8))
pr_warn("bad csum reading priorities");
if (p->magic != pset_magic(&ca->sb))
@@ -619,6 +623,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
static int open_dev(struct block_device *b, fmode_t mode)
{
struct bcache_device *d = b->bd_disk->private_data;
+
if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
return -ENXIO;
@@ -629,6 +634,7 @@ static int open_dev(struct block_device *b, fmode_t mode)
static void release_dev(struct gendisk *b, fmode_t mode)
{
struct bcache_device *d = b->private_data;
+
closure_put(&d->cl);
}
@@ -662,7 +668,7 @@ static void bcache_device_unlink(struct bcache_device *d)
lockdep_assert_held(&bch_register_lock);
if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
- unsigned i;
+ unsigned int i;
struct cache *ca;
sysfs_remove_link(&d->c->kobj, d->name);
@@ -676,7 +682,7 @@ static void bcache_device_unlink(struct bcache_device *d)
static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
const char *name)
{
- unsigned i;
+ unsigned int i;
struct cache *ca;
for_each_cache(ca, d->c, i)
@@ -696,12 +702,14 @@ static void bcache_device_detach(struct bcache_device *d)
{
lockdep_assert_held(&bch_register_lock);
+ atomic_dec(&d->c->attached_dev_nr);
+
if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
struct uuid_entry *u = d->c->uuids + d->id;
SET_UUID_FLASH_ONLY(u, 0);
memcpy(u->uuid, invalid_uuid, 16);
- u->invalidated = cpu_to_le32(get_seconds());
+ u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
bch_uuid_write(d->c);
}
@@ -713,7 +721,7 @@ static void bcache_device_detach(struct bcache_device *d)
}
static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
- unsigned id)
+ unsigned int id)
{
d->id = id;
d->c = c;
@@ -760,7 +768,7 @@ static void bcache_device_free(struct bcache_device *d)
closure_debug_destroy(&d->cl);
}
-static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
sector_t sectors)
{
struct request_queue *q;
@@ -776,7 +784,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
- (unsigned)d->nr_stripes);
+ (unsigned int)d->nr_stripes);
return -ENOMEM;
}
@@ -796,11 +804,12 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
return idx;
if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
- BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
- !(d->disk = alloc_disk(BCACHE_MINORS))) {
- ida_simple_remove(&bcache_device_idx, idx);
- return -ENOMEM;
- }
+ BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
+ goto err;
+
+ d->disk = alloc_disk(BCACHE_MINORS);
+ if (!d->disk)
+ goto err;
set_capacity(d->disk, sectors);
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
@@ -834,6 +843,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
blk_queue_write_cache(q, true, true);
return 0;
+
+err:
+ ida_simple_remove(&bcache_device_idx, idx);
+ return -ENOMEM;
+
}
/* Cached device */
@@ -911,6 +925,7 @@ void bch_cached_dev_run(struct cached_dev *dc)
if (!d->c &&
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
struct closure cl;
+
closure_init_stack(&cl);
SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE);
@@ -920,8 +935,10 @@ void bch_cached_dev_run(struct cached_dev *dc)
add_disk(d->disk);
bd_link_disk_holder(dc->bdev, dc->disk.disk);
- /* won't show up in the uevent file, use udevadm monitor -e instead
- * only class / kset properties are persistent */
+ /*
+ * won't show up in the uevent file, use udevadm monitor -e instead
+ * only class / kset properties are persistent
+ */
kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
kfree(env[1]);
kfree(env[2]);
@@ -968,6 +985,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
struct closure cl;
+
closure_init_stack(&cl);
BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
@@ -1027,7 +1045,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
uint8_t *set_uuid)
{
- uint32_t rtime = cpu_to_le32(get_seconds());
+ uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds());
struct uuid_entry *u;
struct cached_dev *exist_dc, *t;
@@ -1070,7 +1088,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
(BDEV_STATE(&dc->sb) == BDEV_STATE_STALE ||
BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) {
memcpy(u->uuid, invalid_uuid, 16);
- u->invalidated = cpu_to_le32(get_seconds());
+ u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
u = NULL;
}
@@ -1089,12 +1107,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
}
}
- /* Deadlocks since we're called via sysfs...
- sysfs_remove_file(&dc->kobj, &sysfs_attach);
+ /*
+ * Deadlocks since we're called via sysfs...
+ * sysfs_remove_file(&dc->kobj, &sysfs_attach);
*/
if (bch_is_zero(u->uuid, 16)) {
struct closure cl;
+
closure_init_stack(&cl);
memcpy(u->uuid, dc->sb.uuid, 16);
@@ -1116,11 +1136,11 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
list_move(&dc->list, &c->cached_devs);
calc_cached_dev_sectors(c);
- smp_wmb();
/*
* dc->c must be set before dc->count != 0 - paired with the mb in
* cached_dev_get()
*/
+ smp_wmb();
refcount_set(&dc->count, 1);
/* Block writeback thread, but spawn it */
@@ -1138,6 +1158,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
bch_cached_dev_run(dc);
bcache_device_link(&dc->disk, c, "bdev");
+ atomic_inc(&c->attached_dev_nr);
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
@@ -1203,7 +1224,7 @@ static void cached_dev_flush(struct closure *cl)
continue_at(cl, cached_dev_free, system_wq);
}
-static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
+static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
{
int ret;
struct io *io;
@@ -1285,6 +1306,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
pr_info("registered backing device %s", dc->backing_dev_name);
list_add(&dc->list, &uncached_devices);
+ /* attach to a matched cache set if it exists */
list_for_each_entry(c, &bch_cache_sets, list)
bch_cached_dev_attach(dc, c, NULL);
@@ -1310,7 +1332,10 @@ void bch_flash_dev_release(struct kobject *kobj)
static void flash_dev_free(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+
mutex_lock(&bch_register_lock);
+ atomic_long_sub(bcache_dev_sectors_dirty(d),
+ &d->c->flash_dev_dirty_sectors);
bcache_device_free(d);
mutex_unlock(&bch_register_lock);
kobject_put(&d->kobj);
@@ -1390,7 +1415,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
get_random_bytes(u->uuid, 16);
memset(u->label, 0, 32);
- u->first_reg = u->last_reg = cpu_to_le32(get_seconds());
+ u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds());
SET_UUID_FLASH_ONLY(u, 1);
u->sectors = size >> 9;
@@ -1447,17 +1472,18 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
pr_info("CACHE_SET_IO_DISABLE already set");
- /* XXX: we can be called from atomic context
- acquire_console_sem();
- */
+ /*
+ * XXX: we can be called from atomic context
+ * acquire_console_sem();
+ */
- printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid);
+ pr_err("bcache: error on %pU: ", c->sb.set_uuid);
va_start(args, fmt);
vprintk(fmt, args);
va_end(args);
- printk(", disabling caching\n");
+ pr_err(", disabling caching\n");
if (c->on_error == ON_ERROR_PANIC)
panic("panic forced after error\n");
@@ -1469,6 +1495,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
void bch_cache_set_release(struct kobject *kobj)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
+
kfree(c);
module_put(THIS_MODULE);
}
@@ -1477,7 +1504,7 @@ static void cache_set_free(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, cl);
struct cache *ca;
- unsigned i;
+ unsigned int i;
if (!IS_ERR_OR_NULL(c->debug))
debugfs_remove(c->debug);
@@ -1520,7 +1547,7 @@ static void cache_set_flush(struct closure *cl)
struct cache_set *c = container_of(cl, struct cache_set, caching);
struct cache *ca;
struct btree *b;
- unsigned i;
+ unsigned int i;
bch_cache_accounting_destroy(&c->accounting);
@@ -1659,6 +1686,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
{
int iter_size;
struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
+
if (!c)
return NULL;
@@ -1687,6 +1715,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->block_bits = ilog2(sb->block_size);
c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
c->devices_max_used = 0;
+ atomic_set(&c->attached_dev_nr, 0);
c->btree_pages = bucket_pages(c);
if (c->btree_pages > BTREE_MAX_PAGES)
c->btree_pages = max_t(int, c->btree_pages / 4,
@@ -1718,8 +1747,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL)) ||
mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
mempool_init_kmalloc_pool(&c->bio_meta, 2,
- sizeof(struct bbio) + sizeof(struct bio_vec) *
- bucket_pages(c)) ||
+ sizeof(struct bbio) + sizeof(struct bio_vec) *
+ bucket_pages(c)) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
@@ -1749,7 +1778,7 @@ static void run_cache_set(struct cache_set *c)
struct cached_dev *dc, *t;
struct cache *ca;
struct closure cl;
- unsigned i;
+ unsigned int i;
closure_init_stack(&cl);
@@ -1791,7 +1820,9 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "error reading btree root";
- c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
+ c->root = bch_btree_node_get(c, NULL, k,
+ j->btree_level,
+ true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@@ -1840,7 +1871,7 @@ static void run_cache_set(struct cache_set *c)
pr_notice("invalidating existing data");
for_each_cache(ca, c, i) {
- unsigned j;
+ unsigned int j;
ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
2, SB_JOURNAL_BUCKETS);
@@ -1894,7 +1925,7 @@ static void run_cache_set(struct cache_set *c)
goto err;
closure_sync(&cl);
- c->sb.last_mount = get_seconds();
+ c->sb.last_mount = (u32)ktime_get_real_seconds();
bcache_write_super(c);
list_for_each_entry_safe(dc, t, &uncached_devices, list)
@@ -1985,7 +2016,7 @@ err:
void bch_cache_release(struct kobject *kobj)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
- unsigned i;
+ unsigned int i;
if (ca->set) {
BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
@@ -2085,7 +2116,9 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
goto err;
}
- if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) {
+ if (kobject_add(&ca->kobj,
+ &part_to_dev(bdev->bd_part)->kobj,
+ "bcache")) {
err = "error calling kobject_add";
ret = -ENOMEM;
goto out;
@@ -2114,13 +2147,14 @@ err:
/* Global interfaces/init */
-static ssize_t register_bcache(struct kobject *, struct kobj_attribute *,
- const char *, size_t);
+static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
+ const char *buffer, size_t size);
kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
-static bool bch_is_open_backing(struct block_device *bdev) {
+static bool bch_is_open_backing(struct block_device *bdev)
+{
struct cache_set *c, *tc;
struct cached_dev *dc, *t;
@@ -2134,10 +2168,11 @@ static bool bch_is_open_backing(struct block_device *bdev) {
return false;
}
-static bool bch_is_open_cache(struct block_device *bdev) {
+static bool bch_is_open_cache(struct block_device *bdev)
+{
struct cache_set *c, *tc;
struct cache *ca;
- unsigned i;
+ unsigned int i;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
for_each_cache(ca, c, i)
@@ -2146,7 +2181,8 @@ static bool bch_is_open_cache(struct block_device *bdev) {
return false;
}
-static bool bch_is_open(struct block_device *bdev) {
+static bool bch_is_open(struct block_device *bdev)
+{
return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
}
@@ -2163,8 +2199,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!try_module_get(THIS_MODULE))
return -EBUSY;
- if (!(path = kstrndup(buffer, size, GFP_KERNEL)) ||
- !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL)))
+ path = kstrndup(buffer, size, GFP_KERNEL);
+ if (!path)
+ goto err;
+
+ sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
+ if (!sb)
goto err;
err = "failed to open device";
@@ -2199,6 +2239,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
err = "failed to register device";
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+
if (!dc)
goto err_close;
@@ -2207,6 +2248,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
mutex_unlock(&bch_register_lock);
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
if (!ca)
goto err_close;
@@ -2324,13 +2366,21 @@ static int __init bcache_init(void)
return bcache_major;
}
- if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
- !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
- bch_request_init() ||
- bch_debug_init(bcache_kobj) || closure_debug_init() ||
+ bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
+ if (!bcache_wq)
+ goto err;
+
+ bcache_kobj = kobject_create_and_add("bcache", fs_kobj);
+ if (!bcache_kobj)
+ goto err;
+
+ if (bch_request_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;
+ bch_debug_init(bcache_kobj);
+ closure_debug_init();
+
return 0;
err:
bcache_exit();
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 225b15aa0340..150cf4f4cf74 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -130,8 +130,10 @@ rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
rw_attribute(size);
-static ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
- size_t selected)
+static ssize_t bch_snprint_string_list(char *buf,
+ size_t size,
+ const char * const list[],
+ size_t selected)
{
char *out = buf;
size_t i;
@@ -148,7 +150,8 @@ SHOW(__bch_cached_dev)
{
struct cached_dev *dc = container_of(kobj, struct cached_dev,
disk.kobj);
- const char *states[] = { "no cache", "clean", "dirty", "inconsistent" };
+ char const *states[] = { "no cache", "clean", "dirty", "inconsistent" };
+ int wb = dc->writeback_running;
#define var(stat) (dc->stat)
@@ -170,7 +173,8 @@ SHOW(__bch_cached_dev)
var_printf(writeback_running, "%i");
var_print(writeback_delay);
var_print(writeback_percent);
- sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
+ sysfs_hprint(writeback_rate,
+ wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
sysfs_printf(io_error_limit, "%i", dc->error_limit);
sysfs_printf(io_disable, "%i", dc->io_disable);
@@ -188,15 +192,22 @@ SHOW(__bch_cached_dev)
char change[20];
s64 next_io;
- bch_hprint(rate, dc->writeback_rate.rate << 9);
- bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
- bch_hprint(target, dc->writeback_rate_target << 9);
- bch_hprint(proportional,dc->writeback_rate_proportional << 9);
- bch_hprint(integral, dc->writeback_rate_integral_scaled << 9);
- bch_hprint(change, dc->writeback_rate_change << 9);
-
- next_io = div64_s64(dc->writeback_rate.next - local_clock(),
- NSEC_PER_MSEC);
+ /*
+ * Except for dirty and target, other values should
+ * be 0 if writeback is not running.
+ */
+ bch_hprint(rate,
+ wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
+ : 0);
+ bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
+ bch_hprint(target, dc->writeback_rate_target << 9);
+ bch_hprint(proportional,
+ wb ? dc->writeback_rate_proportional << 9 : 0);
+ bch_hprint(integral,
+ wb ? dc->writeback_rate_integral_scaled << 9 : 0);
+ bch_hprint(change, wb ? dc->writeback_rate_change << 9 : 0);
+ next_io = wb ? div64_s64(dc->writeback_rate.next-local_clock(),
+ NSEC_PER_MSEC) : 0;
return sprintf(buf,
"rate:\t\t%s/sec\n"
@@ -255,8 +266,19 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
+ if (attr == &sysfs_writeback_rate) {
+ ssize_t ret;
+ long int v = atomic_long_read(&dc->writeback_rate.rate);
+
+ ret = strtoul_safe_clamp(buf, v, 1, INT_MAX);
+
+ if (!ret) {
+ atomic_long_set(&dc->writeback_rate.rate, v);
+ ret = size;
+ }
+
+ return ret;
+ }
sysfs_strtoul_clamp(writeback_rate_update_seconds,
dc->writeback_rate_update_seconds,
@@ -287,7 +309,7 @@ STORE(__cached_dev)
if (v < 0)
return v;
- if ((unsigned) v != BDEV_CACHE_MODE(&dc->sb)) {
+ if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
SET_BDEV_CACHE_MODE(&dc->sb, v);
bch_write_bdev_super(dc, NULL);
}
@@ -321,8 +343,9 @@ STORE(__cached_dev)
add_uevent_var(env, "DRIVER=bcache");
add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
add_uevent_var(env, "CACHED_LABEL=%s", buf);
- kobject_uevent_env(
- &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
+ kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
+ KOBJ_CHANGE,
+ env->envp);
kfree(env);
}
@@ -338,8 +361,8 @@ STORE(__cached_dev)
if (!v)
return size;
}
-
- pr_err("Can't attach %s: cache set not found", buf);
+ if (v == -ENOENT)
+ pr_err("Can't attach %s: cache set not found", buf);
return v;
}
@@ -439,6 +462,7 @@ STORE(__bch_flash_dev)
if (attr == &sysfs_size) {
uint64_t v;
+
strtoi_h_or_return(buf, v);
u->sectors = v >> 9;
@@ -513,9 +537,9 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
op.stats.floats, op.stats.failed);
}
-static unsigned bch_root_usage(struct cache_set *c)
+static unsigned int bch_root_usage(struct cache_set *c)
{
- unsigned bytes = 0;
+ unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
struct btree_iter iter;
@@ -550,9 +574,9 @@ static size_t bch_cache_size(struct cache_set *c)
return ret;
}
-static unsigned bch_cache_max_chain(struct cache_set *c)
+static unsigned int bch_cache_max_chain(struct cache_set *c)
{
- unsigned ret = 0;
+ unsigned int ret = 0;
struct hlist_head *h;
mutex_lock(&c->bucket_lock);
@@ -560,7 +584,7 @@ static unsigned bch_cache_max_chain(struct cache_set *c)
for (h = c->bucket_hash;
h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
h++) {
- unsigned i = 0;
+ unsigned int i = 0;
struct hlist_node *p;
hlist_for_each(p, h)
@@ -573,13 +597,13 @@ static unsigned bch_cache_max_chain(struct cache_set *c)
return ret;
}
-static unsigned bch_btree_used(struct cache_set *c)
+static unsigned int bch_btree_used(struct cache_set *c)
{
return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
-static unsigned bch_average_key_size(struct cache_set *c)
+static unsigned int bch_average_key_size(struct cache_set *c)
{
return c->gc_stats.nkeys
? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
@@ -683,6 +707,7 @@ STORE(__bch_cache_set)
if (attr == &sysfs_flash_vol_create) {
int r;
uint64_t v;
+
strtoi_h_or_return(buf, v);
r = bch_flash_dev_create(c, v);
@@ -716,6 +741,7 @@ STORE(__bch_cache_set)
if (attr == &sysfs_prune_cache) {
struct shrink_control sc;
+
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
c->shrink.scan_objects(&c->shrink, &sc);
@@ -769,12 +795,14 @@ STORE_LOCKED(bch_cache_set)
SHOW(bch_cache_set_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
+
return bch_cache_set_show(&c->kobj, attr, buf);
}
STORE(bch_cache_set_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
+
return bch_cache_set_store(&c->kobj, attr, buf, size);
}
@@ -976,7 +1004,7 @@ STORE(__bch_cache)
if (v < 0)
return v;
- if ((unsigned) v != CACHE_REPLACEMENT(&ca->sb)) {
+ if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) {
mutex_lock(&ca->set->bucket_lock);
SET_CACHE_REPLACEMENT(&ca->sb, v);
mutex_unlock(&ca->set->bucket_lock);
diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h
index b54fe9602529..3fe82425859c 100644
--- a/drivers/md/bcache/sysfs.h
+++ b/drivers/md/bcache/sysfs.h
@@ -44,9 +44,9 @@ STORE(fn) \
static struct attribute sysfs_##_name = \
{ .name = #_name, .mode = _mode }
-#define write_attribute(n) __sysfs_attribute(n, S_IWUSR)
-#define read_attribute(n) __sysfs_attribute(n, S_IRUGO)
-#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR)
+#define write_attribute(n) __sysfs_attribute(n, 0200)
+#define read_attribute(n) __sysfs_attribute(n, 0444)
+#define rw_attribute(n) __sysfs_attribute(n, 0644)
#define sysfs_printf(file, fmt, ...) \
do { \
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index fc479b026d6d..20eddeac1531 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* random utiility code, for bcache but in theory not specific to bcache
*
@@ -133,6 +134,7 @@ bool bch_is_zero(const char *p, size_t n)
int bch_parse_uuid(const char *s, char *uuid)
{
size_t i, j, x;
+
memset(uuid, 0, 16);
for (i = 0, j = 0;
@@ -200,7 +202,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();
- d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+ d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));
/* Bound the time. Don't let us fall further than 2 seconds behind
* (this prevents unnecessary backlog that would make it impossible
@@ -279,134 +281,3 @@ int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
return 0;
}
-
-/*
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
- * use permitted, subject to terms of PostgreSQL license; see.)
-
- * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
- * usual sort of implementation. (See Ross Williams' excellent introduction
- * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
- * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
- * If we have no working 64-bit type, then fake it with two 32-bit registers.
- *
- * The present implementation is a normal (not "reflected", in Williams'
- * terms) 64-bit CRC, using initial all-ones register contents and a final
- * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
- * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
- *
- * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x + 1
-*/
-
-static const uint64_t crc_table[256] = {
- 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
- 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
- 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
- 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
- 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
- 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
- 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
- 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
- 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
- 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
- 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
- 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
- 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
- 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
- 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
- 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
- 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
- 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
- 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
- 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
- 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
- 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
- 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
- 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
- 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
- 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
- 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
- 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
- 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
- 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
- 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
- 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
- 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
- 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
- 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
- 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
- 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
- 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
- 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
- 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
- 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
- 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
- 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
- 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
- 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
- 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
- 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
- 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
- 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
- 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
- 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
- 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
- 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
- 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
- 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
- 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
- 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
- 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
- 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
- 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
- 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
- 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
- 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
- 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
- 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
- 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
- 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
- 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
- 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
- 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
- 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
- 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
- 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
- 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
- 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
- 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
- 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
- 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
- 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
- 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
- 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
- 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
- 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
- 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
- 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
- 0x9AFCE626CE85B507ULL,
-};
-
-uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len)
-{
- const unsigned char *data = _data;
-
- while (len--) {
- int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
- crc = crc_table[i] ^ (crc << 8);
- }
-
- return crc;
-}
-
-uint64_t bch_crc64(const void *data, size_t len)
-{
- uint64_t crc = 0xffffffffffffffffULL;
-
- crc = bch_crc64_update(crc, data, len);
-
- return crc ^ 0xffffffffffffffffULL;
-}
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index cced87f8eb27..00aab6abcfe4 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -11,6 +11,7 @@
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
+#include <linux/crc64.h>
#include "closure.h"
@@ -288,10 +289,10 @@ do { \
#define ANYSINT_MAX(t) \
((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
-int bch_strtoint_h(const char *, int *);
-int bch_strtouint_h(const char *, unsigned int *);
-int bch_strtoll_h(const char *, long long *);
-int bch_strtoull_h(const char *, unsigned long long *);
+int bch_strtoint_h(const char *cp, int *res);
+int bch_strtouint_h(const char *cp, unsigned int *res);
+int bch_strtoll_h(const char *cp, long long *res);
+int bch_strtoull_h(const char *cp, unsigned long long *res);
static inline int bch_strtol_h(const char *cp, long *res)
{
@@ -347,7 +348,7 @@ static inline int bch_strtoul_h(const char *cp, long *res)
snprintf(buf, size, \
__builtin_types_compatible_p(typeof(var), int) \
? "%i\n" : \
- __builtin_types_compatible_p(typeof(var), unsigned) \
+ __builtin_types_compatible_p(typeof(var), unsigned int) \
? "%u\n" : \
__builtin_types_compatible_p(typeof(var), long) \
? "%li\n" : \
@@ -379,7 +380,7 @@ struct time_stats {
void bch_time_stats_update(struct time_stats *stats, uint64_t time);
-static inline unsigned local_clock_us(void)
+static inline unsigned int local_clock_us(void)
{
return local_clock() >> 10;
}
@@ -402,7 +403,8 @@ do { \
__print_time_stat(stats, name, \
average_duration, duration_units); \
sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \
- div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
+ div_u64((stats)->max_duration, \
+ NSEC_PER_ ## duration_units)); \
\
sysfs_print(name ## _last_ ## frequency_units, (stats)->last \
? div_s64(local_clock() - (stats)->last, \
@@ -442,7 +444,7 @@ struct bch_ratelimit {
* Rate at which we want to do work, in units per second
* The units here correspond to the units passed to bch_next_delay()
*/
- uint32_t rate;
+ atomic_long_t rate;
};
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
@@ -542,10 +544,27 @@ dup: \
#define RB_PREV(ptr, member) \
container_of_or_null(rb_prev(&(ptr)->member), typeof(*ptr), member)
+static inline uint64_t bch_crc64(const void *p, size_t len)
+{
+ uint64_t crc = 0xffffffffffffffffULL;
+
+ crc = crc64_be(crc, p, len);
+ return crc ^ 0xffffffffffffffffULL;
+}
+
+static inline uint64_t bch_crc64_update(uint64_t crc,
+ const void *p,
+ size_t len)
+{
+ crc = crc64_be(crc, p, len);
+ return crc;
+}
+
/* Does linear interpolation between powers of two */
-static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
+static inline unsigned int fract_exp_two(unsigned int x,
+ unsigned int fract_bits)
{
- unsigned fract = x & ~(~0 << fract_bits);
+ unsigned int fract = x & ~(~0 << fract_bits);
x >>= fract_bits;
x = 1 << x;
@@ -561,8 +580,4 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
{
return bdev->bd_inode->i_size >> 9;
}
-
-uint64_t bch_crc64_update(uint64_t, const void *, size_t);
-uint64_t bch_crc64(const void *, size_t);
-
#endif /* _BCACHE_UTIL_H */
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index ad45ebe1a74b..6be05bd7ca67 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -27,7 +27,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc)
* flash-only devices
*/
uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
- bcache_flash_devs_sectors_dirty(c);
+ atomic_long_read(&c->flash_dev_dirty_sectors);
/*
* Unfortunately there is no control of global dirty data. If the
@@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc)
dc->writeback_rate_proportional = proportional_scaled;
dc->writeback_rate_integral_scaled = integral_scaled;
- dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
- dc->writeback_rate.rate = new_rate;
+ dc->writeback_rate_change = new_rate -
+ atomic_long_read(&dc->writeback_rate.rate);
+ atomic_long_set(&dc->writeback_rate.rate, new_rate);
dc->writeback_rate_target = target;
}
+static bool set_at_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *dc)
+{
+ /*
+ * c->idle_counter is incremented every time update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it takes about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1 and the maximum writeback rate
+ * is stored in each dc->writeback_rate.rate.
+ * To avoid the extra locking cost of counting exactly how many cached
+ * devices are dirty, c->attached_dev_nr is used to compute the idle
+ * threshold. It may be too large if not all cached devices are in
+ * writeback mode, but it still works well, at the cost of a few extra
+ * rounds of update_writeback_rate().
+ */
+ if (atomic_inc_return(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6)
+ return false;
+
+ if (atomic_read(&c->at_max_writeback_rate) != 1)
+ atomic_set(&c->at_max_writeback_rate, 1);
+
+ atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
+
+ /* keep writeback_rate_target as existing value */
+ dc->writeback_rate_proportional = 0;
+ dc->writeback_rate_integral_scaled = 0;
+ dc->writeback_rate_change = 0;
+
+ /*
+ * Check c->idle_counter and c->at_max_writeback_rate again in case
+ * new I/O arrives before set_at_max_writeback_rate() returns. In that
+ * case the writeback rate is set to 1, and its new value should be
+ * decided via __update_writeback_rate().
+ */
+ if ((atomic_read(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6) ||
+ !atomic_read(&c->at_max_writeback_rate))
+ return false;
+
+ return true;
+}
+
static void update_writeback_rate(struct work_struct *work)
{
struct cached_dev *dc = container_of(to_delayed_work(work),
@@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work)
return;
}
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
- dc->writeback_percent)
- __update_writeback_rate(dc);
+ if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
+ /*
+ * If the whole cache set is idle, set_at_max_writeback_rate()
+ * sets the writeback rate to its maximum value. It is then
+ * unnecessary to recalculate the writeback rate for an idle
+ * cache set that is already running at the maximum rate.
+ */
+ if (!set_at_max_writeback_rate(c, dc)) {
+ down_read(&dc->writeback_lock);
+ __update_writeback_rate(dc);
+ up_read(&dc->writeback_lock);
+ }
+ }
- up_read(&dc->writeback_lock);
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -163,7 +215,8 @@ static void update_writeback_rate(struct work_struct *work)
smp_mb();
}
-static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
+static unsigned int writeback_delay(struct cached_dev *dc,
+ unsigned int sectors)
{
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
!dc->writeback_percent)
@@ -197,6 +250,7 @@ static void dirty_init(struct keybuf_key *w)
static void dirty_io_destructor(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+
kfree(io);
}
@@ -211,7 +265,7 @@ static void write_dirty_finish(struct closure *cl)
/* This is kind of a dumb way of signalling errors. */
if (KEY_DIRTY(&w->key)) {
int ret;
- unsigned i;
+ unsigned int i;
struct keylist keys;
bch_keylist_init(&keys);
@@ -325,7 +379,7 @@ static void read_dirty_submit(struct closure *cl)
static void read_dirty(struct cached_dev *dc)
{
- unsigned delay = 0;
+ unsigned int delay = 0;
struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
size_t size;
int nk, i;
@@ -390,7 +444,8 @@ static void read_dirty(struct cached_dev *dc)
io = kzalloc(sizeof(struct dirty_io) +
sizeof(struct bio_vec) *
- DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
+ DIV_ROUND_UP(KEY_SIZE(&w->key),
+ PAGE_SECTORS),
GFP_KERNEL);
if (!io)
goto err;
@@ -413,7 +468,8 @@ static void read_dirty(struct cached_dev *dc)
down(&dc->in_flight);
- /* We've acquired a semaphore for the maximum
+ /*
+ * We've acquired a semaphore for the maximum
* simultaneous number of writebacks; from here
* everything happens asynchronously.
*/
@@ -422,27 +478,6 @@ static void read_dirty(struct cached_dev *dc)
delay = writeback_delay(dc, size);
- /* If the control system would wait for at least half a
- * second, and there's been no reqs hitting the backing disk
- * for awhile: use an alternate mode where we have at most
- * one contiguous set of writebacks in flight at a time. If
- * someone wants to do IO it will be quick, as it will only
- * have to contend with one operation in flight, and we'll
- * be round-tripping data to the backing disk as quickly as
- * it can accept it.
- */
- if (delay >= HZ / 2) {
- /* 3 means at least 1.5 seconds, up to 7.5 if we
- * have slowed way down.
- */
- if (atomic_inc_return(&dc->backing_idle) >= 3) {
- /* Wait for current I/Os to finish */
- closure_sync(&cl);
- /* And immediately launch a new set. */
- delay = 0;
- }
- }
-
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
delay) {
@@ -467,20 +502,23 @@ err:
/* Scan for dirty data */
-void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
uint64_t offset, int nr_sectors)
{
struct bcache_device *d = c->devices[inode];
- unsigned stripe_offset, stripe, sectors_dirty;
+ unsigned int stripe_offset, stripe, sectors_dirty;
if (!d)
return;
+ if (UUID_FLASH_ONLY(&c->uuids[inode]))
+ atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);
+
stripe = offset_to_stripe(d, offset);
stripe_offset = offset & (d->stripe_size - 1);
while (nr_sectors) {
- int s = min_t(unsigned, abs(nr_sectors),
+ int s = min_t(unsigned int, abs(nr_sectors),
d->stripe_size - stripe_offset);
if (nr_sectors < 0)
@@ -504,7 +542,9 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
- struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
+ struct cached_dev *dc = container_of(buf,
+ struct cached_dev,
+ writeback_keys);
BUG_ON(KEY_INODE(k) != dc->disk.id);
@@ -514,7 +554,7 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
static void refill_full_stripes(struct cached_dev *dc)
{
struct keybuf *buf = &dc->writeback_keys;
- unsigned start_stripe, stripe, next_stripe;
+ unsigned int start_stripe, stripe, next_stripe;
bool wrapped = false;
stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
@@ -654,7 +694,7 @@ static int bch_writeback_thread(void *arg)
read_dirty(dc);
if (searched_full_index) {
- unsigned delay = dc->writeback_delay * HZ;
+ unsigned int delay = dc->writeback_delay * HZ;
while (delay &&
!kthread_should_stop() &&
@@ -673,10 +713,14 @@ static int bch_writeback_thread(void *arg)
}
/* Init */
+#define INIT_KEYS_EACH_TIME 500000
+#define INIT_KEYS_SLEEP_MS 100
struct sectors_dirty_init {
struct btree_op op;
- unsigned inode;
+ unsigned int inode;
+ size_t count;
+ struct bkey start;
};
static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
@@ -691,18 +735,37 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
KEY_START(k), KEY_SIZE(k));
+ op->count++;
+ if (atomic_read(&b->c->search_inflight) &&
+ !(op->count % INIT_KEYS_EACH_TIME)) {
+ bkey_copy_key(&op->start, k);
+ return -EAGAIN;
+ }
+
return MAP_CONTINUE;
}
void bch_sectors_dirty_init(struct bcache_device *d)
{
struct sectors_dirty_init op;
+ int ret;
bch_btree_op_init(&op.op, -1);
op.inode = d->id;
-
- bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0),
- sectors_dirty_init_fn, 0);
+ op.count = 0;
+ op.start = KEY(op.inode, 0, 0);
+
+ do {
+ ret = bch_btree_map_keys(&op.op, d->c, &op.start,
+ sectors_dirty_init_fn, 0);
+ if (ret == -EAGAIN)
+ schedule_timeout_interruptible(
+ msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
+ else if (ret < 0) {
+ pr_warn("sectors dirty init failed, ret=%d!", ret);
+ break;
+ }
+ } while (ret == -EAGAIN);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
@@ -715,7 +778,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_running = true;
dc->writeback_percent = 10;
dc->writeback_delay = 30;
- dc->writeback_rate.rate = 1024;
+ atomic_long_set(&dc->writeback_rate.rate, 1024);
dc->writeback_rate_minimum = 8;
dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 610fb01de629..d2b9fdbc8994 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -28,26 +28,7 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
return ret;
}
-static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
-{
- uint64_t i, ret = 0;
-
- mutex_lock(&bch_register_lock);
-
- for (i = 0; i < c->devices_max_used; i++) {
- struct bcache_device *d = c->devices[i];
-
- if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
- continue;
- ret += bcache_dev_sectors_dirty(d);
- }
-
- mutex_unlock(&bch_register_lock);
-
- return ret;
-}
-
-static inline unsigned offset_to_stripe(struct bcache_device *d,
+static inline unsigned int offset_to_stripe(struct bcache_device *d,
uint64_t offset)
{
do_div(offset, d->stripe_size);
@@ -56,9 +37,9 @@ static inline unsigned offset_to_stripe(struct bcache_device *d,
static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
uint64_t offset,
- unsigned nr_sectors)
+ unsigned int nr_sectors)
{
- unsigned stripe = offset_to_stripe(&dc->disk, offset);
+ unsigned int stripe = offset_to_stripe(&dc->disk, offset);
while (1) {
if (atomic_read(dc->disk.stripe_sectors_dirty + stripe))
@@ -73,9 +54,9 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
}
static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
- unsigned cache_mode, bool would_skip)
+ unsigned int cache_mode, bool would_skip)
{
- unsigned in_use = dc->disk.c->gc_stats.in_use;
+ unsigned int in_use = dc->disk.c->gc_stats.in_use;
if (cache_mode != CACHE_MODE_WRITEBACK ||
test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
@@ -115,10 +96,11 @@ static inline void bch_writeback_add(struct cached_dev *dc)
}
}
-void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
+ uint64_t offset, int nr_sectors);
-void bch_sectors_dirty_init(struct bcache_device *);
-void bch_cached_dev_writeback_init(struct cached_dev *);
-int bch_cached_dev_writeback_start(struct cached_dev *);
+void bch_sectors_dirty_init(struct bcache_device *d);
+void bch_cached_dev_writeback_init(struct cached_dev *dc);
+int bch_cached_dev_writeback_start(struct cached_dev *dc);
#endif