summaryrefslogtreecommitdiff
path: root/drivers/md/bcache
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r--drivers/md/bcache/bcache.h7
-rw-r--r--drivers/md/bcache/bset.c12
-rw-r--r--drivers/md/bcache/btree.c21
-rw-r--r--drivers/md/bcache/debug.c2
-rw-r--r--drivers/md/bcache/features.c2
-rw-r--r--drivers/md/bcache/features.h36
-rw-r--r--drivers/md/bcache/journal.c4
-rw-r--r--drivers/md/bcache/request.c39
-rw-r--r--drivers/md/bcache/super.c79
-rw-r--r--drivers/md/bcache/sysfs.c29
-rw-r--r--drivers/md/bcache/writeback.c42
-rw-r--r--drivers/md/bcache/writeback.h4
12 files changed, 238 insertions, 39 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 1d57f48307e6..848dd4db1659 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -373,6 +373,7 @@ struct cached_dev {
unsigned int partial_stripes_expensive:1;
unsigned int writeback_metadata:1;
unsigned int writeback_running:1;
+ unsigned int writeback_consider_fragment:1;
unsigned char writeback_percent;
unsigned int writeback_delay;
@@ -385,6 +386,9 @@ struct cached_dev {
unsigned int writeback_rate_update_seconds;
unsigned int writeback_rate_i_term_inverse;
unsigned int writeback_rate_p_term_inverse;
+ unsigned int writeback_rate_fp_term_low;
+ unsigned int writeback_rate_fp_term_mid;
+ unsigned int writeback_rate_fp_term_high;
unsigned int writeback_rate_minimum;
enum stop_on_failure stop_when_cache_set_failed;
@@ -1001,6 +1005,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
extern struct workqueue_struct *bcache_wq;
extern struct workqueue_struct *bch_journal_wq;
+extern struct workqueue_struct *bch_flush_wq;
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
@@ -1042,5 +1047,7 @@ void bch_debug_exit(void);
void bch_debug_init(void);
void bch_request_exit(void);
int bch_request_init(void);
+void bch_btree_exit(void);
+int bch_btree_init(void);
#endif /* _BCACHE_H */
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 67a2c47f4201..94d38e8a59b3 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -712,8 +712,10 @@ void bch_bset_build_written_tree(struct btree_keys *b)
for (j = inorder_next(0, t->size);
j;
j = inorder_next(j, t->size)) {
- while (bkey_to_cacheline(t, k) < cacheline)
- prev = k, k = bkey_next(k);
+ while (bkey_to_cacheline(t, k) < cacheline) {
+ prev = k;
+ k = bkey_next(k);
+ }
t->prev[j] = bkey_u64s(prev);
t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
@@ -901,8 +903,10 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
status = BTREE_INSERT_STATUS_INSERT;
while (m != bset_bkey_last(i) &&
- bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
- prev = m, m = bkey_next(m);
+ bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0) {
+ prev = m;
+ m = bkey_next(m);
+ }
/* prev is in the tree, if we merge we're done */
status = BTREE_INSERT_STATUS_BACK_MERGE;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 910df242c83d..fe6dce125aba 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -99,6 +99,8 @@
#define PTR_HASH(c, k) \
(((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0))
+static struct workqueue_struct *btree_io_wq;
+
#define insert_lock(s, b) ((b)->level <= (s)->lock)
@@ -308,7 +310,7 @@ static void __btree_node_write_done(struct closure *cl)
btree_complete_write(b, w);
if (btree_node_dirty(b))
- schedule_delayed_work(&b->work, 30 * HZ);
+ queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
closure_return_with_destructor(cl, btree_node_write_unlock);
}
@@ -481,7 +483,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
BUG_ON(!i->keys);
if (!btree_node_dirty(b))
- schedule_delayed_work(&b->work, 30 * HZ);
+ queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
set_btree_node_dirty(b);
@@ -2764,3 +2766,18 @@ void bch_keybuf_init(struct keybuf *buf)
spin_lock_init(&buf->lock);
array_allocator_init(&buf->freelist);
}
+
+void bch_btree_exit(void)
+{
+ if (btree_io_wq)
+ destroy_workqueue(btree_io_wq);
+}
+
+int __init bch_btree_init(void)
+{
+ btree_io_wq = alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0);
+ if (!btree_io_wq)
+ return -ENOMEM;
+
+ return 0;
+}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index b00fd08d696b..63e809f38e3f 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -114,7 +114,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
if (!check)
return;
- check->bi_disk = bio->bi_disk;
+ bio_set_dev(check, bio->bi_bdev);
check->bi_opf = REQ_OP_READ;
check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
check->bi_iter.bi_size = bio->bi_iter.bi_size;
diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c
index 6469223f0b77..d636b7b2d070 100644
--- a/drivers/md/bcache/features.c
+++ b/drivers/md/bcache/features.c
@@ -17,7 +17,7 @@ struct feature {
};
static struct feature feature_list[] = {
- {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET,
+ {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE,
"large_bucket"},
{0, 0, 0 },
};
diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
index a1653c478041..d1c8fd3977fc 100644
--- a/drivers/md/bcache/features.h
+++ b/drivers/md/bcache/features.h
@@ -13,11 +13,15 @@
/* Feature set definition */
/* Incompat feature set */
-#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */
+/* 32bit bucket size, obsoleted */
+#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET 0x0001
+/* real bucket size is (1 << bucket_size) */
+#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE 0x0002
-#define BCH_FEATURE_COMPAT_SUUP 0
-#define BCH_FEATURE_RO_COMPAT_SUUP 0
-#define BCH_FEATURE_INCOMPAT_SUUP BCH_FEATURE_INCOMPAT_LARGE_BUCKET
+#define BCH_FEATURE_COMPAT_SUPP 0
+#define BCH_FEATURE_RO_COMPAT_SUPP 0
+#define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \
+ BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE)
#define BCH_HAS_COMPAT_FEATURE(sb, mask) \
((sb)->feature_compat & (mask))
@@ -29,6 +33,8 @@
#define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \
static inline int bch_has_feature_##name(struct cache_sb *sb) \
{ \
+ if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \
+ return 0; \
return (((sb)->feature_compat & \
BCH##_FEATURE_COMPAT_##flagname) != 0); \
} \
@@ -46,6 +52,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
#define BCH_FEATURE_RO_COMPAT_FUNCS(name, flagname) \
static inline int bch_has_feature_##name(struct cache_sb *sb) \
{ \
+ if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \
+ return 0; \
return (((sb)->feature_ro_compat & \
BCH##_FEATURE_RO_COMPAT_##flagname) != 0); \
} \
@@ -63,6 +71,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
#define BCH_FEATURE_INCOMPAT_FUNCS(name, flagname) \
static inline int bch_has_feature_##name(struct cache_sb *sb) \
{ \
+ if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \
+ return 0; \
return (((sb)->feature_incompat & \
BCH##_FEATURE_INCOMPAT_##flagname) != 0); \
} \
@@ -77,7 +87,23 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
~BCH##_FEATURE_INCOMPAT_##flagname; \
}
-BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE);
+
+static inline bool bch_has_unknown_compat_features(struct cache_sb *sb)
+{
+ return ((sb->feature_compat & ~BCH_FEATURE_COMPAT_SUPP) != 0);
+}
+
+static inline bool bch_has_unknown_ro_compat_features(struct cache_sb *sb)
+{
+ return ((sb->feature_ro_compat & ~BCH_FEATURE_RO_COMPAT_SUPP) != 0);
+}
+
+static inline bool bch_has_unknown_incompat_features(struct cache_sb *sb)
+{
+ return ((sb->feature_incompat & ~BCH_FEATURE_INCOMPAT_SUPP) != 0);
+}
int bch_print_cache_set_feature_compat(struct cache_set *c, char *buf, int size);
int bch_print_cache_set_feature_ro_compat(struct cache_set *c, char *buf, int size);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index aefbdb7e003b..c6613e817333 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -932,8 +932,8 @@ atomic_t *bch_journal(struct cache_set *c,
journal_try_write(c);
} else if (!w->dirty) {
w->dirty = true;
- schedule_delayed_work(&c->journal.work,
- msecs_to_jiffies(c->journal_delay_ms));
+ queue_delayed_work(bch_flush_wq, &c->journal.work,
+ msecs_to_jiffies(c->journal_delay_ms));
spin_unlock(&c->journal.lock);
} else {
spin_unlock(&c->journal.lock);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 85b1f2a9b72d..29c231758293 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -475,7 +475,7 @@ struct search {
unsigned int read_dirty_data:1;
unsigned int cache_missed:1;
- struct block_device *part;
+ struct block_device *orig_bdev;
unsigned long start_time;
struct btree_op op;
@@ -670,8 +670,8 @@ static void bio_complete(struct search *s)
{
if (s->orig_bio) {
/* Count on bcache device */
- part_end_io_acct(s->part, s->orig_bio, s->start_time);
-
+ bio_end_io_acct_remapped(s->orig_bio, s->start_time,
+ s->orig_bdev);
trace_bcache_request_end(s->d, s->orig_bio);
s->orig_bio->bi_status = s->iop.status;
bio_endio(s->orig_bio);
@@ -714,7 +714,8 @@ static void search_free(struct closure *cl)
}
static inline struct search *search_alloc(struct bio *bio,
- struct bcache_device *d)
+ struct bcache_device *d, struct block_device *orig_bdev,
+ unsigned long start_time)
{
struct search *s;
@@ -732,7 +733,8 @@ static inline struct search *search_alloc(struct bio *bio,
s->write = op_is_write(bio_op(bio));
s->read_dirty_data = 0;
/* Count on the bcache device */
- s->start_time = part_start_io_acct(d->disk, &s->part, bio);
+ s->orig_bdev = orig_bdev;
+ s->start_time = start_time;
s->iop.c = d->c;
s->iop.bio = NULL;
s->iop.inode = d->id;
@@ -894,7 +896,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
reada = min_t(sector_t, dc->readahead >> 9,
- get_capacity(bio->bi_disk) - bio_end_sector(bio));
+ get_capacity(bio->bi_bdev->bd_disk) -
+ bio_end_sector(bio));
s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
@@ -1073,7 +1076,7 @@ struct detached_dev_io_private {
unsigned long start_time;
bio_end_io_t *bi_end_io;
void *bi_private;
- struct block_device *part;
+ struct block_device *orig_bdev;
};
static void detached_dev_end_io(struct bio *bio)
@@ -1085,7 +1088,7 @@ static void detached_dev_end_io(struct bio *bio)
bio->bi_private = ddip->bi_private;
/* Count on the bcache device */
- part_end_io_acct(ddip->part, bio, ddip->start_time);
+ bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
if (bio->bi_status) {
struct cached_dev *dc = container_of(ddip->d,
@@ -1098,7 +1101,8 @@ static void detached_dev_end_io(struct bio *bio)
bio->bi_end_io(bio);
}
-static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
+ struct block_device *orig_bdev, unsigned long start_time)
{
struct detached_dev_io_private *ddip;
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
@@ -1111,7 +1115,8 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
ddip->d = d;
/* Count on the bcache device */
- ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio);
+ ddip->orig_bdev = orig_bdev;
+ ddip->start_time = start_time;
ddip->bi_end_io = bio->bi_end_io;
ddip->bi_private = bio->bi_private;
bio->bi_end_io = detached_dev_end_io;
@@ -1167,8 +1172,10 @@ static void quit_max_writeback_rate(struct cache_set *c,
blk_qc_t cached_dev_submit_bio(struct bio *bio)
{
struct search *s;
- struct bcache_device *d = bio->bi_disk->private_data;
+ struct block_device *orig_bdev = bio->bi_bdev;
+ struct bcache_device *d = orig_bdev->bd_disk->private_data;
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+ unsigned long start_time;
int rw = bio_data_dir(bio);
if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
@@ -1193,11 +1200,13 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
}
}
+ start_time = bio_start_io_acct(bio);
+
bio_set_dev(bio, dc->bdev);
bio->bi_iter.bi_sector += dc->sb.data_offset;
if (cached_dev_get(dc)) {
- s = search_alloc(bio, d);
+ s = search_alloc(bio, d, orig_bdev, start_time);
trace_bcache_request_start(s->d, bio);
if (!bio->bi_iter.bi_size) {
@@ -1218,7 +1227,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
}
} else
/* I/O request sent to backing device */
- detached_dev_do_request(d, bio);
+ detached_dev_do_request(d, bio, orig_bdev, start_time);
return BLK_QC_T_NONE;
}
@@ -1274,7 +1283,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
{
struct search *s;
struct closure *cl;
- struct bcache_device *d = bio->bi_disk->private_data;
+ struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
bio->bi_status = BLK_STS_IOERR;
@@ -1282,7 +1291,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
return BLK_QC_T_NONE;
}
- s = search_alloc(bio, d);
+ s = search_alloc(bio, d, bio->bi_bdev, bio_start_io_acct(bio));
cl = &s->cl;
bio = &s->bio.bio;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index a4752ac410dc..71691f32959b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -49,6 +49,7 @@ static int bcache_major;
static DEFINE_IDA(bcache_device_idx);
static wait_queue_head_t unregister_wait;
struct workqueue_struct *bcache_wq;
+struct workqueue_struct *bch_flush_wq;
struct workqueue_struct *bch_journal_wq;
@@ -64,9 +65,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s
{
unsigned int bucket_size = le16_to_cpu(s->bucket_size);
- if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES &&
- bch_has_feature_large_bucket(sb))
- bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16;
+ if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
+ if (bch_has_feature_large_bucket(sb)) {
+ unsigned int max, order;
+
+ max = sizeof(unsigned int) * BITS_PER_BYTE - 1;
+ order = le16_to_cpu(s->bucket_size);
+ /*
+ * bcache tool will make sure the overflow won't
+ * happen, an error message here is enough.
+ */
+ if (order > max)
+ pr_err("Bucket size (1 << %u) overflows\n",
+ order);
+ bucket_size = 1 << order;
+ } else if (bch_has_feature_obso_large_bucket(sb)) {
+ bucket_size +=
+ le16_to_cpu(s->obso_bucket_size_hi) << 16;
+ }
+ }
return bucket_size;
}
@@ -228,6 +245,20 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
sb->feature_compat = le64_to_cpu(s->feature_compat);
sb->feature_incompat = le64_to_cpu(s->feature_incompat);
sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat);
+
+ /* Check incompatible features */
+ err = "Unsupported compatible feature found";
+ if (bch_has_unknown_compat_features(sb))
+ goto err;
+
+ err = "Unsupported read-only compatible feature found";
+ if (bch_has_unknown_ro_compat_features(sb))
+ goto err;
+
+ err = "Unsupported incompatible feature found";
+ if (bch_has_unknown_incompat_features(sb))
+ goto err;
+
err = read_super_common(sb, bdev, s);
if (err)
goto err;
@@ -1302,6 +1333,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
bcache_device_link(&dc->disk, c, "bdev");
atomic_inc(&c->attached_dev_nr);
+ if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) {
+ pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
+ pr_err("Please update to the latest bcache-tools to create the cache device\n");
+ set_disk_ro(dc->disk.disk, 1);
+ }
+
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
@@ -1524,6 +1561,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
bcache_device_link(d, c, "volume");
+ if (bch_has_feature_obso_large_bucket(&c->cache->sb)) {
+ pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
+ pr_err("Please update to the latest bcache-tools to create the cache device\n");
+ set_disk_ro(d->disk, 1);
+ }
+
return 0;
err:
kobject_put(&d->kobj);
@@ -1897,7 +1940,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
goto err;
if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
- BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
+ BIOSET_NEED_RESCUER))
goto err;
c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb);
@@ -2083,6 +2126,9 @@ static int run_cache_set(struct cache_set *c)
c->cache->sb.last_mount = (u32)ktime_get_real_seconds();
bcache_write_super(c);
+ if (bch_has_feature_obso_large_bucket(&c->cache->sb))
+ pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n");
+
list_for_each_entry_safe(dc, t, &uncached_devices, list)
bch_cached_dev_attach(dc, c, NULL);
@@ -2472,7 +2518,7 @@ out:
module_put(THIS_MODULE);
}
-static void register_device_aync(struct async_reg_args *args)
+static void register_device_async(struct async_reg_args *args)
{
if (SB_IS_BDEV(args->sb))
INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker);
@@ -2566,7 +2612,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
args->sb = sb;
args->sb_disk = sb_disk;
args->bdev = bdev;
- register_device_aync(args);
+ register_device_async(args);
/* No wait and returns to user space */
goto async_done;
}
@@ -2644,8 +2690,8 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
}
list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
+ char *pdev_set_uuid = pdev->dc->sb.set_uuid;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
- char *pdev_set_uuid = pdev->dc->sb.set_uuid;
char *set_uuid = c->set_uuid;
if (!memcmp(pdev_set_uuid, set_uuid, 16)) {
@@ -2776,6 +2822,9 @@ static void bcache_exit(void)
destroy_workqueue(bcache_wq);
if (bch_journal_wq)
destroy_workqueue(bch_journal_wq);
+ if (bch_flush_wq)
+ destroy_workqueue(bch_flush_wq);
+ bch_btree_exit();
if (bcache_major)
unregister_blkdev(bcache_major, "bcache");
@@ -2831,10 +2880,26 @@ static int __init bcache_init(void)
return bcache_major;
}
+ if (bch_btree_init())
+ goto err;
+
bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
if (!bcache_wq)
goto err;
+ /*
+ * Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
+ *
+ * 1. It used `system_wq` before which also does no memory reclaim.
+ * 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and
+ * reduced throughput can be observed.
+ *
+ * We still want to user our own queue to not congest the `system_wq`.
+ */
+ bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
+ if (!bch_flush_wq)
+ goto err;
+
bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
if (!bch_journal_wq)
goto err;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 00a520c03f41..cc89f3156d1a 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -117,10 +117,14 @@ rw_attribute(writeback_running);
rw_attribute(writeback_percent);
rw_attribute(writeback_delay);
rw_attribute(writeback_rate);
+rw_attribute(writeback_consider_fragment);
rw_attribute(writeback_rate_update_seconds);
rw_attribute(writeback_rate_i_term_inverse);
rw_attribute(writeback_rate_p_term_inverse);
+rw_attribute(writeback_rate_fp_term_low);
+rw_attribute(writeback_rate_fp_term_mid);
+rw_attribute(writeback_rate_fp_term_high);
rw_attribute(writeback_rate_minimum);
read_attribute(writeback_rate_debug);
@@ -195,6 +199,7 @@ SHOW(__bch_cached_dev)
var_printf(bypass_torture_test, "%i");
var_printf(writeback_metadata, "%i");
var_printf(writeback_running, "%i");
+ var_printf(writeback_consider_fragment, "%i");
var_print(writeback_delay);
var_print(writeback_percent);
sysfs_hprint(writeback_rate,
@@ -205,6 +210,9 @@ SHOW(__bch_cached_dev)
var_print(writeback_rate_update_seconds);
var_print(writeback_rate_i_term_inverse);
var_print(writeback_rate_p_term_inverse);
+ var_print(writeback_rate_fp_term_low);
+ var_print(writeback_rate_fp_term_mid);
+ var_print(writeback_rate_fp_term_high);
var_print(writeback_rate_minimum);
if (attr == &sysfs_writeback_rate_debug) {
@@ -303,6 +311,7 @@ STORE(__cached_dev)
sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
sysfs_strtoul_bool(writeback_running, dc->writeback_running);
+ sysfs_strtoul_bool(writeback_consider_fragment, dc->writeback_consider_fragment);
sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX);
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
@@ -331,6 +340,16 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
dc->writeback_rate_p_term_inverse,
1, UINT_MAX);
+ sysfs_strtoul_clamp(writeback_rate_fp_term_low,
+ dc->writeback_rate_fp_term_low,
+ 1, dc->writeback_rate_fp_term_mid - 1);
+ sysfs_strtoul_clamp(writeback_rate_fp_term_mid,
+ dc->writeback_rate_fp_term_mid,
+ dc->writeback_rate_fp_term_low + 1,
+ dc->writeback_rate_fp_term_high - 1);
+ sysfs_strtoul_clamp(writeback_rate_fp_term_high,
+ dc->writeback_rate_fp_term_high,
+ dc->writeback_rate_fp_term_mid + 1, UINT_MAX);
sysfs_strtoul_clamp(writeback_rate_minimum,
dc->writeback_rate_minimum,
1, UINT_MAX);
@@ -499,9 +518,13 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_delay,
&sysfs_writeback_percent,
&sysfs_writeback_rate,
+ &sysfs_writeback_consider_fragment,
&sysfs_writeback_rate_update_seconds,
&sysfs_writeback_rate_i_term_inverse,
&sysfs_writeback_rate_p_term_inverse,
+ &sysfs_writeback_rate_fp_term_low,
+ &sysfs_writeback_rate_fp_term_mid,
+ &sysfs_writeback_rate_fp_term_high,
&sysfs_writeback_rate_minimum,
&sysfs_writeback_rate_debug,
&sysfs_io_errors,
@@ -1071,8 +1094,10 @@ SHOW(__bch_cache)
--n;
while (cached < p + n &&
- *cached == BTREE_PRIO)
- cached++, n--;
+ *cached == BTREE_PRIO) {
+ cached++;
+ n--;
+ }
for (i = 0; i < n; i++)
sum += INITIAL_PRIO - cached[i];
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index a129e4d2707c..82d4e0880a99 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -88,6 +88,44 @@ static void __update_writeback_rate(struct cached_dev *dc)
int64_t integral_scaled;
uint32_t new_rate;
+ /*
+ * We need to consider the number of dirty buckets as well
+ * when calculating the proportional_scaled, Otherwise we might
+ * have an unreasonable small writeback rate at a highly fragmented situation
+ * when very few dirty sectors consumed a lot dirty buckets, the
+ * worst case is when dirty buckets reached cutoff_writeback_sync and
+ * dirty data is still not even reached to writeback percent, so the rate
+ * still will be at the minimum value, which will cause the write
+ * stuck at a non-writeback mode.
+ */
+ struct cache_set *c = dc->disk.c;
+
+ int64_t dirty_buckets = c->nbuckets - c->avail_nbuckets;
+
+ if (dc->writeback_consider_fragment &&
+ c->gc_stats.in_use > BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW && dirty > 0) {
+ int64_t fragment =
+ div_s64((dirty_buckets * c->cache->sb.bucket_size), dirty);
+ int64_t fp_term;
+ int64_t fps;
+
+ if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID) {
+ fp_term = dc->writeback_rate_fp_term_low *
+ (c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW);
+ } else if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH) {
+ fp_term = dc->writeback_rate_fp_term_mid *
+ (c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID);
+ } else {
+ fp_term = dc->writeback_rate_fp_term_high *
+ (c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH);
+ }
+ fps = div_s64(dirty, dirty_buckets) * fp_term;
+ if (fragment > 3 && fps > proportional_scaled) {
+ /* Only overrite the p when fragment > 3 */
+ proportional_scaled = fps;
+ }
+ }
+
if ((error < 0 && dc->writeback_rate_integral > 0) ||
(error > 0 && time_before64(local_clock(),
dc->writeback_rate.next + NSEC_PER_MSEC))) {
@@ -977,6 +1015,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_metadata = true;
dc->writeback_running = false;
+ dc->writeback_consider_fragment = true;
dc->writeback_percent = 10;
dc->writeback_delay = 30;
atomic_long_set(&dc->writeback_rate.rate, 1024);
@@ -984,6 +1023,9 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
dc->writeback_rate_p_term_inverse = 40;
+ dc->writeback_rate_fp_term_low = 1;
+ dc->writeback_rate_fp_term_mid = 10;
+ dc->writeback_rate_fp_term_high = 1000;
dc->writeback_rate_i_term_inverse = 10000;
WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 3f1230e22de0..02b2f9df73f6 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -16,6 +16,10 @@
#define BCH_AUTO_GC_DIRTY_THRESHOLD 50
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW 50
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID 57
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH 64
+
#define BCH_DIRTY_INIT_THRD_MAX 64
/*
* 14 (16384ths) is chosen here as something that each backing device