summaryrefslogtreecommitdiff
path: root/drivers/md/bcache/writeback.c
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@mellanox.com>2018-08-16 23:13:03 +0300
committerJason Gunthorpe <jgg@mellanox.com>2018-08-16 23:21:29 +0300
commit0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (patch)
treed6c0bc84863cca54dfbde3b7463e5d49c82af9f1 /drivers/md/bcache/writeback.c
parent92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 (diff)
parent5c60a7389d795e001c8748b458eb76e3a5b6008c (diff)
downloadlinux-0a3173a5f09bc58a3638ecfd0a80bdbae55e123c.tar.xz
Merge branch 'linus/master' into rdma.git for-next
rdma.git merge resolution for the 4.19 merge window Conflicts: drivers/infiniband/core/rdma_core.c - Use the rdma code and revise with the new spelling for atomic_fetch_add_unless drivers/nvme/host/rdma.c - Replace max_sge with max_send_sge in new blk code drivers/nvme/target/rdma.c - Use the blk code and revise to use NULL for ib_post_recv when appropriate - Replace max_sge with max_recv_sge in new blk code net/rds/ib_send.c - Use the net code and revise to use NULL for ib_post_recv when appropriate Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/md/bcache/writeback.c')
-rw-r--r--drivers/md/bcache/writeback.c125
1 files changed, 91 insertions, 34 deletions
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index ad45ebe1a74b..481d4cf38ac0 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -27,7 +27,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc)
* flash-only devices
*/
uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
- bcache_flash_devs_sectors_dirty(c);
+ atomic_long_read(&c->flash_dev_dirty_sectors);
/*
* Unfortunately there is no control of global dirty data. If the
@@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc)
dc->writeback_rate_proportional = proportional_scaled;
dc->writeback_rate_integral_scaled = integral_scaled;
- dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
- dc->writeback_rate.rate = new_rate;
+ dc->writeback_rate_change = new_rate -
+ atomic_long_read(&dc->writeback_rate.rate);
+ atomic_long_set(&dc->writeback_rate.rate, new_rate);
dc->writeback_rate_target = target;
}
+static bool set_at_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *dc)
+{
+ /*
+ * Idle_counter is increased everytime when update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it is about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
+ * to each dc->writeback_rate.rate.
+ * In order to avoid extra locking cost for counting exact dirty cached
+ * devices number, c->attached_dev_nr is used to calculate the idle
+ * throushold. It might be bigger if not all cached device are in write-
+ * back mode, but it still works well with limited extra rounds of
+ * update_writeback_rate().
+ */
+ if (atomic_inc_return(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6)
+ return false;
+
+ if (atomic_read(&c->at_max_writeback_rate) != 1)
+ atomic_set(&c->at_max_writeback_rate, 1);
+
+ atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
+
+ /* keep writeback_rate_target as existing value */
+ dc->writeback_rate_proportional = 0;
+ dc->writeback_rate_integral_scaled = 0;
+ dc->writeback_rate_change = 0;
+
+ /*
+ * Check c->idle_counter and c->at_max_writeback_rate agagain in case
+ * new I/O arrives during before set_at_max_writeback_rate() returns.
+ * Then the writeback rate is set to 1, and its new value should be
+ * decided via __update_writeback_rate().
+ */
+ if ((atomic_read(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6) ||
+ !atomic_read(&c->at_max_writeback_rate))
+ return false;
+
+ return true;
+}
+
static void update_writeback_rate(struct work_struct *work)
{
struct cached_dev *dc = container_of(to_delayed_work(work),
@@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work)
return;
}
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
- dc->writeback_percent)
- __update_writeback_rate(dc);
+ if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
+ /*
+ * If the whole cache set is idle, set_at_max_writeback_rate()
+ * will set writeback rate to a max number. Then it is
+ * unncessary to update writeback rate for an idle cache set
+ * in maximum writeback rate number(s).
+ */
+ if (!set_at_max_writeback_rate(c, dc)) {
+ down_read(&dc->writeback_lock);
+ __update_writeback_rate(dc);
+ up_read(&dc->writeback_lock);
+ }
+ }
- up_read(&dc->writeback_lock);
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -422,27 +474,6 @@ static void read_dirty(struct cached_dev *dc)
delay = writeback_delay(dc, size);
- /* If the control system would wait for at least half a
- * second, and there's been no reqs hitting the backing disk
- * for awhile: use an alternate mode where we have at most
- * one contiguous set of writebacks in flight at a time. If
- * someone wants to do IO it will be quick, as it will only
- * have to contend with one operation in flight, and we'll
- * be round-tripping data to the backing disk as quickly as
- * it can accept it.
- */
- if (delay >= HZ / 2) {
- /* 3 means at least 1.5 seconds, up to 7.5 if we
- * have slowed way down.
- */
- if (atomic_inc_return(&dc->backing_idle) >= 3) {
- /* Wait for current I/Os to finish */
- closure_sync(&cl);
- /* And immediately launch a new set. */
- delay = 0;
- }
- }
-
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
delay) {
@@ -476,6 +507,9 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
if (!d)
return;
+ if (UUID_FLASH_ONLY(&c->uuids[inode]))
+ atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);
+
stripe = offset_to_stripe(d, offset);
stripe_offset = offset & (d->stripe_size - 1);
@@ -673,10 +707,14 @@ static int bch_writeback_thread(void *arg)
}
/* Init */
+#define INIT_KEYS_EACH_TIME 500000
+#define INIT_KEYS_SLEEP_MS 100
struct sectors_dirty_init {
struct btree_op op;
unsigned inode;
+ size_t count;
+ struct bkey start;
};
static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
@@ -691,18 +729,37 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
KEY_START(k), KEY_SIZE(k));
+ op->count++;
+ if (atomic_read(&b->c->search_inflight) &&
+ !(op->count % INIT_KEYS_EACH_TIME)) {
+ bkey_copy_key(&op->start, k);
+ return -EAGAIN;
+ }
+
return MAP_CONTINUE;
}
void bch_sectors_dirty_init(struct bcache_device *d)
{
struct sectors_dirty_init op;
+ int ret;
bch_btree_op_init(&op.op, -1);
op.inode = d->id;
-
- bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0),
- sectors_dirty_init_fn, 0);
+ op.count = 0;
+ op.start = KEY(op.inode, 0, 0);
+
+ do {
+ ret = bch_btree_map_keys(&op.op, d->c, &op.start,
+ sectors_dirty_init_fn, 0);
+ if (ret == -EAGAIN)
+ schedule_timeout_interruptible(
+ msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
+ else if (ret < 0) {
+ pr_warn("sectors dirty init failed, ret=%d!", ret);
+ break;
+ }
+ } while (ret == -EAGAIN);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
@@ -715,7 +772,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_running = true;
dc->writeback_percent = 10;
dc->writeback_delay = 30;
- dc->writeback_rate.rate = 1024;
+ atomic_long_set(&dc->writeback_rate.rate, 1024);
dc->writeback_rate_minimum = 8;
dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;