diff options
author | Kent Overstreet <koverstreet@google.com> | 2013-06-27 04:25:38 +0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2013-08-29 20:47:40 +0400 |
commit | ae61fd4496f9d9290b9e84fc373818b2ee780137 (patch) | |
tree | 0b84deba412f79533e43c20601dad2d9d6eb2909 | |
parent | ba5c60fc8f5f574c7cec70740ca19d358b780c57 (diff) | |
download | linux-ae61fd4496f9d9290b9e84fc373818b2ee780137.tar.xz |
bcache: FUA fixes
commit e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 upstream.
Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node
writes have... weird ordering requirements.
Signed-off-by: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/md/bcache/btree.c | 23 | ||||
-rw-r--r-- | drivers/md/bcache/journal.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 13 |
3 files changed, 34 insertions, 4 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7b687a6f3dec..833c590806ba 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -326,10 +326,25 @@ static void do_btree_write(struct btree *b) i->csum = btree_csum_set(b, i); btree_bio_init(b); - b->bio->bi_rw = REQ_META|WRITE_SYNC; + b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); bch_bio_map(b->bio, i); + /* + * If we're appending to a leaf node, we don't technically need FUA - + * this write just needs to be persisted before the next journal write, + * which will be marked FLUSH|FUA. + * + * Similarly if we're writing a new btree root - the pointer is going to + * be in the next journal entry. + * + * But if we're writing a new btree node (that isn't a root) or + * appending to a non leaf btree node, we need either FUA or a flush + * when we write the parent with the new pointer. FUA is cheaper than a + * flush, and writes appending to leaf nodes aren't blocking anything so + * just make all btree node writes FUA to keep things sane. + */ + bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); @@ -2142,6 +2157,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) void bch_btree_set_root(struct btree *b) { unsigned i; + struct closure cl; + + closure_init_stack(&cl); BUG_ON(!b->written); @@ -2155,8 +2173,9 @@ void bch_btree_set_root(struct btree *b) b->c->root = b; __bkey_put(b->c, &b->key); - bch_journal_meta(b->c, NULL); + bch_journal_meta(b->c, &cl); pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); + closure_sync(&cl); } /* Cache lookup */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8a54d3b4f517..b49abb246bb6 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -622,7 +622,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_sector = PTR_OFFSET(k, i); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; bio->bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 2f36743ce708..afb9a998a737 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1053,9 +1053,20 @@ static void request_write(struct cached_dev *dc, struct search *s) trace_bcache_writethrough(s->orig_bio); closure_bio_submit(bio, cl, s->d); } else { - s->op.cache_bio = bio; trace_bcache_writeback(s->orig_bio); bch_writeback_add(dc, bio_sectors(bio)); + + if (s->op.flush_journal) { + /* Also need to send a flush to the backing device */ + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + bio->bi_size = 0; + bio->bi_vcnt = 0; + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + } } out: closure_call(&s->op.cl, bch_insert_data, NULL, cl); |