From 5794351146199b9ac67a5ab1beab82be8bfd7b5d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Apr 2013 13:58:35 -0700 Subject: bcache: Refactor btree io The most significant change is that btree reads are now done synchronously, instead of asynchronously and doing the post read stuff from a workqueue. This was originally done because we can't block on IO under generic_make_request(). But - we already have a mechanism to punt cache lookups to workqueue if needed, so if we just use that we don't have to deal with the complexity of doing things asynchronously. The main benefit is this makes the locking situation saner; we can hold our write lock on the btree node until we're finished reading it, and we don't need that btree_node_read_done() flag anymore. Also, for writes, btree_write() was broken out into btree_node_write() and btree_leaf_dirty() - the old code with the boolean argument was dumb and confusing. The prio_blocked mechanism was improved a bit too, now the only counter is in struct btree_write, we don't mess with transferring a count from struct btree anymore. This required changing garbage collection to block prios at the start and unblock when it finishes, which is cleaner than what it was doing anyways (the old code had mostly the same effect, but was doing it in a convoluted way). And the btree iter btree_node_read_done() uses was converted to a real mempool. 
Signed-off-by: Kent Overstreet --- drivers/md/bcache/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/bcache/debug.c') diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 89fd5204924e..ae6096c6845d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -144,7 +144,7 @@ void bch_btree_verify(struct btree *b, struct bset *new) v->written = 0; v->level = b->level; - bch_btree_read(v); + bch_btree_node_read(v); closure_wait_event(&v->io.wait, &cl, atomic_read(&b->io.cl.remaining) == -1); @@ -512,7 +512,7 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, bch_btree_sort(b); fill->written = 0; - bch_btree_read_done(&fill->io.cl); + bch_btree_node_read_done(fill); if (b->sets[0].data->keys != fill->sets[0].data->keys || memcmp(b->sets[0].data->start, -- cgit v1.2.3 From 85b1492ee113486d871de7676a61f506a43ca475 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 14 May 2013 20:33:16 -0700 Subject: bcache: Rip out pkey()/pbtree() Old gcc doesn't like the struct hack, and it is kind of ugly. So finish off the work to convert pr_debug() statements to tracepoints, and delete pkey()/pbtree(). 
Signed-off-by: Kent Overstreet --- drivers/md/bcache/bset.c | 16 ++++++++++++---- drivers/md/bcache/btree.c | 21 ++++++--------------- drivers/md/bcache/btree.h | 7 +++++++ drivers/md/bcache/debug.c | 40 +++++++++++++++++++++++++--------------- drivers/md/bcache/debug.h | 11 ++--------- drivers/md/bcache/super.c | 5 +++-- drivers/md/bcache/trace.c | 2 ++ include/trace/events/bcache.h | 33 +++++++++++++++++++++++++++++++++ 8 files changed, 90 insertions(+), 45 deletions(-) (limited to 'drivers/md/bcache/debug.c') diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index cb4578a327b9..e9399ed7f688 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -78,6 +78,7 @@ struct bkey *bch_keylist_pop(struct keylist *l) bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) { unsigned i; + char buf[80]; if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))) goto bad; @@ -102,7 +103,8 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) return false; bad: - cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k)); + bch_bkey_to_text(buf, sizeof(buf), k); + cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k)); return true; } @@ -162,10 +164,16 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) #ifdef CONFIG_BCACHE_EDEBUG bug: mutex_unlock(&b->c->bucket_lock); - btree_bug(b, + + { + char buf[80]; + + bch_bkey_to_text(buf, sizeof(buf), k); + btree_bug(b, "inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", - pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), - g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + } return true; #endif } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 218d486259a3..53a0f4ef4e32 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c 
@@ -1770,7 +1770,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, { struct bset *i = b->sets[b->nsets].data; struct bkey *m, *prev; - const char *status = "insert"; + unsigned status = BTREE_INSERT_STATUS_INSERT; BUG_ON(bkey_cmp(k, &b->key) > 0); BUG_ON(b->level && !KEY_PTRS(k)); @@ -1803,17 +1803,17 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, goto insert; /* prev is in the tree, if we merge we're done */ - status = "back merging"; + status = BTREE_INSERT_STATUS_BACK_MERGE; if (prev && bch_bkey_try_merge(b, prev, k)) goto merged; - status = "overwrote front"; + status = BTREE_INSERT_STATUS_OVERWROTE; if (m != end(i) && KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m)) goto copy; - status = "front merge"; + status = BTREE_INSERT_STATUS_FRONT_MERGE; if (m != end(i) && bch_bkey_try_merge(b, k, m)) goto copy; @@ -1823,16 +1823,12 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, insert: shift_keys(b, m, k); copy: bkey_copy(m, k); merged: - bch_check_keys(b, "%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); - bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); + bch_check_keys(b, "%u for %s", status, op_type(op)); if (b->level && !KEY_OFFSET(k)) btree_current_write(b)->prio_blocked++; - pr_debug("%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); + trace_bcache_btree_insert_key(b, k, op->type, status); return true; } @@ -2234,9 +2230,6 @@ int bch_btree_search_recurse(struct btree *b, struct btree_op *op) struct btree_iter iter; bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0)); - pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode, - (uint64_t) bio->bi_sector); - do { k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); if (!k) { @@ -2302,8 +2295,6 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (buf->key_predicate(buf, k)) { struct keybuf_key *w; - pr_debug("%s", pkey(k)); - 
spin_lock(&buf->lock); w = array_alloc(&buf->freelist); diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 809bd77847a2..2b016b93cad4 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -271,6 +271,13 @@ struct btree_op { BKEY_PADDED(replace); }; +enum { + BTREE_INSERT_STATUS_INSERT, + BTREE_INSERT_STATUS_BACK_MERGE, + BTREE_INSERT_STATUS_OVERWROTE, + BTREE_INSERT_STATUS_FRONT_MERGE, +}; + void bch_btree_op_init_stack(struct btree_op *); static inline void rw_lock(bool w, struct btree *b, int level) diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index ae6096c6845d..82e3a07771ec 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -47,11 +47,10 @@ const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) return ""; } -struct keyprint_hack bch_pkey(const struct bkey *k) +int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k) { unsigned i = 0; - struct keyprint_hack r; - char *out = r.s, *end = r.s + KEYHACK_SIZE; + char *out = buf, *end = buf + size; #define p(...) (out += scnprintf(out, end - out, __VA_ARGS__)) @@ -75,16 +74,14 @@ struct keyprint_hack bch_pkey(const struct bkey *k) if (KEY_CSUM(k)) p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]); #undef p - return r; + return out - buf; } -struct keyprint_hack bch_pbtree(const struct btree *b) +int bch_btree_to_text(char *buf, size_t size, const struct btree *b) { - struct keyprint_hack r; - - snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0), - b->level, b->c->root ? b->c->root->level : -1); - return r; + return scnprintf(buf, size, "%zu level %i/%i", + PTR_BUCKET_NR(b->c, &b->key, 0), + b->level, b->c->root ? 
b->c->root->level : -1); } #if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG) @@ -100,10 +97,12 @@ static void dump_bset(struct btree *b, struct bset *i) { struct bkey *k; unsigned j; + char buf[80]; for (k = i->start; k < end(i); k = bkey_next(k)) { + bch_bkey_to_text(buf, sizeof(buf), k); printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b), - (uint64_t *) k - i->d, i->keys, pkey(k)); + (uint64_t *) k - i->d, i->keys, buf); for (j = 0; j < KEY_PTRS(k); j++) { size_t n = PTR_BUCKET_NR(b->c, k, j); @@ -252,6 +251,7 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt, va_list args) { unsigned i; + char buf[80]; console_lock(); @@ -262,7 +262,8 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt, console_unlock(); - panic("at %s\n", pbtree(b)); + bch_btree_to_text(buf, sizeof(buf), b); + panic("at %s\n", buf); } void bch_check_key_order_msg(struct btree *b, struct bset *i, @@ -337,6 +338,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, { struct dump_iterator *i = file->private_data; ssize_t ret = 0; + char kbuf[80]; while (size) { struct keybuf_key *w; @@ -359,7 +361,8 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, if (!w) break; - i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key)); + bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key); + i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf); bch_keybuf_del(&i->keys, w); } @@ -526,10 +529,17 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, k < end(i); k = bkey_next(k), l = bkey_next(l)) if (bkey_cmp(k, l) || - KEY_SIZE(k) != KEY_SIZE(l)) + KEY_SIZE(k) != KEY_SIZE(l)) { + char buf1[80]; + char buf2[80]; + + bch_bkey_to_text(buf1, sizeof(buf1), k); + bch_bkey_to_text(buf2, sizeof(buf2), l); + pr_err("key %zi differs: %s != %s", (uint64_t *) k - i->d, - pkey(k), pkey(l)); + buf1, buf2); + } for (j = 0; j < 3; j++) { pr_err("**** Set %i ****", j); diff --git a/drivers/md/bcache/debug.h 
b/drivers/md/bcache/debug.h index f9378a218148..1c39b5a2489b 100644 --- a/drivers/md/bcache/debug.h +++ b/drivers/md/bcache/debug.h @@ -3,15 +3,8 @@ /* Btree/bkey debug printing */ -#define KEYHACK_SIZE 80 -struct keyprint_hack { - char s[KEYHACK_SIZE]; -}; - -struct keyprint_hack bch_pkey(const struct bkey *k); -struct keyprint_hack bch_pbtree(const struct btree *b); -#define pkey(k) (&bch_pkey(k).s[0]) -#define pbtree(b) (&bch_pbtree(b).s[0]) +int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k); +int bch_btree_to_text(char *buf, size_t size, const struct btree *b); #ifdef CONFIG_BCACHE_EDEBUG diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 47bc13745068..f24c2e0cbb1c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -343,6 +343,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw, struct closure *cl = &c->uuid_write.cl; struct uuid_entry *u; unsigned i; + char buf[80]; BUG_ON(!parent); closure_lock(&c->uuid_write, parent); @@ -363,8 +364,8 @@ static void uuid_io(struct cache_set *c, unsigned long rw, break; } - pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", - pkey(&c->uuid_bucket)); + bch_bkey_to_text(buf, sizeof(buf), k); + pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? 
"wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c index 7f4f38aa16ae..f7b6c197f90f 100644 --- a/drivers/md/bcache/trace.c +++ b/drivers/md/bcache/trace.c @@ -40,6 +40,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_insert_key); + EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index c9952b36fcea..5ebda976ea93 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -305,6 +305,39 @@ DEFINE_EVENT(bkey, bcache_gc_copy_collision, TP_ARGS(k) ); +TRACE_EVENT(bcache_btree_insert_key, + TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status), + TP_ARGS(b, k, op, status), + + TP_STRUCT__entry( + __field(u64, btree_node ) + __field(u32, btree_level ) + __field(u32, inode ) + __field(u64, offset ) + __field(u32, size ) + __field(u8, dirty ) + __field(u8, op ) + __field(u8, status ) + ), + + TP_fast_assign( + __entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->btree_level = b->level; + __entry->inode = KEY_INODE(k); + __entry->offset = KEY_OFFSET(k); + __entry->size = KEY_SIZE(k); + __entry->dirty = KEY_DIRTY(k); + __entry->op = op; + __entry->status = status; + ), + + TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u", + __entry->status, __entry->op, + __entry->btree_node, __entry->btree_level, + __entry->inode, __entry->offset, + __entry->size, __entry->dirty) +); + DECLARE_EVENT_CLASS(btree_split, TP_PROTO(struct btree *b, unsigned keys), TP_ARGS(b, keys), -- cgit v1.2.3 From 72c270612bd33192fa836ad0f2939af1ca218292 Mon Sep 17 00:00:00 2001 From: Kent 
Overstreet Date: Wed, 5 Jun 2013 06:24:39 -0700 Subject: bcache: Write out full stripes Now that we're tracking dirty data per stripe, we can add two optimizations for raid5/6: * If a stripe is already dirty, force writes to that stripe to writeback mode - to help build up full stripes of dirty data * When flushing dirty data, preferentially write out full stripes first if there are any. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 3 +-- drivers/md/bcache/btree.c | 19 ++++++++++--------- drivers/md/bcache/btree.h | 9 +++++---- drivers/md/bcache/debug.c | 4 ++-- drivers/md/bcache/movinggc.c | 5 +++-- drivers/md/bcache/request.c | 23 +++++++--------------- drivers/md/bcache/sysfs.c | 8 ++++++++ drivers/md/bcache/writeback.c | 44 +++++++++++++++++++++++++++++++++++++++++-- drivers/md/bcache/writeback.h | 43 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 121 insertions(+), 37 deletions(-) (limited to 'drivers/md/bcache/debug.c') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index dbddef0cdb59..342ba86c6e4f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -387,8 +387,6 @@ struct keybuf_key { typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); struct keybuf { - keybuf_pred_fn *key_predicate; - struct bkey last_scanned; spinlock_t lock; @@ -532,6 +530,7 @@ struct cached_dev { unsigned sequential_merge:1; unsigned verify:1; + unsigned partial_stripes_expensive:1; unsigned writeback_metadata:1; unsigned writeback_running:1; unsigned char writeback_percent; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index b93cf56260a4..09fb8a2f43da 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2252,7 +2252,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, } static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, - struct keybuf *buf, struct bkey *end) + struct keybuf *buf, struct bkey *end, + keybuf_pred_fn 
*pred) { struct btree_iter iter; bch_btree_iter_init(b, &iter, &buf->last_scanned); @@ -2271,7 +2272,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (bkey_cmp(&buf->last_scanned, end) >= 0) break; - if (buf->key_predicate(buf, k)) { + if (pred(buf, k)) { struct keybuf_key *w; spin_lock(&buf->lock); @@ -2290,7 +2291,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (!k) break; - btree(refill_keybuf, k, b, op, buf, end); + btree(refill_keybuf, k, b, op, buf, end, pred); /* * Might get an error here, but can't really do anything * and it'll get logged elsewhere. Just read what we @@ -2308,7 +2309,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, } void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, - struct bkey *end) + struct bkey *end, keybuf_pred_fn *pred) { struct bkey start = buf->last_scanned; struct btree_op op; @@ -2316,7 +2317,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, cond_resched(); - btree_root(refill_keybuf, c, &op, buf, end); + btree_root(refill_keybuf, c, &op, buf, end, pred); closure_sync(&op.cl); pr_debug("found %s keys from %llu:%llu to %llu:%llu", @@ -2402,7 +2403,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf) struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, struct keybuf *buf, - struct bkey *end) + struct bkey *end, + keybuf_pred_fn *pred) { struct keybuf_key *ret; @@ -2416,15 +2418,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, break; } - bch_refill_keybuf(c, buf, end); + bch_refill_keybuf(c, buf, end, pred); } return ret; } -void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn) +void bch_keybuf_init(struct keybuf *buf) { - buf->key_predicate = fn; buf->last_scanned = MAX_KEY; buf->keys = RB_ROOT; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 2b016b93cad4..f66d69a7baf1 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h 
@@ -391,13 +391,14 @@ void bch_moving_gc(struct closure *); int bch_btree_check(struct cache_set *, struct btree_op *); uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); -void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *); -void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *); +void bch_keybuf_init(struct keybuf *); +void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *, + keybuf_pred_fn *); bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, struct bkey *); void bch_keybuf_del(struct keybuf *, struct keybuf_key *); struct keybuf_key *bch_keybuf_next(struct keybuf *); -struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, - struct keybuf *, struct bkey *); +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *, + struct bkey *, keybuf_pred_fn *); #endif diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 82e3a07771ec..1c8fd319846e 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -357,7 +357,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, if (i->bytes) break; - w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY); + w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred); if (!w) break; @@ -380,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file) file->private_data = i; i->c = c; - bch_keybuf_init(&i->keys, dump_pred); + bch_keybuf_init(&i->keys); i->keys.last_scanned = KEY(0, 0, 0); return 0; diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 04f6b97ffda6..a241e9fd4f7f 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -136,7 +136,8 @@ static void read_moving(struct closure *cl) /* XXX: if we error, background writeback could stall indefinitely */ while (!test_bit(CACHE_SET_STOPPING, &c->flags)) { - w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY); + w = bch_keybuf_next_rescan(c, 
&c->moving_gc_keys, + &MAX_KEY, moving_pred); if (!w) break; @@ -248,5 +249,5 @@ void bch_moving_gc(struct closure *cl) void bch_moving_init_cache_set(struct cache_set *c) { - bch_keybuf_init(&c->moving_gc_keys, moving_pred); + bch_keybuf_init(&c->moving_gc_keys); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 017c95fced8e..17bd59704eba 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -22,8 +22,6 @@ #define CUTOFF_CACHE_ADD 95 #define CUTOFF_CACHE_READA 90 -#define CUTOFF_WRITEBACK 50 -#define CUTOFF_WRITEBACK_SYNC 75 struct kmem_cache *bch_search_cache; @@ -998,17 +996,6 @@ static void cached_dev_write_complete(struct closure *cl) cached_dev_bio_complete(cl); } -static bool should_writeback(struct cached_dev *dc, struct bio *bio) -{ - unsigned threshold = (bio->bi_rw & REQ_SYNC) - ? CUTOFF_WRITEBACK_SYNC - : CUTOFF_WRITEBACK; - - return !atomic_read(&dc->disk.detaching) && - cache_mode(dc, bio) == CACHE_MODE_WRITEBACK && - dc->disk.c->gc_stats.in_use < threshold; -} - static void request_write(struct cached_dev *dc, struct search *s) { struct closure *cl = &s->cl; @@ -1030,12 +1017,16 @@ static void request_write(struct cached_dev *dc, struct search *s) if (bio->bi_rw & REQ_DISCARD) goto skip; + if (should_writeback(dc, s->orig_bio, + cache_mode(dc, bio), + s->op.skip)) { + s->op.skip = false; + s->writeback = true; + } + if (s->op.skip) goto skip; - if (should_writeback(dc, s->orig_bio)) - s->writeback = true; - trace_bcache_write(s->orig_bio, s->writeback, s->op.skip); if (!s->writeback) { diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index cf8d91ec3238..70c6dff0d0cd 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -81,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse); rw_attribute(writeback_rate_d_smooth); read_attribute(writeback_rate_debug); +read_attribute(stripe_size); +read_attribute(partial_stripes_expensive); + rw_attribute(synchronous); 
rw_attribute(journal_delay_ms); rw_attribute(discard); @@ -147,6 +150,9 @@ SHOW(__bch_cached_dev) sysfs_hprint(dirty_data, bcache_dev_sectors_dirty(&dc->disk) << 9); + sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9); + var_printf(partial_stripes_expensive, "%u"); + var_printf(sequential_merge, "%i"); var_hprint(sequential_cutoff); var_hprint(readahead); @@ -286,6 +292,8 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_writeback_rate_d_smooth, &sysfs_writeback_rate_debug, &sysfs_dirty_data, + &sysfs_stripe_size, + &sysfs_partial_stripes_expensive, &sysfs_sequential_cutoff, &sysfs_sequential_merge, &sysfs_clear_stats, diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index dd815475c524..d81ee5ccc726 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -108,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) return KEY_DIRTY(k); } +static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k) +{ + uint64_t stripe; + unsigned nr_sectors = KEY_SIZE(k); + struct cached_dev *dc = container_of(buf, struct cached_dev, + writeback_keys); + unsigned stripe_size = 1 << dc->disk.stripe_size_bits; + + if (!KEY_DIRTY(k)) + return false; + + stripe = KEY_START(k) >> dc->disk.stripe_size_bits; + while (1) { + if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) != + stripe_size) + return false; + + if (nr_sectors <= stripe_size) + return true; + + nr_sectors -= stripe_size; + stripe++; + } +} + static void dirty_init(struct keybuf_key *w) { struct dirty_io *io = w->private; @@ -152,7 +177,22 @@ static void refill_dirty(struct closure *cl) searched_from_start = true; } - bch_refill_keybuf(dc->disk.c, buf, &end); + if (dc->partial_stripes_expensive) { + uint64_t i; + + for (i = 0; i < dc->disk.nr_stripes; i++) + if (atomic_read(dc->disk.stripe_sectors_dirty + i) == + 1 << dc->disk.stripe_size_bits) + goto full_stripes; + + goto normal_refill; +full_stripes: + 
bch_refill_keybuf(dc->disk.c, buf, &end, + dirty_full_stripe_pred); + } else { +normal_refill: + bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); + } if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { /* Searched the entire btree - delay awhile */ @@ -446,7 +486,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) closure_init_unlocked(&dc->writeback); init_rwsem(&dc->writeback_lock); - bch_keybuf_init(&dc->writeback_keys, dirty_pred); + bch_keybuf_init(&dc->writeback_keys); dc->writeback_metadata = true; dc->writeback_running = true; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 5ce9771df047..c91f61bb95b6 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -1,6 +1,9 @@ #ifndef _BCACHE_WRITEBACK_H #define _BCACHE_WRITEBACK_H +#define CUTOFF_WRITEBACK 40 +#define CUTOFF_WRITEBACK_SYNC 70 + static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) { uint64_t i, ret = 0; @@ -11,6 +14,46 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline bool bcache_dev_stripe_dirty(struct bcache_device *d, + uint64_t offset, + unsigned nr_sectors) +{ + uint64_t stripe = offset >> d->stripe_size_bits; + + while (1) { + if (atomic_read(d->stripe_sectors_dirty + stripe)) + return true; + + if (nr_sectors <= 1 << d->stripe_size_bits) + return false; + + nr_sectors -= 1 << d->stripe_size_bits; + stripe++; + } +} + +static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, + unsigned cache_mode, bool would_skip) +{ + unsigned in_use = dc->disk.c->gc_stats.in_use; + + if (cache_mode != CACHE_MODE_WRITEBACK || + atomic_read(&dc->disk.detaching) || + in_use > CUTOFF_WRITEBACK_SYNC) + return false; + + if (dc->partial_stripes_expensive && + bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector, + bio_sectors(bio))) + return true; + + if (would_skip) + return false; + + return bio->bi_rw & REQ_SYNC || + in_use <= 
CUTOFF_WRITEBACK; +} + void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); void bch_writeback_queue(struct cached_dev *); void bch_writeback_add(struct cached_dev *); -- cgit v1.2.3 From f3059a54610f6c516c0942d58b9435921768ce2d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 May 2013 17:13:45 -0700 Subject: bcache: Delete fuzz tester This code has rotted and it hasn't been used in ages anyways. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 4 +- drivers/md/bcache/btree.h | 2 - drivers/md/bcache/debug.c | 148 ---------------------------------------------- 3 files changed, 2 insertions(+), 152 deletions(-) (limited to 'drivers/md/bcache/debug.c') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 04636a70ffaa..e0cca3673d0f 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -135,7 +135,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) return crc ^ 0xffffffffffffffffULL; } -void bch_btree_node_read_done(struct btree *b) +static void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = b->sets[0].data; @@ -1834,7 +1834,7 @@ merged: return true; } -bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) +static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) { bool ret = false; struct bkey *k; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index f66d69a7baf1..3333d3723633 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -369,7 +369,6 @@ static inline bool should_split(struct btree *b) } void bch_btree_node_read(struct btree *); -void bch_btree_node_read_done(struct btree *); void bch_btree_node_write(struct btree *, struct closure *); void bch_cannibalize_unlock(struct cache_set *, struct closure *); @@ -378,7 +377,6 @@ struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *); struct btree *bch_btree_node_get(struct 
cache_set *, struct bkey *, int, struct btree_op *); -bool bch_btree_insert_keys(struct btree *, struct btree_op *); bool bch_btree_insert_check_key(struct btree *, struct btree_op *, struct bio *); int bch_btree_insert(struct btree_op *, struct cache_set *); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 1c8fd319846e..ba2ceee0fdbb 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -412,149 +412,6 @@ void bch_debug_init_cache_set(struct cache_set *c) #endif -/* Fuzz tester has rotted: */ -#if 0 - -static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, - const char *buffer, size_t size) -{ - void dump(struct btree *b) - { - struct bset *i; - - for (i = b->sets[0].data; - index(i, b) < btree_blocks(b) && - i->seq == b->sets[0].data->seq; - i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c)) - dump_bset(b, i); - } - - struct cache_sb *sb; - struct cache_set *c; - struct btree *all[3], *b, *fill, *orig; - int j; - - struct btree_op op; - bch_btree_op_init_stack(&op); - - sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL); - if (!sb) - return -ENOMEM; - - sb->bucket_size = 128; - sb->block_size = 4; - - c = bch_cache_set_alloc(sb); - if (!c) - return -ENOMEM; - - for (j = 0; j < 3; j++) { - BUG_ON(list_empty(&c->btree_cache)); - all[j] = list_first_entry(&c->btree_cache, struct btree, list); - list_del_init(&all[j]->list); - - all[j]->key = KEY(0, 0, c->sb.bucket_size); - bkey_copy_key(&all[j]->key, &MAX_KEY); - } - - b = all[0]; - fill = all[1]; - orig = all[2]; - - while (1) { - for (j = 0; j < 3; j++) - all[j]->written = all[j]->nsets = 0; - - bch_bset_init_next(b); - - while (1) { - struct bset *i = write_block(b); - struct bkey *k = op.keys.top; - unsigned rand; - - bkey_init(k); - rand = get_random_int(); - - op.type = rand & 1 - ? 
BTREE_INSERT - : BTREE_REPLACE; - rand >>= 1; - - SET_KEY_SIZE(k, bucket_remainder(c, rand)); - rand >>= c->bucket_bits; - rand &= 1024 * 512 - 1; - rand += c->sb.bucket_size; - SET_KEY_OFFSET(k, rand); -#if 0 - SET_KEY_PTRS(k, 1); -#endif - bch_keylist_push(&op.keys); - bch_btree_insert_keys(b, &op); - - if (should_split(b) || - set_blocks(i, b->c) != - __set_blocks(i, i->keys + 15, b->c)) { - i->csum = csum_set(i); - - memcpy(write_block(fill), - i, set_bytes(i)); - - b->written += set_blocks(i, b->c); - fill->written = b->written; - if (b->written == btree_blocks(b)) - break; - - bch_btree_sort_lazy(b); - bch_bset_init_next(b); - } - } - - memcpy(orig->sets[0].data, - fill->sets[0].data, - btree_bytes(c)); - - bch_btree_sort(b); - fill->written = 0; - bch_btree_node_read_done(fill); - - if (b->sets[0].data->keys != fill->sets[0].data->keys || - memcmp(b->sets[0].data->start, - fill->sets[0].data->start, - b->sets[0].data->keys * sizeof(uint64_t))) { - struct bset *i = b->sets[0].data; - struct bkey *k, *l; - - for (k = i->start, - l = fill->sets[0].data->start; - k < end(i); - k = bkey_next(k), l = bkey_next(l)) - if (bkey_cmp(k, l) || - KEY_SIZE(k) != KEY_SIZE(l)) { - char buf1[80]; - char buf2[80]; - - bch_bkey_to_text(buf1, sizeof(buf1), k); - bch_bkey_to_text(buf2, sizeof(buf2), l); - - pr_err("key %zi differs: %s != %s", - (uint64_t *) k - i->d, - buf1, buf2); - } - - for (j = 0; j < 3; j++) { - pr_err("**** Set %i ****", j); - dump(all[j]); - } - panic("\n"); - } - - pr_info("fuzz complete: %i keys", b->sets[0].data->keys); - } -} - -kobj_attribute_write(fuzz, btree_fuzz); -#endif - void bch_debug_exit(void) { if (!IS_ERR_OR_NULL(debug)) @@ -564,11 +421,6 @@ void bch_debug_exit(void) int __init bch_debug_init(struct kobject *kobj) { int ret = 0; -#if 0 - ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr); - if (ret) - return ret; -#endif debug = debugfs_create_dir("bcache", NULL); return ret; -- cgit v1.2.3 From 8e51e414a3c6d92ef2cc41720c67342a8e2c0bf7 Mon 
Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2013 18:15:57 -0700 Subject: bcache: Use standard utility code Some of bcache's utility code has made it into the rest of the kernel, so drop the bcache versions. Bcache used to have a workaround for allocating from a bio set under generic_make_request() (if you allocated more than once, the bios you already allocated would get stuck on current->bio_list when you submitted, and you'd risk deadlock) - bcache would mask out __GFP_WAIT when allocating bios under generic_make_request() so that allocation could fail and it could retry from workqueue. But bio_alloc_bioset() has a workaround now, so we can drop this hack and the associated error handling. Signed-off-by: Kent Overstreet --- drivers/md/bcache/btree.c | 7 +--- drivers/md/bcache/debug.c | 2 +- drivers/md/bcache/io.c | 64 +++++++++++-------------------- drivers/md/bcache/movinggc.c | 7 ++-- drivers/md/bcache/request.c | 87 +++++++++---------------------------------- drivers/md/bcache/util.c | 17 --------- drivers/md/bcache/util.h | 4 -- drivers/md/bcache/writeback.c | 7 ++-- 8 files changed, 51 insertions(+), 144 deletions(-) (limited to 'drivers/md/bcache/debug.c') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e0cca3673d0f..15b58239c683 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -350,7 +350,7 @@ static void do_btree_node_write(struct btree *b) bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); - if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) { + if (!bio_alloc_pages(b->bio, GFP_NOIO)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); @@ -1865,7 +1865,7 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op, should_split(b)) goto out; - op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio)); + op->replace = KEY(op->inode, bio_end_sector(bio), bio_sectors(bio)); SET_KEY_PTRS(&op->replace, 
1); get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t)); @@ -2194,9 +2194,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, KEY_OFFSET(k) - bio->bi_sector); n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); - if (!n) - return -EAGAIN; - if (n == bio) op->lookup_done = true; diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index ba2ceee0fdbb..88e6411eab4f 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -199,7 +199,7 @@ void bch_data_verify(struct search *s) if (!check) return; - if (bch_bio_alloc_pages(check, GFP_NOIO)) + if (bio_alloc_pages(check, GFP_NOIO)) goto out_put; check->bi_rw = READ_SYNC; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 0f6d69658b61..9056632995b1 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -68,13 +68,6 @@ static void bch_generic_make_request_hack(struct bio *bio) * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a * bvec boundry; it is the caller's responsibility to ensure that @bio is not * freed before the split. - * - * If bch_bio_split() is running under generic_make_request(), it's not safe to - * allocate more than one bio from the same bio set. Therefore, if it is running - * under generic_make_request() it masks out __GFP_WAIT when doing the - * allocation. The caller must check for failure if there's any possibility of - * it being called from under generic_make_request(); it is then the caller's - * responsibility to retry from a safe context (by e.g. punting to workqueue). */ struct bio *bch_bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) @@ -85,15 +78,6 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, BUG_ON(sectors <= 0); - /* - * If we're being called from underneath generic_make_request() and we - * already allocated any bios from this bio set, we risk deadlock if we - * use the mempool. 
So instead, we possibly fail and let the caller punt - * to workqueue or somesuch and retry in a safe context. - */ - if (current->bio_list) - gfp &= ~__GFP_WAIT; - if (sectors >= bio_sectors(bio)) return bio; @@ -164,17 +148,18 @@ static unsigned bch_bio_max_sectors(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES, queue_max_segments(q)); - struct bio_vec *bv, *end = bio_iovec(bio) + - min_t(int, bio_segments(bio), max_segments); if (bio->bi_rw & REQ_DISCARD) return min(ret, q->limits.max_discard_sectors); if (bio_segments(bio) > max_segments || q->merge_bvec_fn) { + struct bio_vec *bv; + int i, seg = 0; + ret = 0; - for (bv = bio_iovec(bio); bv < end; bv++) { + bio_for_each_segment(bv, bio, i) { struct bvec_merge_data bvm = { .bi_bdev = bio->bi_bdev, .bi_sector = bio->bi_sector, @@ -182,10 +167,14 @@ static unsigned bch_bio_max_sectors(struct bio *bio) .bi_rw = bio->bi_rw, }; + if (seg == max_segments) + break; + if (q->merge_bvec_fn && q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) break; + seg++; ret += bv->bv_len >> 9; } } @@ -222,30 +211,10 @@ static void bch_bio_submit_split_endio(struct bio *bio, int error) closure_put(cl); } -static void __bch_bio_submit_split(struct closure *cl) -{ - struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); - struct bio *bio = s->bio, *n; - - do { - n = bch_bio_split(bio, bch_bio_max_sectors(bio), - GFP_NOIO, s->p->bio_split); - if (!n) - continue_at(cl, __bch_bio_submit_split, system_wq); - - n->bi_end_io = bch_bio_submit_split_endio; - n->bi_private = cl; - - closure_get(cl); - bch_generic_make_request_hack(n); - } while (n != bio); - - continue_at(cl, bch_bio_submit_split_done, NULL); -} - void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) { struct bio_split_hook *s; + struct bio *n; if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD)) goto submit; @@ -254,6 +223,7 @@ void 
bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) goto submit; s = mempool_alloc(p->bio_split_hook, GFP_NOIO); + closure_init(&s->cl, NULL); s->bio = bio; s->p = p; @@ -261,8 +231,18 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) s->bi_private = bio->bi_private; bio_get(bio); - closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL); - return; + do { + n = bch_bio_split(bio, bch_bio_max_sectors(bio), + GFP_NOIO, s->p->bio_split); + + n->bi_end_io = bch_bio_submit_split_endio; + n->bi_private = &s->cl; + + closure_get(&s->cl); + bch_generic_make_request_hack(n); + } while (n != bio); + + continue_at(&s->cl, bch_bio_submit_split_done, NULL); submit: bch_generic_make_request_hack(bio); } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index a241e9fd4f7f..1a3b4f4786c3 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -46,9 +46,10 @@ static void write_moving_finish(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, s.cl); struct bio *bio = &io->bio.bio; - struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt); + struct bio_vec *bv; + int i; - while (bv-- != bio->bi_io_vec) + bio_for_each_segment_all(bv, bio, i) __free_page(bv->bv_page); if (io->s.op.insert_collision) @@ -158,7 +159,7 @@ static void read_moving(struct closure *cl) bio->bi_rw = READ; bio->bi_end_io = read_moving_endio; - if (bch_bio_alloc_pages(bio, GFP_KERNEL)) + if (bio_alloc_pages(bio, GFP_KERNEL)) goto err; trace_bcache_gc_copy(&w->key); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index bcdf1f782c3e..b6e74d3c8faf 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -509,10 +509,6 @@ static void bch_insert_data_loop(struct closure *cl) goto err; n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); - if (!n) { - __bkey_put(op->c, k); - continue_at(cl, bch_insert_data_loop, bcache_wq); - } n->bi_end_io = 
bch_insert_data_endio; n->bi_private = cl; @@ -821,53 +817,13 @@ static void request_read_done(struct closure *cl) */ if (s->op.cache_bio) { - struct bio_vec *src, *dst; - unsigned src_offset, dst_offset, bytes; - void *dst_ptr; - bio_reset(s->op.cache_bio); s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; bch_bio_map(s->op.cache_bio, NULL); - src = bio_iovec(s->op.cache_bio); - dst = bio_iovec(s->cache_miss); - src_offset = src->bv_offset; - dst_offset = dst->bv_offset; - dst_ptr = kmap(dst->bv_page); - - while (1) { - if (dst_offset == dst->bv_offset + dst->bv_len) { - kunmap(dst->bv_page); - dst++; - if (dst == bio_iovec_idx(s->cache_miss, - s->cache_miss->bi_vcnt)) - break; - - dst_offset = dst->bv_offset; - dst_ptr = kmap(dst->bv_page); - } - - if (src_offset == src->bv_offset + src->bv_len) { - src++; - if (src == bio_iovec_idx(s->op.cache_bio, - s->op.cache_bio->bi_vcnt)) - BUG(); - - src_offset = src->bv_offset; - } - - bytes = min(dst->bv_offset + dst->bv_len - dst_offset, - src->bv_offset + src->bv_len - src_offset); - - memcpy(dst_ptr + dst_offset, - page_address(src->bv_page) + src_offset, - bytes); - - src_offset += bytes; - dst_offset += bytes; - } + bio_copy_data(s->cache_miss, s->op.cache_bio); bio_put(s->cache_miss); s->cache_miss = NULL; @@ -912,9 +868,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct bio *miss; miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); - if (!miss) - return -EAGAIN; - if (miss == bio) s->op.lookup_done = true; @@ -933,8 +886,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, reada = min(dc->readahead >> 9, sectors - bio_sectors(miss)); - if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev)) - reada = bdev_sectors(miss->bi_bdev) - bio_end(miss); + if (bio_end_sector(miss) + reada > bdev_sectors(miss->bi_bdev)) + reada = bdev_sectors(miss->bi_bdev) 
- + bio_end_sector(miss); } s->cache_bio_sectors = bio_sectors(miss) + reada; @@ -958,7 +912,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_put; bch_bio_map(s->op.cache_bio, NULL); - if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) goto out_put; s->cache_miss = miss; @@ -1002,7 +956,7 @@ static void request_write(struct cached_dev *dc, struct search *s) struct bio *bio = &s->bio.bio; struct bkey start, end; start = KEY(dc->disk.id, bio->bi_sector, 0); - end = KEY(dc->disk.id, bio_end(bio), 0); + end = KEY(dc->disk.id, bio_end_sector(bio), 0); bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end); @@ -1176,7 +1130,7 @@ found: if (i->sequential + bio->bi_size > i->sequential) i->sequential += bio->bi_size; - i->last = bio_end(bio); + i->last = bio_end_sector(bio); i->jiffies = jiffies + msecs_to_jiffies(5000); s->task->sequential_io = i->sequential; @@ -1294,30 +1248,25 @@ void bch_cached_dev_request_init(struct cached_dev *dc) static int flash_dev_cache_miss(struct btree *b, struct search *s, struct bio *bio, unsigned sectors) { + struct bio_vec *bv; + int i; + /* Zero fill bio */ - while (bio->bi_idx != bio->bi_vcnt) { - struct bio_vec *bv = bio_iovec(bio); + bio_for_each_segment(bv, bio, i) { unsigned j = min(bv->bv_len >> 9, sectors); void *p = kmap(bv->bv_page); memset(p + bv->bv_offset, 0, j << 9); kunmap(bv->bv_page); - bv->bv_len -= j << 9; - bv->bv_offset += j << 9; - - if (bv->bv_len) - return 0; - - bio->bi_sector += j; - bio->bi_size -= j << 9; - - bio->bi_idx++; - sectors -= j; + sectors -= j; } - s->op.lookup_done = true; + bio_advance(bio, min(sectors << 9, bio->bi_size)); + + if (!bio->bi_size) + s->op.lookup_done = true; return 0; } @@ -1344,8 +1293,8 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) closure_call(&s->op.cl, btree_read_async, NULL, cl); } else if (bio_has_data(bio) || 
s->op.skip) { bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, - &KEY(d->id, bio->bi_sector, 0), - &KEY(d->id, bio_end(bio), 0)); + &KEY(d->id, bio->bi_sector, 0), + &KEY(d->id, bio_end_sector(bio), 0)); s->writeback = true; s->op.cache_bio = bio; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index da3a99e85b1e..98eb81159a22 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -228,23 +228,6 @@ start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, } } -int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp) -{ - int i; - struct bio_vec *bv; - - bio_for_each_segment(bv, bio, i) { - bv->bv_page = alloc_page(gfp); - if (!bv->bv_page) { - while (bv-- != bio->bi_io_vec + bio->bi_idx) - __free_page(bv->bv_page); - return -ENOMEM; - } - } - - return 0; -} - /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any * use permitted, subject to terms of PostgreSQL license; see.) diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index e02780545f12..1ae2a73ad85f 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -564,12 +564,8 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) return x; } -#define bio_end(bio) ((bio)->bi_sector + bio_sectors(bio)) - void bch_bio_map(struct bio *bio, void *base); -int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp); - static inline sector_t bdev_sectors(struct block_device *bdev) { return bdev->bd_inode->i_size >> 9; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index d81ee5ccc726..22cbff551628 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -285,9 +285,10 @@ static void write_dirty_finish(struct closure *cl) struct dirty_io *io = container_of(cl, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; struct cached_dev *dc = io->dc; - struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt); + struct bio_vec *bv; + int i; - while (bv-- != 
io->bio.bi_io_vec) + bio_for_each_segment_all(bv, &io->bio, i) __free_page(bv->bv_page); /* This is kind of a dumb way of signalling errors. */ @@ -418,7 +419,7 @@ static void read_dirty(struct closure *cl) io->bio.bi_rw = READ; io->bio.bi_end_io = read_dirty_endio; - if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) + if (bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; trace_bcache_writeback(&w->key); -- cgit v1.2.3