Diffstat (limited to 'drivers/md/bcache/request.c')
-rw-r--r-- | drivers/md/bcache/request.c | 204
1 file changed, 101 insertions(+), 103 deletions(-)
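Most of this diff is the immutable-biovec conversion: the bio's position and size move from bio->bi_sector / bio->bi_size to bio->bi_iter.bi_sector / bio->bi_iter.bi_size, and segment walks now copy each bio_vec by value while a separate struct bvec_iter tracks the position. A minimal sketch of the new iteration pattern, modelled on the bio_csum() hunk below (the wrapper function name and checksum seed are illustrative only; bch_crc64_update() is bcache's existing helper):

```c
#include <linux/bio.h>
#include <linux/highmem.h>

/*
 * Illustrative only: walk a bio with the post-conversion iterator. The
 * bio_vec is a by-value copy and the bvec_iter carries the position, so
 * the walk no longer pokes at the bio's own (now immutable) biovec.
 */
static uint64_t example_bio_csum(struct bio *bio, uint64_t csum)
{
        struct bio_vec bv;
        struct bvec_iter iter;

        bio_for_each_segment(bv, bio, iter) {
                void *d = kmap(bv.bv_page) + bv.bv_offset;

                csum = bch_crc64_update(csum, d, bv.bv_len); /* bcache helper */
                kunmap(bv.bv_page);
        }

        return csum;
}
```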
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index fbcc851ed5a5..5d5d031cf381 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -163,7 +163,6 @@ static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup)
 static void bcachecg_destroy(struct cgroup *cgroup)
 {
         struct bch_cgroup *cg = cgroup_to_bcache(cgroup);
-        free_css_id(&bcache_subsys, &cg->css);
         kfree(cg);
 }
 
@@ -198,14 +197,14 @@ static bool verify(struct cached_dev *dc, struct bio *bio)
 
 static void bio_csum(struct bio *bio, struct bkey *k)
 {
-        struct bio_vec *bv;
+        struct bio_vec bv;
+        struct bvec_iter iter;
         uint64_t csum = 0;
-        int i;
 
-        bio_for_each_segment(bv, bio, i) {
-                void *d = kmap(bv->bv_page) + bv->bv_offset;
-                csum = bch_crc64_update(csum, d, bv->bv_len);
-                kunmap(bv->bv_page);
+        bio_for_each_segment(bv, bio, iter) {
+                void *d = kmap(bv.bv_page) + bv.bv_offset;
+                csum = bch_crc64_update(csum, d, bv.bv_len);
+                kunmap(bv.bv_page);
         }
 
         k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
@@ -255,26 +254,44 @@ static void bch_data_insert_keys(struct closure *cl)
         closure_return(cl);
 }
 
+static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
+                               struct cache_set *c)
+{
+        size_t oldsize = bch_keylist_nkeys(l);
+        size_t newsize = oldsize + u64s;
+
+        /*
+         * The journalling code doesn't handle the case where the keys to insert
+         * is bigger than an empty write: If we just return -ENOMEM here,
+         * bio_insert() and bio_invalidate() will insert the keys created so far
+         * and finish the rest when the keylist is empty.
+         */
+        if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
+                return -ENOMEM;
+
+        return __bch_keylist_realloc(l, u64s);
+}
+
 static void bch_data_invalidate(struct closure *cl)
 {
         struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
         struct bio *bio = op->bio;
 
         pr_debug("invalidating %i sectors from %llu",
-                 bio_sectors(bio), (uint64_t) bio->bi_sector);
+                 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
 
         while (bio_sectors(bio)) {
                 unsigned sectors = min(bio_sectors(bio),
                                        1U << (KEY_SIZE_BITS - 1));
 
-                if (bch_keylist_realloc(&op->insert_keys, 0, op->c))
+                if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
                         goto out;
 
-                bio->bi_sector  += sectors;
-                bio->bi_size    -= sectors << 9;
+                bio->bi_iter.bi_sector  += sectors;
+                bio->bi_iter.bi_size    -= sectors << 9;
 
                 bch_keylist_add(&op->insert_keys,
-                                &KEY(op->inode, bio->bi_sector, sectors));
+                                &KEY(op->inode, bio->bi_iter.bi_sector, sectors));
         }
 
         op->insert_data_done = true;
@@ -336,14 +353,14 @@ static void bch_data_insert_start(struct closure *cl)
         struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
         struct bio *bio = op->bio, *n;
 
-        if (op->bypass)
-                return bch_data_invalidate(cl);
-
         if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
                 set_gc_sectors(op->c);
                 wake_up_gc(op->c);
         }
 
+        if (op->bypass)
+                return bch_data_invalidate(cl);
+
         /*
          * Journal writes are marked REQ_FLUSH; if the original write was a
          * flush, it'll wait on the journal write.
@@ -357,21 +374,21 @@ static void bch_data_insert_start(struct closure *cl)
 
                 /* 1 for the device pointer and 1 for the chksum */
                 if (bch_keylist_realloc(&op->insert_keys,
-                                        1 + (op->csum ? 1 : 0),
+                                        3 + (op->csum ? 1 : 0),
                                         op->c))
                         continue_at(cl, bch_data_insert_keys, bcache_wq);
 
                 k = op->insert_keys.top;
                 bkey_init(k);
                 SET_KEY_INODE(k, op->inode);
-                SET_KEY_OFFSET(k, bio->bi_sector);
+                SET_KEY_OFFSET(k, bio->bi_iter.bi_sector);
 
                 if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
                                        op->write_point, op->write_prio,
                                        op->writeback))
                         goto err;
 
-                n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
+                n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);
 
                 n->bi_end_io    = bch_data_insert_endio;
                 n->bi_private   = cl;
@@ -522,7 +539,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
              (bio->bi_rw & REQ_WRITE)))
                 goto skip;
 
-        if (bio->bi_sector & (c->sb.block_size - 1) ||
+        if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
             bio_sectors(bio) & (c->sb.block_size - 1)) {
                 pr_debug("skipping unaligned io");
                 goto skip;
@@ -546,8 +563,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 
         spin_lock(&dc->io_lock);
 
-        hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash)
-                if (i->last == bio->bi_sector &&
+        hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
+                if (i->last == bio->bi_iter.bi_sector &&
                     time_before(jiffies, i->jiffies))
                         goto found;
 
@@ -556,8 +573,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
         add_sequential(task);
         i->sequential = 0;
 found:
-        if (i->sequential + bio->bi_size > i->sequential)
-                i->sequential   += bio->bi_size;
+        if (i->sequential + bio->bi_iter.bi_size > i->sequential)
+                i->sequential   += bio->bi_iter.bi_size;
 
         i->last                  = bio_end_sector(bio);
         i->jiffies               = jiffies + msecs_to_jiffies(5000);
@@ -597,16 +614,13 @@ struct search {
         /* Stack frame for bio_complete */
         struct closure          cl;
 
-        struct bcache_device    *d;
-
         struct bbio             bio;
         struct bio              *orig_bio;
         struct bio              *cache_miss;
+        struct bcache_device    *d;
 
         unsigned                insert_bio_sectors;
-
         unsigned                recoverable:1;
-        unsigned                unaligned_bvec:1;
 
         unsigned                write:1;
         unsigned                read_dirty_data:1;
@@ -631,7 +645,8 @@ static void bch_cache_read_endio(struct bio *bio, int error)
 
         if (error)
                 s->iop.error = error;
-        else if (ptr_stale(s->iop.c, &b->key, 0)) {
+        else if (!KEY_DIRTY(&b->key) &&
+                 ptr_stale(s->iop.c, &b->key, 0)) {
                 atomic_long_inc(&s->iop.c->cache_read_races);
                 s->iop.error = -EINTR;
         }
@@ -650,15 +665,15 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
         struct bkey *bio_key;
         unsigned ptr;
 
-        if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_sector, 0)) <= 0)
+        if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
                 return MAP_CONTINUE;
 
         if (KEY_INODE(k) != s->iop.inode ||
-            KEY_START(k) > bio->bi_sector) {
+            KEY_START(k) > bio->bi_iter.bi_sector) {
                 unsigned bio_sectors = bio_sectors(bio);
                 unsigned sectors = KEY_INODE(k) == s->iop.inode ?
                         min_t(uint64_t, INT_MAX,
-                              KEY_START(k) - bio->bi_sector)
+                              KEY_START(k) - bio->bi_iter.bi_sector)
                         : INT_MAX;
 
                 int ret = s->d->cache_miss(b, s, bio, sectors);
@@ -680,14 +695,14 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
         if (KEY_DIRTY(k))
                 s->read_dirty_data = true;
 
-        n = bch_bio_split(bio, min_t(uint64_t, INT_MAX,
-                                     KEY_OFFSET(k) - bio->bi_sector),
-                          GFP_NOIO, s->d->bio_split);
+        n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
+                                      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
+                           GFP_NOIO, s->d->bio_split);
 
         bio_key = &container_of(n, struct bbio, bio)->key;
         bch_bkey_copy_single_ptr(bio_key, k, ptr);
 
-        bch_cut_front(&KEY(s->iop.inode, n->bi_sector, 0), bio_key);
+        bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
         bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);
 
         n->bi_end_io    = bch_cache_read_endio;
@@ -712,10 +727,13 @@ static void cache_lookup(struct closure *cl)
 {
         struct search *s = container_of(cl, struct search, iop.cl);
         struct bio *bio = &s->bio.bio;
+        int ret;
+
+        bch_btree_op_init(&s->op, -1);
 
-        int ret = bch_btree_map_keys(&s->op, s->iop.c,
-                                     &KEY(s->iop.inode, bio->bi_sector, 0),
-                                     cache_lookup_fn, MAP_END_KEY);
+        ret = bch_btree_map_keys(&s->op, s->iop.c,
+                                 &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0),
+                                 cache_lookup_fn, MAP_END_KEY);
         if (ret == -EAGAIN)
                 continue_at(cl, cache_lookup, bcache_wq);
 
@@ -756,13 +774,15 @@ static void bio_complete(struct search *s)
         }
 }
 
-static void do_bio_hook(struct search *s)
+static void do_bio_hook(struct search *s, struct bio *orig_bio)
 {
         struct bio *bio = &s->bio.bio;
 
-        memcpy(bio, s->orig_bio, sizeof(struct bio));
+        bio_init(bio);
+        __bio_clone_fast(bio, orig_bio);
         bio->bi_end_io          = request_endio;
         bio->bi_private         = &s->cl;
+
         atomic_set(&bio->bi_cnt, 3);
 }
 
@@ -774,43 +794,36 @@ static void search_free(struct closure *cl)
         if (s->iop.bio)
                 bio_put(s->iop.bio);
 
-        if (s->unaligned_bvec)
-                mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec);
-
         closure_debug_destroy(cl);
         mempool_free(s, s->d->c->search);
 }
 
-static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
+static inline struct search *search_alloc(struct bio *bio,
+                                          struct bcache_device *d)
 {
         struct search *s;
-        struct bio_vec *bv;
 
         s = mempool_alloc(d->c->search, GFP_NOIO);
-        memset(s, 0, offsetof(struct search, iop.insert_keys));
 
-        __closure_init(&s->cl, NULL);
+        closure_init(&s->cl, NULL);
+        do_bio_hook(s, bio);
 
-        s->iop.inode            = d->id;
-        s->iop.c                = d->c;
-        s->d                    = d;
-        s->op.lock              = -1;
-        s->iop.write_point      = hash_long((unsigned long) current, 16);
         s->orig_bio             = bio;
-        s->write                = (bio->bi_rw & REQ_WRITE) != 0;
-        s->iop.flush_journal    = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
+        s->cache_miss           = NULL;
+        s->d                    = d;
         s->recoverable          = 1;
+        s->write                = (bio->bi_rw & REQ_WRITE) != 0;
+        s->read_dirty_data      = 0;
         s->start_time           = jiffies;
-        do_bio_hook(s);
 
-        if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) {
-                bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO);
-                memcpy(bv, bio_iovec(bio),
-                       sizeof(struct bio_vec) * bio_segments(bio));
-
-                s->bio.bio.bi_io_vec    = bv;
-                s->unaligned_bvec       = 1;
-        }
+        s->iop.c                = d->c;
+        s->iop.bio              = NULL;
+        s->iop.inode            = d->id;
+        s->iop.write_point      = hash_long((unsigned long) current, 16);
+        s->iop.write_prio       = 0;
+        s->iop.error            = 0;
+        s->iop.flags            = 0;
+        s->iop.flush_journal    = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
 
         return s;
 }
@@ -850,26 +863,13 @@ static void cached_dev_read_error(struct closure *cl)
 {
         struct search *s = container_of(cl, struct search, cl);
         struct bio *bio = &s->bio.bio;
-        struct bio_vec *bv;
-        int i;
 
         if (s->recoverable) {
                 /* Retry from the backing device: */
                 trace_bcache_read_retry(s->orig_bio);
 
                 s->iop.error = 0;
-                bv = s->bio.bio.bi_io_vec;
-                do_bio_hook(s);
-                s->bio.bio.bi_io_vec = bv;
-
-                if (!s->unaligned_bvec)
-                        bio_for_each_segment(bv, s->orig_bio, i)
-                                bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
-                else
-                        memcpy(s->bio.bio.bi_io_vec,
-                               bio_iovec(s->orig_bio),
-                               sizeof(struct bio_vec) *
-                               bio_segments(s->orig_bio));
+                do_bio_hook(s, s->orig_bio);
 
                 /* XXX: invalidate cache */
 
@@ -894,9 +894,9 @@ static void cached_dev_read_done(struct closure *cl)
 
         if (s->iop.bio) {
                 bio_reset(s->iop.bio);
-                s->iop.bio->bi_sector = s->cache_miss->bi_sector;
+                s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
                 s->iop.bio->bi_bdev = s->cache_miss->bi_bdev;
-                s->iop.bio->bi_size = s->insert_bio_sectors << 9;
+                s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
                 bch_bio_map(s->iop.bio, NULL);
 
                 bio_copy_data(s->cache_miss, s->iop.bio);
@@ -905,8 +905,7 @@ static void cached_dev_read_done(struct closure *cl)
                 s->cache_miss = NULL;
         }
 
-        if (verify(dc, &s->bio.bio) && s->recoverable &&
-            !s->unaligned_bvec && !s->read_dirty_data)
+        if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
                 bch_data_verify(dc, s->orig_bio);
 
         bio_complete(s);
@@ -946,7 +945,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
         struct bio *miss, *cache_bio;
 
         if (s->cache_miss || s->iop.bypass) {
-                miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+                miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
                 ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
                 goto out_submit;
         }
@@ -960,7 +959,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
         s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
 
         s->iop.replace_key = KEY(s->iop.inode,
-                                 bio->bi_sector + s->insert_bio_sectors,
+                                 bio->bi_iter.bi_sector + s->insert_bio_sectors,
                                  s->insert_bio_sectors);
 
         ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
@@ -969,7 +968,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 
         s->iop.replace = true;
 
-        miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+        miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
 
         /* btree_search_recurse()'s btree iterator is no good anymore */
         ret = miss == bio ? MAP_DONE : -EINTR;
@@ -980,9 +979,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
         if (!cache_bio)
                 goto out_submit;
 
-        cache_bio->bi_sector    = miss->bi_sector;
-        cache_bio->bi_bdev      = miss->bi_bdev;
-        cache_bio->bi_size      = s->insert_bio_sectors << 9;
+        cache_bio->bi_iter.bi_sector    = miss->bi_iter.bi_sector;
+        cache_bio->bi_bdev              = miss->bi_bdev;
+        cache_bio->bi_iter.bi_size      = s->insert_bio_sectors << 9;
 
         cache_bio->bi_end_io    = request_endio;
         cache_bio->bi_private   = &s->cl;
@@ -1032,7 +1031,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
 {
         struct closure *cl = &s->cl;
         struct bio *bio = &s->bio.bio;
-        struct bkey start = KEY(dc->disk.id, bio->bi_sector, 0);
+        struct bkey start = KEY(dc->disk.id, bio->bi_iter.bi_sector, 0);
         struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0);
 
         bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end);
@@ -1088,8 +1087,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
                         closure_bio_submit(flush, cl, s->d);
                 }
         } else {
-                s->iop.bio = bio_clone_bioset(bio, GFP_NOIO,
-                                              dc->disk.bio_split);
+                s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
 
                 closure_bio_submit(bio, cl, s->d);
         }
@@ -1127,13 +1125,13 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
         part_stat_unlock();
 
         bio->bi_bdev = dc->bdev;
-        bio->bi_sector += dc->sb.data_offset;
+        bio->bi_iter.bi_sector += dc->sb.data_offset;
 
         if (cached_dev_get(dc)) {
                 s = search_alloc(bio, d);
                 trace_bcache_request_start(s->d, bio);
 
-                if (!bio->bi_size) {
+                if (!bio->bi_iter.bi_size) {
                         /*
                          * can't call bch_journal_meta from under
                          * generic_make_request
@@ -1205,24 +1203,24 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
 
 static int flash_dev_cache_miss(struct btree *b, struct search *s,
                                 struct bio *bio, unsigned sectors)
 {
-        struct bio_vec *bv;
-        int i;
+        struct bio_vec bv;
+        struct bvec_iter iter;
 
         /* Zero fill bio */
 
-        bio_for_each_segment(bv, bio, i) {
-                unsigned j = min(bv->bv_len >> 9, sectors);
-
-                void *p = kmap(bv->bv_page);
-                memset(p + bv->bv_offset, 0, j << 9);
-                kunmap(bv->bv_page);
+        bio_for_each_segment(bv, bio, iter) {
+                unsigned j = min(bv.bv_len >> 9, sectors);
+
+                void *p = kmap(bv.bv_page);
+                memset(p + bv.bv_offset, 0, j << 9);
+                kunmap(bv.bv_page);
 
                 sectors -= j;
         }
 
-        bio_advance(bio, min(sectors << 9, bio->bi_size));
+        bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size));
 
-        if (!bio->bi_size)
+        if (!bio->bi_iter.bi_size)
                 return MAP_DONE;
 
         return MAP_CONTINUE;
@@ -1256,7 +1254,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
 
         trace_bcache_request_start(s->d, bio);
 
-        if (!bio->bi_size) {
+        if (!bio->bi_iter.bi_size) {
                 /*
                  * can't call bch_journal_meta from under
                  * generic_make_request
@@ -1266,7 +1264,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
                               bcache_wq);
         } else if (rw) {
                 bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
-                                             &KEY(d->id, bio->bi_sector, 0),
+                                             &KEY(d->id, bio->bi_iter.bi_sector, 0),
                                              &KEY(d->id, bio_end_sector(bio), 0));
 
                 s->iop.bypass = (bio->bi_rw & REQ_DISCARD) != 0;
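The new bch_keylist_realloc() wrapper takes a count of u64s rather than a count of pointers, which lines up with its callers changing from 0 and 1 + (op->csum ? 1 : 0) to 2 and 3 + (op->csum ? 1 : 0): a bkey here carries a two-u64 header plus one u64 per pointer, with one extra u64 when a checksum is appended (bio_csum() above stores it in k->ptr[KEY_PTRS(k)]). The guard then returns -ENOMEM once the keylist as a whole would no longer fit in an empty journal write, so the caller flushes the keys built so far and finishes on an empty list. A small sketch of that accounting, assuming the two-u64 header; the function names and parameters below are illustrative, not bcache API:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative only: u64s occupied by one bkey. 2 for the header, one per
 * pointer, plus one when a checksum is appended -- hence the 2 passed from
 * bch_data_invalidate() (no pointers) and the 3 + (op->csum ? 1 : 0) passed
 * from bch_data_insert_start() (one device pointer, optional checksum).
 */
static inline size_t example_bkey_u64s(unsigned nptrs, bool csum)
{
        return 2 + nptrs + (csum ? 1 : 0);
}

/*
 * Mirror of the size guard in bch_keylist_realloc(): refuse to grow the
 * keylist past what an empty journal write can hold
 * (block_bytes(c) - sizeof(struct jset) in the patch above).
 */
static inline bool example_keylist_too_big(size_t keylist_u64s, size_t extra_u64s,
                                           size_t block_bytes, size_t jset_bytes)
{
        return (keylist_u64s + extra_u64s) * sizeof(uint64_t) >
               block_bytes - jset_bytes;
}
```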