summaryrefslogtreecommitdiff
path: root/drivers/md/bcache/request.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/bcache/request.c')
-rw-r--r--drivers/md/bcache/request.c204
1 files changed, 101 insertions, 103 deletions
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index fbcc851ed5a5..5d5d031cf381 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -163,7 +163,6 @@ static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup)
static void bcachecg_destroy(struct cgroup *cgroup)
{
struct bch_cgroup *cg = cgroup_to_bcache(cgroup);
- free_css_id(&bcache_subsys, &cg->css);
kfree(cg);
}
@@ -198,14 +197,14 @@ static bool verify(struct cached_dev *dc, struct bio *bio)
static void bio_csum(struct bio *bio, struct bkey *k)
{
- struct bio_vec *bv;
+ struct bio_vec bv;
+ struct bvec_iter iter;
uint64_t csum = 0;
- int i;
- bio_for_each_segment(bv, bio, i) {
- void *d = kmap(bv->bv_page) + bv->bv_offset;
- csum = bch_crc64_update(csum, d, bv->bv_len);
- kunmap(bv->bv_page);
+ bio_for_each_segment(bv, bio, iter) {
+ void *d = kmap(bv.bv_page) + bv.bv_offset;
+ csum = bch_crc64_update(csum, d, bv.bv_len);
+ kunmap(bv.bv_page);
}
k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
@@ -255,26 +254,44 @@ static void bch_data_insert_keys(struct closure *cl)
closure_return(cl);
}
+static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
+ struct cache_set *c)
+{
+ size_t oldsize = bch_keylist_nkeys(l);
+ size_t newsize = oldsize + u64s;
+
+ /*
+ * The journalling code doesn't handle the case where the keys to insert
+ * is bigger than an empty write: If we just return -ENOMEM here,
+ * bio_insert() and bio_invalidate() will insert the keys created so far
+ * and finish the rest when the keylist is empty.
+ */
+ if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
+ return -ENOMEM;
+
+ return __bch_keylist_realloc(l, u64s);
+}
+
static void bch_data_invalidate(struct closure *cl)
{
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
struct bio *bio = op->bio;
pr_debug("invalidating %i sectors from %llu",
- bio_sectors(bio), (uint64_t) bio->bi_sector);
+ bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
while (bio_sectors(bio)) {
unsigned sectors = min(bio_sectors(bio),
1U << (KEY_SIZE_BITS - 1));
- if (bch_keylist_realloc(&op->insert_keys, 0, op->c))
+ if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
goto out;
- bio->bi_sector += sectors;
- bio->bi_size -= sectors << 9;
+ bio->bi_iter.bi_sector += sectors;
+ bio->bi_iter.bi_size -= sectors << 9;
bch_keylist_add(&op->insert_keys,
- &KEY(op->inode, bio->bi_sector, sectors));
+ &KEY(op->inode, bio->bi_iter.bi_sector, sectors));
}
op->insert_data_done = true;
@@ -336,14 +353,14 @@ static void bch_data_insert_start(struct closure *cl)
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
struct bio *bio = op->bio, *n;
- if (op->bypass)
- return bch_data_invalidate(cl);
-
if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
set_gc_sectors(op->c);
wake_up_gc(op->c);
}
+ if (op->bypass)
+ return bch_data_invalidate(cl);
+
/*
* Journal writes are marked REQ_FLUSH; if the original write was a
* flush, it'll wait on the journal write.
@@ -357,21 +374,21 @@ static void bch_data_insert_start(struct closure *cl)
/* 1 for the device pointer and 1 for the chksum */
if (bch_keylist_realloc(&op->insert_keys,
- 1 + (op->csum ? 1 : 0),
+ 3 + (op->csum ? 1 : 0),
op->c))
continue_at(cl, bch_data_insert_keys, bcache_wq);
k = op->insert_keys.top;
bkey_init(k);
SET_KEY_INODE(k, op->inode);
- SET_KEY_OFFSET(k, bio->bi_sector);
+ SET_KEY_OFFSET(k, bio->bi_iter.bi_sector);
if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
op->write_point, op->write_prio,
op->writeback))
goto err;
- n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
+ n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);
n->bi_end_io = bch_data_insert_endio;
n->bi_private = cl;
@@ -522,7 +539,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
(bio->bi_rw & REQ_WRITE)))
goto skip;
- if (bio->bi_sector & (c->sb.block_size - 1) ||
+ if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
pr_debug("skipping unaligned io");
goto skip;
@@ -546,8 +563,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
spin_lock(&dc->io_lock);
- hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash)
- if (i->last == bio->bi_sector &&
+ hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
+ if (i->last == bio->bi_iter.bi_sector &&
time_before(jiffies, i->jiffies))
goto found;
@@ -556,8 +573,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
add_sequential(task);
i->sequential = 0;
found:
- if (i->sequential + bio->bi_size > i->sequential)
- i->sequential += bio->bi_size;
+ if (i->sequential + bio->bi_iter.bi_size > i->sequential)
+ i->sequential += bio->bi_iter.bi_size;
i->last = bio_end_sector(bio);
i->jiffies = jiffies + msecs_to_jiffies(5000);
@@ -597,16 +614,13 @@ struct search {
/* Stack frame for bio_complete */
struct closure cl;
- struct bcache_device *d;
-
struct bbio bio;
struct bio *orig_bio;
struct bio *cache_miss;
+ struct bcache_device *d;
unsigned insert_bio_sectors;
-
unsigned recoverable:1;
- unsigned unaligned_bvec:1;
unsigned write:1;
unsigned read_dirty_data:1;
@@ -631,7 +645,8 @@ static void bch_cache_read_endio(struct bio *bio, int error)
if (error)
s->iop.error = error;
- else if (ptr_stale(s->iop.c, &b->key, 0)) {
+ else if (!KEY_DIRTY(&b->key) &&
+ ptr_stale(s->iop.c, &b->key, 0)) {
atomic_long_inc(&s->iop.c->cache_read_races);
s->iop.error = -EINTR;
}
@@ -650,15 +665,15 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
struct bkey *bio_key;
unsigned ptr;
- if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_sector, 0)) <= 0)
+ if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
return MAP_CONTINUE;
if (KEY_INODE(k) != s->iop.inode ||
- KEY_START(k) > bio->bi_sector) {
+ KEY_START(k) > bio->bi_iter.bi_sector) {
unsigned bio_sectors = bio_sectors(bio);
unsigned sectors = KEY_INODE(k) == s->iop.inode
? min_t(uint64_t, INT_MAX,
- KEY_START(k) - bio->bi_sector)
+ KEY_START(k) - bio->bi_iter.bi_sector)
: INT_MAX;
int ret = s->d->cache_miss(b, s, bio, sectors);
@@ -680,14 +695,14 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
if (KEY_DIRTY(k))
s->read_dirty_data = true;
- n = bch_bio_split(bio, min_t(uint64_t, INT_MAX,
- KEY_OFFSET(k) - bio->bi_sector),
- GFP_NOIO, s->d->bio_split);
+ n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
+ KEY_OFFSET(k) - bio->bi_iter.bi_sector),
+ GFP_NOIO, s->d->bio_split);
bio_key = &container_of(n, struct bbio, bio)->key;
bch_bkey_copy_single_ptr(bio_key, k, ptr);
- bch_cut_front(&KEY(s->iop.inode, n->bi_sector, 0), bio_key);
+ bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);
n->bi_end_io = bch_cache_read_endio;
@@ -712,10 +727,13 @@ static void cache_lookup(struct closure *cl)
{
struct search *s = container_of(cl, struct search, iop.cl);
struct bio *bio = &s->bio.bio;
+ int ret;
+
+ bch_btree_op_init(&s->op, -1);
- int ret = bch_btree_map_keys(&s->op, s->iop.c,
- &KEY(s->iop.inode, bio->bi_sector, 0),
- cache_lookup_fn, MAP_END_KEY);
+ ret = bch_btree_map_keys(&s->op, s->iop.c,
+ &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0),
+ cache_lookup_fn, MAP_END_KEY);
if (ret == -EAGAIN)
continue_at(cl, cache_lookup, bcache_wq);
@@ -756,13 +774,15 @@ static void bio_complete(struct search *s)
}
}
-static void do_bio_hook(struct search *s)
+static void do_bio_hook(struct search *s, struct bio *orig_bio)
{
struct bio *bio = &s->bio.bio;
- memcpy(bio, s->orig_bio, sizeof(struct bio));
+ bio_init(bio);
+ __bio_clone_fast(bio, orig_bio);
bio->bi_end_io = request_endio;
bio->bi_private = &s->cl;
+
atomic_set(&bio->bi_cnt, 3);
}
@@ -774,43 +794,36 @@ static void search_free(struct closure *cl)
if (s->iop.bio)
bio_put(s->iop.bio);
- if (s->unaligned_bvec)
- mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec);
-
closure_debug_destroy(cl);
mempool_free(s, s->d->c->search);
}
-static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
+static inline struct search *search_alloc(struct bio *bio,
+ struct bcache_device *d)
{
struct search *s;
- struct bio_vec *bv;
s = mempool_alloc(d->c->search, GFP_NOIO);
- memset(s, 0, offsetof(struct search, iop.insert_keys));
- __closure_init(&s->cl, NULL);
+ closure_init(&s->cl, NULL);
+ do_bio_hook(s, bio);
- s->iop.inode = d->id;
- s->iop.c = d->c;
- s->d = d;
- s->op.lock = -1;
- s->iop.write_point = hash_long((unsigned long) current, 16);
s->orig_bio = bio;
- s->write = (bio->bi_rw & REQ_WRITE) != 0;
- s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
+ s->cache_miss = NULL;
+ s->d = d;
s->recoverable = 1;
+ s->write = (bio->bi_rw & REQ_WRITE) != 0;
+ s->read_dirty_data = 0;
s->start_time = jiffies;
- do_bio_hook(s);
- if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) {
- bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO);
- memcpy(bv, bio_iovec(bio),
- sizeof(struct bio_vec) * bio_segments(bio));
-
- s->bio.bio.bi_io_vec = bv;
- s->unaligned_bvec = 1;
- }
+ s->iop.c = d->c;
+ s->iop.bio = NULL;
+ s->iop.inode = d->id;
+ s->iop.write_point = hash_long((unsigned long) current, 16);
+ s->iop.write_prio = 0;
+ s->iop.error = 0;
+ s->iop.flags = 0;
+ s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
return s;
}
@@ -850,26 +863,13 @@ static void cached_dev_read_error(struct closure *cl)
{
struct search *s = container_of(cl, struct search, cl);
struct bio *bio = &s->bio.bio;
- struct bio_vec *bv;
- int i;
if (s->recoverable) {
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
s->iop.error = 0;
- bv = s->bio.bio.bi_io_vec;
- do_bio_hook(s);
- s->bio.bio.bi_io_vec = bv;
-
- if (!s->unaligned_bvec)
- bio_for_each_segment(bv, s->orig_bio, i)
- bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
- else
- memcpy(s->bio.bio.bi_io_vec,
- bio_iovec(s->orig_bio),
- sizeof(struct bio_vec) *
- bio_segments(s->orig_bio));
+ do_bio_hook(s, s->orig_bio);
/* XXX: invalidate cache */
@@ -894,9 +894,9 @@ static void cached_dev_read_done(struct closure *cl)
if (s->iop.bio) {
bio_reset(s->iop.bio);
- s->iop.bio->bi_sector = s->cache_miss->bi_sector;
+ s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
s->iop.bio->bi_bdev = s->cache_miss->bi_bdev;
- s->iop.bio->bi_size = s->insert_bio_sectors << 9;
+ s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
bch_bio_map(s->iop.bio, NULL);
bio_copy_data(s->cache_miss, s->iop.bio);
@@ -905,8 +905,7 @@ static void cached_dev_read_done(struct closure *cl)
s->cache_miss = NULL;
}
- if (verify(dc, &s->bio.bio) && s->recoverable &&
- !s->unaligned_bvec && !s->read_dirty_data)
+ if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
bch_data_verify(dc, s->orig_bio);
bio_complete(s);
@@ -946,7 +945,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
struct bio *miss, *cache_bio;
if (s->cache_miss || s->iop.bypass) {
- miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+ miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
goto out_submit;
}
@@ -960,7 +959,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
s->iop.replace_key = KEY(s->iop.inode,
- bio->bi_sector + s->insert_bio_sectors,
+ bio->bi_iter.bi_sector + s->insert_bio_sectors,
s->insert_bio_sectors);
ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
@@ -969,7 +968,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->iop.replace = true;
- miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+ miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
/* btree_search_recurse()'s btree iterator is no good anymore */
ret = miss == bio ? MAP_DONE : -EINTR;
@@ -980,9 +979,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
if (!cache_bio)
goto out_submit;
- cache_bio->bi_sector = miss->bi_sector;
- cache_bio->bi_bdev = miss->bi_bdev;
- cache_bio->bi_size = s->insert_bio_sectors << 9;
+ cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector;
+ cache_bio->bi_bdev = miss->bi_bdev;
+ cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
cache_bio->bi_end_io = request_endio;
cache_bio->bi_private = &s->cl;
@@ -1032,7 +1031,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
{
struct closure *cl = &s->cl;
struct bio *bio = &s->bio.bio;
- struct bkey start = KEY(dc->disk.id, bio->bi_sector, 0);
+ struct bkey start = KEY(dc->disk.id, bio->bi_iter.bi_sector, 0);
struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0);
bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end);
@@ -1088,8 +1087,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
closure_bio_submit(flush, cl, s->d);
}
} else {
- s->iop.bio = bio_clone_bioset(bio, GFP_NOIO,
- dc->disk.bio_split);
+ s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
closure_bio_submit(bio, cl, s->d);
}
@@ -1127,13 +1125,13 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
part_stat_unlock();
bio->bi_bdev = dc->bdev;
- bio->bi_sector += dc->sb.data_offset;
+ bio->bi_iter.bi_sector += dc->sb.data_offset;
if (cached_dev_get(dc)) {
s = search_alloc(bio, d);
trace_bcache_request_start(s->d, bio);
- if (!bio->bi_size) {
+ if (!bio->bi_iter.bi_size) {
/*
* can't call bch_journal_meta from under
* generic_make_request
@@ -1205,24 +1203,24 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
static int flash_dev_cache_miss(struct btree *b, struct search *s,
struct bio *bio, unsigned sectors)
{
- struct bio_vec *bv;
- int i;
+ struct bio_vec bv;
+ struct bvec_iter iter;
/* Zero fill bio */
- bio_for_each_segment(bv, bio, i) {
- unsigned j = min(bv->bv_len >> 9, sectors);
+ bio_for_each_segment(bv, bio, iter) {
+ unsigned j = min(bv.bv_len >> 9, sectors);
- void *p = kmap(bv->bv_page);
- memset(p + bv->bv_offset, 0, j << 9);
- kunmap(bv->bv_page);
+ void *p = kmap(bv.bv_page);
+ memset(p + bv.bv_offset, 0, j << 9);
+ kunmap(bv.bv_page);
sectors -= j;
}
- bio_advance(bio, min(sectors << 9, bio->bi_size));
+ bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size));
- if (!bio->bi_size)
+ if (!bio->bi_iter.bi_size)
return MAP_DONE;
return MAP_CONTINUE;
@@ -1256,7 +1254,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
trace_bcache_request_start(s->d, bio);
- if (!bio->bi_size) {
+ if (!bio->bi_iter.bi_size) {
/*
* can't call bch_journal_meta from under
* generic_make_request
@@ -1266,7 +1264,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
bcache_wq);
} else if (rw) {
bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
- &KEY(d->id, bio->bi_sector, 0),
+ &KEY(d->id, bio->bi_iter.bi_sector, 0),
&KEY(d->id, bio_end_sector(bio), 0));
s->iop.bypass = (bio->bi_rw & REQ_DISCARD) != 0;