summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/request.c12
-rw-r--r--drivers/md/bcache/super.c8
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-cache-target.c15
-rw-r--r--drivers/md/dm-core.h1
-rw-r--r--drivers/md/dm-crypt.c12
-rw-r--r--drivers/md/dm-era-target.c2
-rw-r--r--drivers/md/dm-mpath.c136
-rw-r--r--drivers/md/dm-rq.c272
-rw-r--r--drivers/md/dm-rq.h2
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm-target.c7
-rw-r--r--drivers/md/dm-thin.c15
-rw-r--r--drivers/md/dm.c49
-rw-r--r--drivers/md/dm.h3
-rw-r--r--drivers/md/linear.c2
-rw-r--r--drivers/md/md.c11
-rw-r--r--drivers/md/multipath.c2
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c4
-rw-r--r--drivers/md/raid0.c6
-rw-r--r--drivers/md/raid1.c11
-rw-r--r--drivers/md/raid10.c10
-rw-r--r--drivers/md/raid5-cache.c106
-rw-r--r--drivers/md/raid5.c133
-rw-r--r--drivers/md/raid5.h7
25 files changed, 351 insertions, 479 deletions
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 76d20875503c..709c9cc34369 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.write_prio = 0;
s->iop.error = 0;
s->iop.flags = 0;
- s->iop.flush_journal = (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0;
+ s->iop.flush_journal = op_is_flush(bio->bi_opf);
s->iop.wq = bcache_wq;
return s;
@@ -1009,7 +1009,7 @@ static int cached_dev_congested(void *data, int bits)
struct request_queue *q = bdev_get_queue(dc->bdev);
int ret = 0;
- if (bdi_congested(&q->backing_dev_info, bits))
+ if (bdi_congested(q->backing_dev_info, bits))
return 1;
if (cached_dev_get(dc)) {
@@ -1018,7 +1018,7 @@ static int cached_dev_congested(void *data, int bits)
for_each_cache(ca, d->c, i) {
q = bdev_get_queue(ca->bdev);
- ret |= bdi_congested(&q->backing_dev_info, bits);
+ ret |= bdi_congested(q->backing_dev_info, bits);
}
cached_dev_put(dc);
@@ -1032,7 +1032,7 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
struct gendisk *g = dc->disk.disk;
g->queue->make_request_fn = cached_dev_make_request;
- g->queue->backing_dev_info.congested_fn = cached_dev_congested;
+ g->queue->backing_dev_info->congested_fn = cached_dev_congested;
dc->disk.cache_miss = cached_dev_cache_miss;
dc->disk.ioctl = cached_dev_ioctl;
}
@@ -1125,7 +1125,7 @@ static int flash_dev_congested(void *data, int bits)
for_each_cache(ca, d->c, i) {
q = bdev_get_queue(ca->bdev);
- ret |= bdi_congested(&q->backing_dev_info, bits);
+ ret |= bdi_congested(q->backing_dev_info, bits);
}
return ret;
@@ -1136,7 +1136,7 @@ void bch_flash_dev_request_init(struct bcache_device *d)
struct gendisk *g = d->disk;
g->queue->make_request_fn = flash_dev_make_request;
- g->queue->backing_dev_info.congested_fn = flash_dev_congested;
+ g->queue->backing_dev_info->congested_fn = flash_dev_congested;
d->cache_miss = flash_dev_cache_miss;
d->ioctl = flash_dev_ioctl;
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3a19cbc8b230..85e3f21c2514 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -807,7 +807,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
blk_queue_make_request(q, NULL);
d->disk->queue = q;
q->queuedata = d;
- q->backing_dev_info.congested_data = d;
+ q->backing_dev_info->congested_data = d;
q->limits.max_hw_sectors = UINT_MAX;
q->limits.max_sectors = UINT_MAX;
q->limits.max_segment_size = UINT_MAX;
@@ -1132,9 +1132,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
set_capacity(dc->disk.disk,
dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
- dc->disk.disk->queue->backing_dev_info.ra_pages =
- max(dc->disk.disk->queue->backing_dev_info.ra_pages,
- q->backing_dev_info.ra_pages);
+ dc->disk.disk->queue->backing_dev_info->ra_pages =
+ max(dc->disk.disk->queue->backing_dev_info->ra_pages,
+ q->backing_dev_info->ra_pages);
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 84d2f0e4c754..d36d427a9efb 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -794,7 +794,7 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&c->free_buffer_wait, &wait);
- set_task_state(current, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
dm_bufio_unlock(c);
io_schedule();
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 5813d2a7eefe..9c689b34e6e7 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -788,8 +788,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
spin_lock_irqsave(&cache->lock, flags);
- if (cache->need_tick_bio &&
- !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) &&
+ if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
bio_op(bio) != REQ_OP_DISCARD) {
pb->tick = true;
cache->need_tick_bio = false;
@@ -829,11 +828,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
return to_oblock(block_nr);
}
-static int bio_triggers_commit(struct cache *cache, struct bio *bio)
-{
- return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
-}
-
/*
* You must increment the deferred set whilst the prison cell is held. To
* encourage this, we ask for 'cell' to be passed in.
@@ -885,7 +879,7 @@ static void issue(struct cache *cache, struct bio *bio)
{
unsigned long flags;
- if (!bio_triggers_commit(cache, bio)) {
+ if (!op_is_flush(bio->bi_opf)) {
accounted_request(cache, bio);
return;
}
@@ -1070,8 +1064,7 @@ static void dec_io_migrations(struct cache *cache)
static bool discard_or_flush(struct bio *bio)
{
- return bio_op(bio) == REQ_OP_DISCARD ||
- bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
+ return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
}
static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
@@ -2292,7 +2285,7 @@ static void do_waker(struct work_struct *ws)
static int is_congested(struct dm_dev *dev, int bdi_bits)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
- return bdi_congested(&q->backing_dev_info, bdi_bits);
+ return bdi_congested(q->backing_dev_info, bdi_bits);
}
static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 40ceba1fe8be..136fda3ff9e5 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -92,7 +92,6 @@ struct mapped_device {
* io objects are allocated from here.
*/
mempool_t *io_pool;
- mempool_t *rq_pool;
struct bio_set *bs;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 7c6c57216bf2..1cb2ca9dfae3 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1210,14 +1210,14 @@ continue_locked:
spin_unlock_irq(&cc->write_thread_wait.lock);
if (unlikely(kthread_should_stop())) {
- set_task_state(current, TASK_RUNNING);
+ set_current_state(TASK_RUNNING);
remove_wait_queue(&cc->write_thread_wait, &wait);
break;
}
schedule();
- set_task_state(current, TASK_RUNNING);
+ set_current_state(TASK_RUNNING);
spin_lock_irq(&cc->write_thread_wait.lock);
__remove_wait_queue(&cc->write_thread_wait, &wait);
goto continue_locked;
@@ -1534,18 +1534,18 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
return PTR_ERR(key);
}
- rcu_read_lock();
+ down_read(&key->sem);
ukp = user_key_payload(key);
if (!ukp) {
- rcu_read_unlock();
+ up_read(&key->sem);
key_put(key);
kzfree(new_key_string);
return -EKEYREVOKED;
}
if (cc->key_size != ukp->datalen) {
- rcu_read_unlock();
+ up_read(&key->sem);
key_put(key);
kzfree(new_key_string);
return -EINVAL;
@@ -1553,7 +1553,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
memcpy(cc->key, ukp->data, cc->key_size);
- rcu_read_unlock();
+ up_read(&key->sem);
key_put(key);
/* clear the flag since following operations may invalidate previously valid key */
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index bf2b2676cb8a..9fab33b113c4 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1379,7 +1379,7 @@ static void stop_worker(struct era *era)
static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
- return bdi_congested(&q->backing_dev_info, bdi_bits);
+ return bdi_congested(q->backing_dev_info, bdi_bits);
}
static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 6400cffb986d..7f223dbed49f 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -92,12 +92,6 @@ struct multipath {
unsigned queue_mode;
- /*
- * We must use a mempool of dm_mpath_io structs so that we
- * can resubmit bios on error.
- */
- mempool_t *mpio_pool;
-
struct mutex work_mutex;
struct work_struct trigger_event;
@@ -115,8 +109,6 @@ struct dm_mpath_io {
typedef int (*action_fn) (struct pgpath *pgpath);
-static struct kmem_cache *_mpio_cache;
-
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
@@ -209,7 +201,6 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
init_waitqueue_head(&m->pg_init_wait);
mutex_init(&m->work_mutex);
- m->mpio_pool = NULL;
m->queue_mode = DM_TYPE_NONE;
m->ti = ti;
@@ -229,16 +220,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
- }
-
- if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
- unsigned min_ios = dm_get_reserved_rq_based_ios();
-
- m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
- if (!m->mpio_pool)
- return -ENOMEM;
- }
- else if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
/*
* bio-based doesn't support any direct scsi_dh management;
@@ -263,7 +245,6 @@ static void free_multipath(struct multipath *m)
kfree(m->hw_handler_name);
kfree(m->hw_handler_params);
- mempool_destroy(m->mpio_pool);
kfree(m);
}
@@ -272,38 +253,6 @@ static struct dm_mpath_io *get_mpio(union map_info *info)
return info->ptr;
}
-static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
-{
- struct dm_mpath_io *mpio;
-
- if (!m->mpio_pool) {
- /* Use blk-mq pdu memory requested via per_io_data_size */
- mpio = get_mpio(info);
- memset(mpio, 0, sizeof(*mpio));
- return mpio;
- }
-
- mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
- if (!mpio)
- return NULL;
-
- memset(mpio, 0, sizeof(*mpio));
- info->ptr = mpio;
-
- return mpio;
-}
-
-static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
-{
- /* Only needed for non blk-mq (.request_fn) multipath */
- if (m->mpio_pool) {
- struct dm_mpath_io *mpio = info->ptr;
-
- info->ptr = NULL;
- mempool_free(mpio, m->mpio_pool);
- }
-}
-
static size_t multipath_per_bio_data_size(void)
{
return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
@@ -427,7 +376,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned long flags;
struct priority_group *pg;
struct pgpath *pgpath;
- bool bypassed = true;
+ unsigned bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
clear_bit(MPATHF_QUEUE_IO, &m->flags);
@@ -466,7 +415,7 @@ check_current_pg:
*/
do {
list_for_each_entry(pg, &m->priority_groups, list) {
- if (pg->bypassed == bypassed)
+ if (pg->bypassed == !!bypassed)
continue;
pgpath = choose_path_in_pg(m, pg, nr_bytes);
if (!IS_ERR_OR_NULL(pgpath)) {
@@ -530,16 +479,17 @@ static bool must_push_back_bio(struct multipath *m)
/*
* Map cloned requests (request-based multipath)
*/
-static int __multipath_map(struct dm_target *ti, struct request *clone,
- union map_info *map_context,
- struct request *rq, struct request **__clone)
+static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
+ union map_info *map_context,
+ struct request **__clone)
{
struct multipath *m = ti->private;
int r = DM_MAPIO_REQUEUE;
- size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
+ size_t nr_bytes = blk_rq_bytes(rq);
struct pgpath *pgpath;
struct block_device *bdev;
- struct dm_mpath_io *mpio;
+ struct dm_mpath_io *mpio = get_mpio(map_context);
+ struct request *clone;
/* Do we need to select a new pgpath? */
pgpath = lockless_dereference(m->current_pgpath);
@@ -556,42 +506,23 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
return r;
}
- mpio = set_mpio(m, map_context);
- if (!mpio)
- /* ENOMEM, requeue */
- return r;
-
+ memset(mpio, 0, sizeof(*mpio));
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
bdev = pgpath->path.dev->bdev;
- if (clone) {
- /*
- * Old request-based interface: allocated clone is passed in.
- * Used by: .request_fn stacked on .request_fn path(s).
- */
- clone->q = bdev_get_queue(bdev);
- clone->rq_disk = bdev->bd_disk;
- clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
- } else {
- /*
- * blk-mq request-based interface; used by both:
- * .request_fn stacked on blk-mq path(s) and
- * blk-mq stacked on blk-mq path(s).
- */
- clone = blk_mq_alloc_request(bdev_get_queue(bdev),
- rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
- if (IS_ERR(clone)) {
- /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
- clear_request_fn_mpio(m, map_context);
- return r;
- }
- clone->bio = clone->biotail = NULL;
- clone->rq_disk = bdev->bd_disk;
- clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
- *__clone = clone;
+ clone = blk_get_request(bdev_get_queue(bdev),
+ rq->cmd_flags | REQ_NOMERGE,
+ GFP_ATOMIC);
+ if (IS_ERR(clone)) {
+ /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
+ return r;
}
+ clone->bio = clone->biotail = NULL;
+ clone->rq_disk = bdev->bd_disk;
+ clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+ *__clone = clone;
if (pgpath->pg->ps.type->start_io)
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
@@ -600,22 +531,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
return DM_MAPIO_REMAPPED;
}
-static int multipath_map(struct dm_target *ti, struct request *clone,
- union map_info *map_context)
-{
- return __multipath_map(ti, clone, map_context, NULL, NULL);
-}
-
-static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
- union map_info *map_context,
- struct request **clone)
-{
- return __multipath_map(ti, NULL, map_context, rq, clone);
-}
-
static void multipath_release_clone(struct request *clone)
{
- blk_mq_free_request(clone);
+ blk_put_request(clone);
}
/*
@@ -1187,7 +1105,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_write_same_bios = 1;
if (m->queue_mode == DM_TYPE_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
- else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+ else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
return 0;
@@ -1610,7 +1528,6 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
- clear_request_fn_mpio(m, map_context);
return r;
}
@@ -2060,7 +1977,6 @@ static struct target_type multipath_target = {
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map_rq = multipath_map,
.clone_and_map_rq = multipath_clone_and_map,
.release_clone_rq = multipath_release_clone,
.rq_end_io = multipath_end_io,
@@ -2080,11 +1996,6 @@ static int __init dm_multipath_init(void)
{
int r;
- /* allocate a slab for the dm_mpath_ios */
- _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
- if (!_mpio_cache)
- return -ENOMEM;
-
r = dm_register_target(&multipath_target);
if (r < 0) {
DMERR("request-based register failed %d", r);
@@ -2120,8 +2031,6 @@ bad_alloc_kmpath_handlerd:
bad_alloc_kmultipathd:
dm_unregister_target(&multipath_target);
bad_register_target:
- kmem_cache_destroy(_mpio_cache);
-
return r;
}
@@ -2131,7 +2040,6 @@ static void __exit dm_multipath_exit(void)
destroy_workqueue(kmultipathd);
dm_unregister_target(&multipath_target);
- kmem_cache_destroy(_mpio_cache);
}
module_init(dm_multipath_init);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 9d7275fb541a..67d76f21fecd 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -109,28 +109,6 @@ void dm_stop_queue(struct request_queue *q)
dm_mq_stop_queue(q);
}
-static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
- gfp_t gfp_mask)
-{
- return mempool_alloc(md->io_pool, gfp_mask);
-}
-
-static void free_old_rq_tio(struct dm_rq_target_io *tio)
-{
- mempool_free(tio, tio->md->io_pool);
-}
-
-static struct request *alloc_old_clone_request(struct mapped_device *md,
- gfp_t gfp_mask)
-{
- return mempool_alloc(md->rq_pool, gfp_mask);
-}
-
-static void free_old_clone_request(struct mapped_device *md, struct request *rq)
-{
- mempool_free(rq, md->rq_pool);
-}
-
/*
* Partial completion handling for request-based dm
*/
@@ -185,7 +163,7 @@ static void end_clone_bio(struct bio *clone)
static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
- return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
+ return blk_mq_rq_to_pdu(rq);
}
static void rq_end_stats(struct mapped_device *md, struct request *orig)
@@ -233,31 +211,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
dm_put(md);
}
-static void free_rq_clone(struct request *clone)
-{
- struct dm_rq_target_io *tio = clone->end_io_data;
- struct mapped_device *md = tio->md;
-
- blk_rq_unprep_clone(clone);
-
- /*
- * It is possible for a clone_old_rq() allocated clone to
- * get passed in -- it may not yet have a request_queue.
- * This is known to occur if the error target replaces
- * a multipath target that has a request_fn queue stacked
- * on blk-mq queue(s).
- */
- if (clone->q && clone->q->mq_ops)
- /* stacked on blk-mq queue(s) */
- tio->ti->type->release_clone_rq(clone);
- else if (!md->queue->mq_ops)
- /* request_fn queue stacked on request_fn queue(s) */
- free_old_clone_request(md, clone);
-
- if (!md->queue->mq_ops)
- free_old_rq_tio(tio);
-}
-
/*
* Complete the clone and the original request.
* Must be called without clone's queue lock held,
@@ -270,20 +223,9 @@ static void dm_end_request(struct request *clone, int error)
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;
- if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
- rq->errors = clone->errors;
- rq->resid_len = clone->resid_len;
-
- if (rq->sense)
- /*
- * We are using the sense buffer of the original
- * request.
- * So setting the length of the sense data is enough.
- */
- rq->sense_len = clone->sense_len;
- }
+ blk_rq_unprep_clone(clone);
+ tio->ti->type->release_clone_rq(clone);
- free_rq_clone(clone);
rq_end_stats(md, rq);
if (!rq->q->mq_ops)
blk_end_request_all(rq, error);
@@ -292,22 +234,6 @@ static void dm_end_request(struct request *clone, int error)
rq_completed(md, rw, true);
}
-static void dm_unprep_request(struct request *rq)
-{
- struct dm_rq_target_io *tio = tio_from_request(rq);
- struct request *clone = tio->clone;
-
- if (!rq->q->mq_ops) {
- rq->special = NULL;
- rq->rq_flags &= ~RQF_DONTPREP;
- }
-
- if (clone)
- free_rq_clone(clone);
- else if (!tio->md->queue->mq_ops)
- free_old_rq_tio(tio);
-}
-
/*
* Requeue the original request of a clone.
*/
@@ -346,7 +272,10 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
int rw = rq_data_dir(rq);
rq_end_stats(md, rq);
- dm_unprep_request(rq);
+ if (tio->clone) {
+ blk_rq_unprep_clone(tio->clone);
+ tio->ti->type->release_clone_rq(tio->clone);
+ }
if (!rq->q->mq_ops)
dm_old_requeue_request(rq);
@@ -401,14 +330,11 @@ static void dm_softirq_done(struct request *rq)
if (!clone) {
rq_end_stats(tio->md, rq);
rw = rq_data_dir(rq);
- if (!rq->q->mq_ops) {
+ if (!rq->q->mq_ops)
blk_end_request_all(rq, tio->error);
- rq_completed(tio->md, rw, false);
- free_old_rq_tio(tio);
- } else {
+ else
blk_mq_end_request(rq, tio->error);
- rq_completed(tio->md, rw, false);
- }
+ rq_completed(tio->md, rw, false);
return;
}
@@ -452,16 +378,6 @@ static void end_clone_request(struct request *clone, int error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
- if (!clone->q->mq_ops) {
- /*
- * For just cleaning up the information of the queue in which
- * the clone was dispatched.
- * The clone is *NOT* freed actually here because it is alloced
- * from dm own mempool (RQF_ALLOCED isn't set).
- */
- __blk_put_request(clone->q, clone);
- }
-
/*
* Actual request completion is done in a softirq context which doesn't
* hold the clone's queue lock. Otherwise, deadlock could occur because:
@@ -511,9 +427,6 @@ static int setup_clone(struct request *clone, struct request *rq,
if (r)
return r;
- clone->cmd = rq->cmd;
- clone->cmd_len = rq->cmd_len;
- clone->sense = rq->sense;
clone->end_io = end_clone_request;
clone->end_io_data = tio;
@@ -522,28 +435,6 @@ static int setup_clone(struct request *clone, struct request *rq,
return 0;
}
-static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
- struct dm_rq_target_io *tio, gfp_t gfp_mask)
-{
- /*
- * Create clone for use with .request_fn request_queue
- */
- struct request *clone;
-
- clone = alloc_old_clone_request(md, gfp_mask);
- if (!clone)
- return NULL;
-
- blk_rq_init(NULL, clone);
- if (setup_clone(clone, rq, tio, gfp_mask)) {
- /* -ENOMEM */
- free_old_clone_request(md, clone);
- return NULL;
- }
-
- return clone;
-}
-
static void map_tio_request(struct kthread_work *work);
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
@@ -565,60 +456,6 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
kthread_init_work(&tio->work, map_tio_request);
}
-static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
- struct mapped_device *md,
- gfp_t gfp_mask)
-{
- struct dm_rq_target_io *tio;
- int srcu_idx;
- struct dm_table *table;
-
- tio = alloc_old_rq_tio(md, gfp_mask);
- if (!tio)
- return NULL;
-
- init_tio(tio, rq, md);
-
- table = dm_get_live_table(md, &srcu_idx);
- /*
- * Must clone a request if this .request_fn DM device
- * is stacked on .request_fn device(s).
- */
- if (!dm_table_all_blk_mq_devices(table)) {
- if (!clone_old_rq(rq, md, tio, gfp_mask)) {
- dm_put_live_table(md, srcu_idx);
- free_old_rq_tio(tio);
- return NULL;
- }
- }
- dm_put_live_table(md, srcu_idx);
-
- return tio;
-}
-
-/*
- * Called with the queue lock held.
- */
-static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
-{
- struct mapped_device *md = q->queuedata;
- struct dm_rq_target_io *tio;
-
- if (unlikely(rq->special)) {
- DMWARN("Already has something in rq->special.");
- return BLKPREP_KILL;
- }
-
- tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
- if (!tio)
- return BLKPREP_DEFER;
-
- rq->special = tio;
- rq->rq_flags |= RQF_DONTPREP;
-
- return BLKPREP_OK;
-}
-
/*
* Returns:
* DM_MAPIO_* : the request has been processed as indicated
@@ -633,31 +470,18 @@ static int map_request(struct dm_rq_target_io *tio)
struct request *rq = tio->orig;
struct request *clone = NULL;
- if (tio->clone) {
- clone = tio->clone;
- r = ti->type->map_rq(ti, clone, &tio->info);
- if (r == DM_MAPIO_DELAY_REQUEUE)
- return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
- } else {
- r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
- if (r < 0) {
- /* The target wants to complete the I/O */
- dm_kill_unmapped_request(rq, r);
- return r;
- }
- if (r == DM_MAPIO_REMAPPED &&
- setup_clone(clone, rq, tio, GFP_ATOMIC)) {
- /* -ENOMEM */
- ti->type->release_clone_rq(clone);
- return DM_MAPIO_REQUEUE;
- }
- }
-
+ r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
switch (r) {
case DM_MAPIO_SUBMITTED:
/* The target has taken the I/O to submit by itself later */
break;
case DM_MAPIO_REMAPPED:
+ if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+ /* -ENOMEM */
+ ti->type->release_clone_rq(clone);
+ return DM_MAPIO_REQUEUE;
+ }
+
/* The target has remapped the I/O so dispatch it */
trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq));
@@ -716,6 +540,29 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
dm_get(md);
}
+static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
+{
+ struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+
+ /*
+ * Must initialize md member of tio, otherwise it won't
+ * be available in dm_mq_queue_rq.
+ */
+ tio->md = md;
+
+ if (md->init_tio_pdu) {
+ /* target-specific per-io data is immediately after the tio */
+ tio->info.ptr = tio + 1;
+ }
+
+ return 0;
+}
+
+static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+{
+ return __dm_rq_init_rq(q->rq_alloc_data, rq);
+}
+
static void map_tio_request(struct kthread_work *work)
{
struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
@@ -779,6 +626,10 @@ static void dm_old_request_fn(struct request_queue *q)
int srcu_idx;
struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+ if (unlikely(!map)) {
+ dm_put_live_table(md, srcu_idx);
+ return;
+ }
ti = dm_table_find_target(map, pos);
dm_put_live_table(md, srcu_idx);
}
@@ -810,6 +661,7 @@ static void dm_old_request_fn(struct request_queue *q)
dm_start_request(md, rq);
tio = tio_from_request(rq);
+ init_tio(tio, rq, md);
/* Establish tio->ti before queuing work (map_tio_request) */
tio->ti = ti;
kthread_queue_work(&md->kworker, &tio->work);
@@ -820,10 +672,23 @@ static void dm_old_request_fn(struct request_queue *q)
/*
* Fully initialize a .request_fn request-based queue.
*/
-int dm_old_init_request_queue(struct mapped_device *md)
+int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
+ struct dm_target *immutable_tgt;
+
/* Fully initialize the queue */
- if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
+ md->queue->cmd_size = sizeof(struct dm_rq_target_io);
+ md->queue->rq_alloc_data = md;
+ md->queue->request_fn = dm_old_request_fn;
+ md->queue->init_rq_fn = dm_rq_init_rq;
+
+ immutable_tgt = dm_table_get_immutable_target(t);
+ if (immutable_tgt && immutable_tgt->per_io_data_size) {
+ /* any target-specific per-io data is immediately after the tio */
+ md->queue->cmd_size += immutable_tgt->per_io_data_size;
+ md->init_tio_pdu = true;
+ }
+ if (blk_init_allocated_queue(md->queue) < 0)
return -EINVAL;
/* disable dm_old_request_fn's merge heuristic by default */
@@ -831,7 +696,6 @@ int dm_old_init_request_queue(struct mapped_device *md)
dm_init_normal_md_queue(md);
blk_queue_softirq_done(md->queue, dm_softirq_done);
- blk_queue_prep_rq(md->queue, dm_old_prep_fn);
/* Initialize the request-based DM worker thread */
kthread_init_worker(&md->kworker);
@@ -852,21 +716,7 @@ static int dm_mq_init_request(void *data, struct request *rq,
unsigned int hctx_idx, unsigned int request_idx,
unsigned int numa_node)
{
- struct mapped_device *md = data;
- struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
-
- /*
- * Must initialize md member of tio, otherwise it won't
- * be available in dm_mq_queue_rq.
- */
- tio->md = md;
-
- if (md->init_tio_pdu) {
- /* target-specific per-io data is immediately after the tio */
- tio->info.ptr = tio + 1;
- }
-
- return 0;
+ return __dm_rq_init_rq(data, rq);
}
static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index 4da06cae7bad..f0020d21b95f 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -48,7 +48,7 @@ struct dm_rq_clone_bio_info {
bool dm_use_blk_mq_default(void);
bool dm_use_blk_mq(struct mapped_device *md);
-int dm_old_init_request_queue(struct mapped_device *md);
+int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t);
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
void dm_mq_cleanup_mapped_device(struct mapped_device *md);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0a427de23ed2..3ad16d9c9d5a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1750,7 +1750,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
char b[BDEVNAME_SIZE];
if (likely(q))
- r |= bdi_congested(&q->backing_dev_info, bdi_bits);
+ r |= bdi_congested(q->backing_dev_info, bdi_bits);
else
DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
dm_device_name(t->md),
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 710ae28fd618..43d3445b121d 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -131,12 +131,6 @@ static int io_err_map(struct dm_target *tt, struct bio *bio)
return -EIO;
}
-static int io_err_map_rq(struct dm_target *ti, struct request *clone,
- union map_info *map_context)
-{
- return -EIO;
-}
-
static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
union map_info *map_context,
struct request **clone)
@@ -161,7 +155,6 @@ static struct target_type error_target = {
.ctr = io_err_ctr,
.dtr = io_err_dtr,
.map = io_err_map,
- .map_rq = io_err_map_rq,
.clone_and_map_rq = io_err_clone_and_map_rq,
.release_clone_rq = io_err_release_clone_rq,
.direct_access = io_err_direct_access,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index d1c05c12a9db..2b266a2b5035 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
{
- return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
+ return op_is_flush(bio->bi_opf) &&
dm_thin_changed_this_transaction(tc->td);
}
@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context,
struct bio *bio;
while ((bio = bio_list_pop(&cell->bios))) {
- if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
- bio_op(bio) == REQ_OP_DISCARD)
+ if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
bio_list_add(&info->defer_bios, bio);
else {
inc_all_io_entry(info->tc->pool, bio);
@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context,
struct bio *bio;
while ((bio = bio_list_pop(&cell->bios))) {
- if ((bio_data_dir(bio) == WRITE) ||
- (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
- bio_op(bio) == REQ_OP_DISCARD))
+ if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
+ bio_op(bio) == REQ_OP_DISCARD)
bio_list_add(&info->defer_bios, bio);
else {
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));;
@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED;
}
- if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
- bio_op(bio) == REQ_OP_DISCARD) {
+ if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
thin_defer_bio_with_throttle(tc, bio);
return DM_MAPIO_SUBMITTED;
}
@@ -2714,7 +2711,7 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
return 1;
q = bdev_get_queue(pt->data_dev->bdev);
- return bdi_congested(&q->backing_dev_info, bdi_bits);
+ return bdi_congested(q->backing_dev_info, bdi_bits);
}
static void requeue_bios(struct pool *pool)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0ff5469c03d2..9f37d7fc2786 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -91,7 +91,6 @@ static int dm_numa_node = DM_NUMA_NODE;
*/
struct dm_md_mempools {
mempool_t *io_pool;
- mempool_t *rq_pool;
struct bio_set *bs;
};
@@ -466,13 +465,16 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
if (r > 0) {
/*
- * Target determined this ioctl is being issued against
- * a logical partition of the parent bdev; so extra
- * validation is needed.
+ * Target determined this ioctl is being issued against a
+ * subset of the parent bdev; require extra privileges.
*/
- r = scsi_verify_blk_ioctl(NULL, cmd);
- if (r)
+ if (!capable(CAP_SYS_RAWIO)) {
+ DMWARN_LIMIT(
+ "%s: sending ioctl %x to DM device without required privilege.",
+ current->comm, cmd);
+ r = -ENOIOCTLCMD;
goto out;
+ }
}
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
@@ -1369,7 +1371,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
* With request-based DM we only need to check the
* top-level queue for congestion.
*/
- r = md->queue->backing_dev_info.wb.state & bdi_bits;
+ r = md->queue->backing_dev_info->wb.state & bdi_bits;
} else {
map = dm_get_live_table_fast(md);
if (map)
@@ -1452,7 +1454,7 @@ void dm_init_md_queue(struct mapped_device *md)
* - must do so here (in alloc_dev callchain) before queue is used
*/
md->queue->queuedata = md;
- md->queue->backing_dev_info.congested_data = md;
+ md->queue->backing_dev_info->congested_data = md;
}
void dm_init_normal_md_queue(struct mapped_device *md)
@@ -1463,7 +1465,7 @@ void dm_init_normal_md_queue(struct mapped_device *md)
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
- md->queue->backing_dev_info.congested_fn = dm_any_congested;
+ md->queue->backing_dev_info->congested_fn = dm_any_congested;
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}
@@ -1474,7 +1476,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
if (md->kworker_task)
kthread_stop(md->kworker_task);
mempool_destroy(md->io_pool);
- mempool_destroy(md->rq_pool);
if (md->bs)
bioset_free(md->bs);
@@ -1650,12 +1651,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
goto out;
}
- BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
+ BUG_ON(!p || md->io_pool || md->bs);
md->io_pool = p->io_pool;
p->io_pool = NULL;
- md->rq_pool = p->rq_pool;
- p->rq_pool = NULL;
md->bs = p->bs;
p->bs = NULL;
@@ -1832,7 +1831,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
switch (type) {
case DM_TYPE_REQUEST_BASED:
- r = dm_old_init_request_queue(md);
+ r = dm_old_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based mapped device");
return r;
@@ -2548,7 +2547,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
unsigned integrity, unsigned per_io_data_size)
{
struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
- struct kmem_cache *cachep = NULL;
unsigned int pool_size = 0;
unsigned int front_pad;
@@ -2558,20 +2556,16 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
switch (type) {
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
- cachep = _io_cache;
pool_size = dm_get_reserved_bio_based_ios();
front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
+
+ pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+ if (!pools->io_pool)
+ goto out;
break;
case DM_TYPE_REQUEST_BASED:
- cachep = _rq_tio_cache;
- pool_size = dm_get_reserved_rq_based_ios();
- pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
- if (!pools->rq_pool)
- goto out;
- /* fall through to setup remaining rq-based pools */
case DM_TYPE_MQ_REQUEST_BASED:
- if (!pool_size)
- pool_size = dm_get_reserved_rq_based_ios();
+ pool_size = dm_get_reserved_rq_based_ios();
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
/* per_io_data_size is used for blk-mq pdu at queue allocation */
break;
@@ -2579,12 +2573,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
BUG();
}
- if (cachep) {
- pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
- if (!pools->io_pool)
- goto out;
- }
-
pools->bs = bioset_create_nobvec(pool_size, front_pad);
if (!pools->bs)
goto out;
@@ -2606,7 +2594,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
return;
mempool_destroy(pools->io_pool);
- mempool_destroy(pools->rq_pool);
if (pools->bs)
bioset_free(pools->bs);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index f0aad08b9654..f298b01f7ab3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -95,8 +95,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
/*
* To check whether the target type is request-based or not (bio-based).
*/
-#define dm_target_request_based(t) (((t)->type->map_rq != NULL) || \
- ((t)->type->clone_and_map_rq != NULL))
+#define dm_target_request_based(t) ((t)->type->clone_and_map_rq != NULL)
/*
* To check whether the target type is a hybrid (capable of being
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 5975c9915684..f1c7bbac31a5 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -62,7 +62,7 @@ static int linear_congested(struct mddev *mddev, int bits)
for (i = 0; i < mddev->raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
- ret |= bdi_congested(&q->backing_dev_info, bits);
+ ret |= bdi_congested(q->backing_dev_info, bits);
}
return ret;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 82821ee0d57f..ba485dcf1064 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev)
if (start_readonly && mddev->ro == 0)
mddev->ro = 2; /* read-only, but switch on first write */
+ /*
+ * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
+ * up mddev->thread. It is important to initialize critical
+ * resources for mddev->thread BEFORE calling pers->run().
+ */
err = pers->run(mddev);
if (err)
pr_warn("md: pers->run() failed ...\n");
@@ -5341,8 +5346,8 @@ int md_run(struct mddev *mddev)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
else
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
- mddev->queue->backing_dev_info.congested_data = mddev;
- mddev->queue->backing_dev_info.congested_fn = md_congested;
+ mddev->queue->backing_dev_info->congested_data = mddev;
+ mddev->queue->backing_dev_info->congested_fn = md_congested;
}
if (pers->sync_request) {
if (mddev->kobj.sd &&
@@ -5699,7 +5704,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
__md_stop_writes(mddev);
__md_stop(mddev);
- mddev->queue->backing_dev_info.congested_fn = NULL;
+ mddev->queue->backing_dev_info->congested_fn = NULL;
/* tell userspace to handle 'inactive' */
sysfs_notify_dirent_safe(mddev->sysfs_state);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index aa8c4e5c1ee2..d457afa672d5 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -169,7 +169,7 @@ static int multipath_congested(struct mddev *mddev, int bits)
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q = bdev_get_queue(rdev->bdev);
- ret |= bdi_congested(&q->backing_dev_info, bits);
+ ret |= bdi_congested(q->backing_dev_info, bits);
/* Just like multipath_map, we just check the
* first available device
*/
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 8212f14214f1..0863905dee02 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -120,7 +120,7 @@ static int __check_holder(struct block_lock *lock)
static void __wait(struct waiter *w)
{
for (;;) {
- set_task_state(current, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (!w->task)
break;
@@ -128,7 +128,7 @@ static void __wait(struct waiter *w)
schedule();
}
- set_task_state(current, TASK_RUNNING);
+ set_current_state(TASK_RUNNING);
}
static void __wake_waiter(struct waiter *w)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 848365d474f3..d6585239bff2 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -41,7 +41,7 @@ static int raid0_congested(struct mddev *mddev, int bits)
for (i = 0; i < raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
- ret |= bdi_congested(&q->backing_dev_info, bits);
+ ret |= bdi_congested(q->backing_dev_info, bits);
}
return ret;
}
@@ -420,8 +420,8 @@ static int raid0_run(struct mddev *mddev)
*/
int stripe = mddev->raid_disks *
(mddev->chunk_sectors << 9) / PAGE_SIZE;
- if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
- mddev->queue->backing_dev_info.ra_pages = 2* stripe;
+ if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
+ mddev->queue->backing_dev_info->ra_pages = 2* stripe;
}
dump_zones(mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7b0f647bcccb..830ff2b20346 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -744,9 +744,9 @@ static int raid1_congested(struct mddev *mddev, int bits)
* non-congested targets, it can be removed
*/
if ((bits & (1 << WB_async_congested)) || 1)
- ret |= bdi_congested(&q->backing_dev_info, bits);
+ ret |= bdi_congested(q->backing_dev_info, bits);
else
- ret &= bdi_congested(&q->backing_dev_info, bits);
+ ret &= bdi_congested(q->backing_dev_info, bits);
}
}
rcu_read_unlock();
@@ -1170,10 +1170,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
int i, disks;
struct bitmap *bitmap = mddev->bitmap;
unsigned long flags;
- const int op = bio_op(bio);
- const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
- const unsigned long do_flush_fua = (bio->bi_opf &
- (REQ_PREFLUSH | REQ_FUA));
struct md_rdev *blocked_rdev;
struct blk_plug_cb *cb;
struct raid1_plug_cb *plug = NULL;
@@ -1389,7 +1385,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
conf->mirrors[i].rdev->data_offset);
mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
mbio->bi_end_io = raid1_end_write_request;
- bio_set_op_attrs(mbio, op, do_flush_fua | do_sync);
+ mbio->bi_opf = bio_op(bio) |
+ (bio->bi_opf & (REQ_SYNC | REQ_PREFLUSH | REQ_FUA));
if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) &&
!test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) &&
conf->raid_disks - mddev->degraded > 1)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1920756828df..6bc5c2a85160 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -860,7 +860,7 @@ static int raid10_congested(struct mddev *mddev, int bits)
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q = bdev_get_queue(rdev->bdev);
- ret |= bdi_congested(&q->backing_dev_info, bits);
+ ret |= bdi_congested(q->backing_dev_info, bits);
}
}
rcu_read_unlock();
@@ -3841,8 +3841,8 @@ static int raid10_run(struct mddev *mddev)
* maybe...
*/
stripe /= conf->geo.near_copies;
- if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
- mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+ if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
+ mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
}
if (md_integrity_register(mddev))
@@ -4643,8 +4643,8 @@ static void end_reshape(struct r10conf *conf)
int stripe = conf->geo.raid_disks *
((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
stripe /= conf->geo.near_copies;
- if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
- conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+ if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
+ conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
}
conf->fullsync = 0;
}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0e8ed2c327b0..302dea3296ba 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -162,6 +162,8 @@ struct r5l_log {
/* to submit async io_units, to fulfill ordering of flush */
struct work_struct deferred_io_work;
+ /* to disable write back during in degraded mode */
+ struct work_struct disable_writeback_work;
};
/*
@@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work)
r5l_do_submit_io(log, io);
}
+static void r5c_disable_writeback_async(struct work_struct *work)
+{
+ struct r5l_log *log = container_of(work, struct r5l_log,
+ disable_writeback_work);
+ struct mddev *mddev = log->rdev->mddev;
+
+ if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+ return;
+ pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
+ mdname(mddev));
+ mddev_suspend(mddev);
+ log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+ mddev_resume(mddev);
+}
+
static void r5l_submit_current_io(struct r5l_log *log)
{
struct r5l_io_unit *io = log->current_io;
@@ -1393,8 +1410,6 @@ static void r5l_do_reclaim(struct r5l_log *log)
next_checkpoint = r5c_calculate_new_cp(conf);
spin_unlock_irq(&log->io_list_lock);
- BUG_ON(reclaimable < 0);
-
if (reclaimable == 0 || !write_super)
return;
@@ -2062,7 +2077,7 @@ static int
r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
struct r5l_recovery_ctx *ctx)
{
- struct stripe_head *sh, *next;
+ struct stripe_head *sh;
struct mddev *mddev = log->rdev->mddev;
struct page *page;
sector_t next_checkpoint = MaxSector;
@@ -2076,7 +2091,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
WARN_ON(list_empty(&ctx->cached_list));
- list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+ list_for_each_entry(sh, &ctx->cached_list, lru) {
struct r5l_meta_block *mb;
int i;
int offset;
@@ -2126,14 +2141,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
ctx->pos = write_pos;
ctx->seq += 1;
next_checkpoint = sh->log_start;
- list_del_init(&sh->lru);
- raid5_release_stripe(sh);
}
log->next_checkpoint = next_checkpoint;
__free_page(page);
return 0;
}
+static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
+ struct r5l_recovery_ctx *ctx)
+{
+ struct mddev *mddev = log->rdev->mddev;
+ struct r5conf *conf = mddev->private;
+ struct stripe_head *sh, *next;
+
+ if (ctx->data_only_stripes == 0)
+ return;
+
+ log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;
+
+ list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+ r5c_make_stripe_write_out(sh);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ list_del_init(&sh->lru);
+ raid5_release_stripe(sh);
+ }
+
+ md_wakeup_thread(conf->mddev->thread);
+ /* reuse conf->wait_for_quiescent in recovery */
+ wait_event(conf->wait_for_quiescent,
+ atomic_read(&conf->active_stripes) == 0);
+
+ log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+}
+
static int r5l_recovery_log(struct r5l_log *log)
{
struct mddev *mddev = log->rdev->mddev;
@@ -2160,32 +2200,31 @@ static int r5l_recovery_log(struct r5l_log *log)
pos = ctx.pos;
ctx.seq += 10000;
- if (ctx.data_only_stripes == 0) {
- log->next_checkpoint = ctx.pos;
- r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
- ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
- }
if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
pr_debug("md/raid:%s: starting from clean shutdown\n",
mdname(mddev));
- else {
+ else
pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
mdname(mddev), ctx.data_only_stripes,
ctx.data_parity_stripes);
- if (ctx.data_only_stripes > 0)
- if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
- pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
- mdname(mddev));
- return -EIO;
- }
+ if (ctx.data_only_stripes == 0) {
+ log->next_checkpoint = ctx.pos;
+ r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
+ ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
+ } else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
+ pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
+ mdname(mddev));
+ return -EIO;
}
log->log_start = ctx.pos;
log->seq = ctx.seq;
log->last_checkpoint = pos;
r5l_write_super(log, pos);
+
+ r5c_recovery_flush_data_only_stripes(log, &ctx);
return 0;
}
@@ -2247,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
val > R5C_JOURNAL_MODE_WRITE_BACK)
return -EINVAL;
+ if (raid5_calc_degraded(conf) > 0 &&
+ val == R5C_JOURNAL_MODE_WRITE_BACK)
+ return -EINVAL;
+
mddev_suspend(mddev);
conf->log->r5c_journal_mode = val;
mddev_resume(mddev);
@@ -2301,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf,
set_bit(STRIPE_R5C_CACHING, &sh->state);
}
+ /*
+ * When run in degraded mode, array is set to write-through mode.
+ * This check helps drain pending write safely in the transition to
+ * write-through mode.
+ */
+ if (s->failed) {
+ r5c_make_stripe_write_out(sh);
+ return -EAGAIN;
+ }
+
for (i = disks; i--; ) {
dev = &sh->dev[i];
/* if non-overwrite, use writing-out phase */
@@ -2351,6 +2404,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
struct page *p = sh->dev[i].orig_page;
sh->dev[i].orig_page = sh->dev[i].page;
+ clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
+
if (!using_disk_info_extra_page)
put_page(p);
}
@@ -2555,6 +2610,19 @@ ioerr:
return ret;
}
+void r5c_update_on_rdev_error(struct mddev *mddev)
+{
+ struct r5conf *conf = mddev->private;
+ struct r5l_log *log = conf->log;
+
+ if (!log)
+ return;
+
+ if (raid5_calc_degraded(conf) > 0 &&
+ conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
+ schedule_work(&log->disable_writeback_work);
+}
+
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{
struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -2627,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
spin_lock_init(&log->no_space_stripes_lock);
INIT_WORK(&log->deferred_io_work, r5l_submit_io_async);
+ INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
INIT_LIST_HEAD(&log->stripe_in_journal_list);
@@ -2659,6 +2728,7 @@ io_kc:
void r5l_exit_log(struct r5l_log *log)
{
+ flush_work(&log->disable_writeback_work);
md_unregister_thread(&log->reclaim_thread);
mempool_destroy(log->meta_pool);
bioset_free(log->bs);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 36c13e4be9c9..6214e699342c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -556,7 +556,7 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
* of the two sections, and some non-in_sync devices may
* be insync in the section most affected by failed devices.
*/
-static int calc_degraded(struct r5conf *conf)
+int raid5_calc_degraded(struct r5conf *conf)
{
int degraded, degraded2;
int i;
@@ -619,7 +619,7 @@ static int has_failed(struct r5conf *conf)
if (conf->mddev->reshape_position == MaxSector)
return conf->mddev->degraded > conf->max_degraded;
- degraded = calc_degraded(conf);
+ degraded = raid5_calc_degraded(conf);
if (degraded > conf->max_degraded)
return 1;
return 0;
@@ -1015,7 +1015,17 @@ again:
if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
- sh->dev[i].vec.bv_page = sh->dev[i].page;
+
+ if (!op_is_write(op) &&
+ test_bit(R5_InJournal, &sh->dev[i].flags))
+ /*
+ * issuing read for a page in journal, this
+ * must be preparing for prexor in rmw; read
+ * the data into orig_page
+ */
+ sh->dev[i].vec.bv_page = sh->dev[i].orig_page;
+ else
+ sh->dev[i].vec.bv_page = sh->dev[i].page;
bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
@@ -2380,6 +2390,13 @@ static void raid5_end_read_request(struct bio * bi)
} else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
+ if (test_bit(R5_InJournal, &sh->dev[i].flags))
+ /*
+ * end read for a page in journal, this
+ * must be preparing for prexor in rmw
+ */
+ set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
+
if (atomic_read(&rdev->read_errors))
atomic_set(&rdev->read_errors, 0);
} else {
@@ -2538,7 +2555,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
spin_lock_irqsave(&conf->device_lock, flags);
clear_bit(In_sync, &rdev->flags);
- mddev->degraded = calc_degraded(conf);
+ mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -2552,6 +2569,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
bdevname(rdev->bdev, b),
mdname(mddev),
conf->raid_disks - mddev->degraded);
+ r5c_update_on_rdev_error(mddev);
}
/*
@@ -2880,6 +2898,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
return r_sector;
}
+/*
+ * There are cases where we want handle_stripe_dirtying() and
+ * schedule_reconstruction() to delay towrite to some dev of a stripe.
+ *
+ * This function checks whether we want to delay the towrite. Specifically,
+ * we delay the towrite when:
+ *
+ * 1. degraded stripe has a non-overwrite to the missing dev, AND this
+ * stripe has data in journal (for other devices).
+ *
+ * In this case, when reading data for the non-overwrite dev, it is
+ * necessary to handle complex rmw of write back cache (prexor with
+ * orig_page, and xor with page). To keep read path simple, we would
+ * like to flush data in journal to RAID disks first, so complex rmw
+ * is handled in the write patch (handle_stripe_dirtying).
+ *
+ */
+static inline bool delay_towrite(struct r5dev *dev,
+ struct stripe_head_state *s)
+{
+ return !test_bit(R5_OVERWRITE, &dev->flags) &&
+ !test_bit(R5_Insync, &dev->flags) && s->injournal;
+}
+
static void
schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand)
@@ -2900,7 +2942,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if (dev->towrite) {
+ if (dev->towrite && !delay_towrite(dev, s)) {
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantdrain, &dev->flags);
if (!expand)
@@ -3295,13 +3337,6 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
return rv;
}
-/* fetch_block - checks the given member device to see if its data needs
- * to be read or computed to satisfy a request.
- *
- * Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill to continue
- */
-
static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
int disk_idx, int disks)
{
@@ -3392,6 +3427,12 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
return 0;
}
+/* fetch_block - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill to continue
+ */
static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
int disk_idx, int disks)
{
@@ -3478,10 +3519,26 @@ static void handle_stripe_fill(struct stripe_head *sh,
* midst of changing due to a write
*/
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
- !sh->reconstruct_state)
+ !sh->reconstruct_state) {
+
+ /*
+ * For degraded stripe with data in journal, do not handle
+ * read requests yet, instead, flush the stripe to raid
+ * disks first, this avoids handling complex rmw of write
+ * back cache (prexor with orig_page, and then xor with
+ * page) in the read path
+ */
+ if (s->injournal && s->failed) {
+ if (test_bit(STRIPE_R5C_CACHING, &sh->state))
+ r5c_make_stripe_write_out(sh);
+ goto out;
+ }
+
for (i = disks; i--; )
if (fetch_block(sh, s, i, disks))
break;
+ }
+out:
set_bit(STRIPE_HANDLE, &sh->state);
}
@@ -3594,6 +3651,21 @@ unhash:
break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
}
+/*
+ * For RMW in write back cache, we need extra page in prexor to store the
+ * old data. This page is stored in dev->orig_page.
+ *
+ * This function checks whether we have data for prexor. The exact logic
+ * is:
+ * R5_UPTODATE && (!R5_InJournal || R5_OrigPageUPTDODATE)
+ */
+static inline bool uptodate_for_rmw(struct r5dev *dev)
+{
+ return (test_bit(R5_UPTODATE, &dev->flags)) &&
+ (!test_bit(R5_InJournal, &dev->flags) ||
+ test_bit(R5_OrigPageUPTDODATE, &dev->flags));
+}
+
static int handle_stripe_dirtying(struct r5conf *conf,
struct stripe_head *sh,
struct stripe_head_state *s,
@@ -3622,12 +3694,11 @@ static int handle_stripe_dirtying(struct r5conf *conf,
} else for (i = disks; i--; ) {
/* would I have to read this buffer for read_modify_write */
struct r5dev *dev = &sh->dev[i];
- if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
+ if (((dev->towrite && !delay_towrite(dev, s)) ||
+ i == sh->pd_idx || i == sh->qd_idx ||
test_bit(R5_InJournal, &dev->flags)) &&
!test_bit(R5_LOCKED, &dev->flags) &&
- !((test_bit(R5_UPTODATE, &dev->flags) &&
- (!test_bit(R5_InJournal, &dev->flags) ||
- dev->page != dev->orig_page)) ||
+ !(uptodate_for_rmw(dev) ||
test_bit(R5_Wantcompute, &dev->flags))) {
if (test_bit(R5_Insync, &dev->flags))
rmw++;
@@ -3639,7 +3710,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
i != sh->pd_idx && i != sh->qd_idx &&
!test_bit(R5_LOCKED, &dev->flags) &&
!(test_bit(R5_UPTODATE, &dev->flags) ||
- test_bit(R5_InJournal, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) {
if (test_bit(R5_Insync, &dev->flags))
rcw++;
@@ -3689,13 +3759,11 @@ static int handle_stripe_dirtying(struct r5conf *conf,
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if ((dev->towrite ||
+ if (((dev->towrite && !delay_towrite(dev, s)) ||
i == sh->pd_idx || i == sh->qd_idx ||
test_bit(R5_InJournal, &dev->flags)) &&
!test_bit(R5_LOCKED, &dev->flags) &&
- !((test_bit(R5_UPTODATE, &dev->flags) &&
- (!test_bit(R5_InJournal, &dev->flags) ||
- dev->page != dev->orig_page)) ||
+ !(uptodate_for_rmw(dev) ||
test_bit(R5_Wantcompute, &dev->flags)) &&
test_bit(R5_Insync, &dev->flags)) {
if (test_bit(STRIPE_PREREAD_ACTIVE,
@@ -3722,7 +3790,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
i != sh->pd_idx && i != sh->qd_idx &&
!test_bit(R5_LOCKED, &dev->flags) &&
!(test_bit(R5_UPTODATE, &dev->flags) ||
- test_bit(R5_InJournal, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) {
rcw++;
if (test_bit(R5_Insync, &dev->flags) &&
@@ -6264,10 +6331,10 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
mddev_suspend(mddev);
conf->skip_copy = new;
if (new)
- mddev->queue->backing_dev_info.capabilities |=
+ mddev->queue->backing_dev_info->capabilities |=
BDI_CAP_STABLE_WRITES;
else
- mddev->queue->backing_dev_info.capabilities &=
+ mddev->queue->backing_dev_info->capabilities &=
~BDI_CAP_STABLE_WRITES;
mddev_resume(mddev);
}
@@ -7025,7 +7092,7 @@ static int raid5_run(struct mddev *mddev)
/*
* 0 for a fully functional array, 1 or 2 for a degraded array.
*/
- mddev->degraded = calc_degraded(conf);
+ mddev->degraded = raid5_calc_degraded(conf);
if (has_failed(conf)) {
pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n",
@@ -7086,8 +7153,8 @@ static int raid5_run(struct mddev *mddev)
int data_disks = conf->previous_raid_disks - conf->max_degraded;
int stripe = data_disks *
((mddev->chunk_sectors << 9) / PAGE_SIZE);
- if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
- mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+ if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
+ mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
@@ -7272,7 +7339,7 @@ static int raid5_spare_active(struct mddev *mddev)
}
}
spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded = calc_degraded(conf);
+ mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
print_raid5_conf(conf);
return count;
@@ -7632,7 +7699,7 @@ static int raid5_start_reshape(struct mddev *mddev)
* pre and post number of devices.
*/
spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded = calc_degraded(conf);
+ mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
mddev->raid_disks = conf->raid_disks;
@@ -7696,8 +7763,8 @@ static void end_reshape(struct r5conf *conf)
int data_disks = conf->raid_disks - conf->max_degraded;
int stripe = data_disks * ((conf->chunk_sectors << 9)
/ PAGE_SIZE);
- if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
- conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+ if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
+ conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
}
}
}
@@ -7720,7 +7787,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
} else {
int d;
spin_lock_irq(&conf->device_lock);
- mddev->degraded = calc_degraded(conf);
+ mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irq(&conf->device_lock);
for (d = conf->raid_disks ;
d < conf->raid_disks - mddev->delta_disks;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index ed8e1362ab36..1440fa26e296 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -322,6 +322,11 @@ enum r5dev_flags {
* data and parity being written are in the journal
* device
*/
+ R5_OrigPageUPTDODATE, /* with write back cache, we read old data into
+ * dev->orig_page for prexor. When this flag is
+ * set, orig_page contains latest data in the
+ * raid disk.
+ */
};
/*
@@ -753,6 +758,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
extern struct stripe_head *
raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
int previous, int noblock, int noquiesce);
+extern int raid5_calc_degraded(struct r5conf *conf);
extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
extern void r5l_exit_log(struct r5l_log *log);
extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh);
@@ -781,4 +787,5 @@ extern void r5c_flush_cache(struct r5conf *conf, int num);
extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
extern void r5c_check_cached_full_stripe(struct r5conf *conf);
extern struct md_sysfs_entry r5c_journal_mode;
+extern void r5c_update_on_rdev_error(struct mddev *mddev);
#endif