summary | refs | log | tree | commit | diff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/bitmap.c 4
-rw-r--r-- drivers/md/dm-cache-target.c 51
-rw-r--r-- drivers/md/dm-raid.c 111
-rw-r--r-- drivers/md/dm.c 5
-rw-r--r-- drivers/md/md.c 239
-rw-r--r-- drivers/md/md.h 1
-rw-r--r-- drivers/md/raid1.c 8
-rw-r--r-- drivers/md/raid10.c 24
-rw-r--r-- drivers/md/raid5.c 27
9 files changed, 344 insertions, 126 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 4fd9d6aeff6a..5a2c75499824 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -846,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
set_bit(bit, kaddr);
else
- test_and_set_bit_le(bit, kaddr);
+ set_bit_le(bit, kaddr);
kunmap_atomic(kaddr);
pr_debug("set file bit %lu page %lu\n", bit, page->index);
/* record page number so it gets flushed to disk when unplug occurs */
@@ -868,7 +868,7 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
clear_bit(bit, paddr);
else
- test_and_clear_bit_le(bit, paddr);
+ clear_bit_le(bit, paddr);
kunmap_atomic(paddr);
if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 66120bd46d15..10744091e6ca 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -6,6 +6,7 @@
#include "dm.h"
#include "dm-bio-prison.h"
+#include "dm-bio-record.h"
#include "dm-cache-metadata.h"
#include <linux/dm-io.h>
@@ -201,10 +202,15 @@ struct per_bio_data {
unsigned req_nr:2;
struct dm_deferred_entry *all_io_entry;
- /* writethrough fields */
+ /*
+ * writethrough fields. These MUST remain at the end of this
+ * structure and the 'cache' member must be the first as it
+ * is used to determine the offsetof the writethrough fields.
+ */
struct cache *cache;
dm_cblock_t cblock;
bio_end_io_t *saved_bi_end_io;
+ struct dm_bio_details bio_details;
};
struct dm_cache_migration {
@@ -513,16 +519,28 @@ static void save_stats(struct cache *cache)
/*----------------------------------------------------------------
* Per bio data
*--------------------------------------------------------------*/
-static struct per_bio_data *get_per_bio_data(struct bio *bio)
+
+/*
+ * If using writeback, leave out struct per_bio_data's writethrough fields.
+ */
+#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
+#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
+
+static size_t get_per_bio_data_size(struct cache *cache)
+{
+ return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+}
+
+static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
- struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
+ struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
BUG_ON(!pb);
return pb;
}
-static struct per_bio_data *init_per_bio_data(struct bio *bio)
+static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
- struct per_bio_data *pb = get_per_bio_data(bio);
+ struct per_bio_data *pb = get_per_bio_data(bio, data_size);
pb->tick = false;
pb->req_nr = dm_bio_get_target_bio_nr(bio);
@@ -556,7 +574,8 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
unsigned long flags;
- struct per_bio_data *pb = get_per_bio_data(bio);
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
spin_lock_irqsave(&cache->lock, flags);
if (cache->need_tick_bio &&
@@ -635,7 +654,7 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
static void writethrough_endio(struct bio *bio, int err)
{
- struct per_bio_data *pb = get_per_bio_data(bio);
+ struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
bio->bi_end_io = pb->saved_bi_end_io;
if (err) {
@@ -643,6 +662,7 @@ static void writethrough_endio(struct bio *bio, int err)
return;
}
+ dm_bio_restore(&pb->bio_details, bio);
remap_to_cache(pb->cache, bio, pb->cblock);
/*
@@ -662,11 +682,12 @@ static void writethrough_endio(struct bio *bio, int err)
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock)
{
- struct per_bio_data *pb = get_per_bio_data(bio);
+ struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
pb->cache = cache;
pb->cblock = cblock;
pb->saved_bi_end_io = bio->bi_end_io;
+ dm_bio_record(&pb->bio_details, bio);
bio->bi_end_io = writethrough_endio;
remap_to_origin_clear_discard(pb->cache, bio, oblock);
@@ -1035,7 +1056,8 @@ static void defer_bio(struct cache *cache, struct bio *bio)
static void process_flush_bio(struct cache *cache, struct bio *bio)
{
- struct per_bio_data *pb = get_per_bio_data(bio);
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
BUG_ON(bio->bi_size);
if (!pb->req_nr)
@@ -1107,7 +1129,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
dm_oblock_t block = get_bio_block(cache, bio);
struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
struct policy_result lookup_result;
- struct per_bio_data *pb = get_per_bio_data(bio);
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
bool discarded_block = is_discarded_oblock(cache, block);
bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
@@ -1881,7 +1904,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
cache->ti = ca->ti;
ti->private = cache;
- ti->per_bio_data_size = sizeof(struct per_bio_data);
ti->num_flush_bios = 2;
ti->flush_supported = true;
@@ -1890,6 +1912,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->discard_zeroes_data_unsupported = true;
memcpy(&cache->features, &ca->features, sizeof(cache->features));
+ ti->per_bio_data_size = get_per_bio_data_size(cache);
cache->callbacks.congested_fn = cache_is_congested;
dm_table_add_target_callbacks(ti->table, &cache->callbacks);
@@ -2092,6 +2115,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
int r;
dm_oblock_t block = get_bio_block(cache, bio);
+ size_t pb_data_size = get_per_bio_data_size(cache);
bool can_migrate = false;
bool discarded_block;
struct dm_bio_prison_cell *cell;
@@ -2108,7 +2132,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
- pb = init_per_bio_data(bio);
+ pb = init_per_bio_data(bio, pb_data_size);
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
defer_bio(cache, bio);
@@ -2193,7 +2217,8 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
struct cache *cache = ti->private;
unsigned long flags;
- struct per_bio_data *pb = get_per_bio_data(bio);
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
if (pb->tick) {
policy_tick(cache->policy);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 311e3d35b272..1d3fe1a40a9b 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1279,6 +1279,31 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED;
}
+static const char *decipher_sync_action(struct mddev *mddev)
+{
+ if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+ return "frozen";
+
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+ return "reshape";
+
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+ if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ return "resync";
+ else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+ return "check";
+ return "repair";
+ }
+
+ if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
+ return "recover";
+ }
+
+ return "idle";
+}
+
static void raid_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
@@ -1298,8 +1323,18 @@ static void raid_status(struct dm_target *ti, status_type_t type,
sync = rs->md.recovery_cp;
if (sync >= rs->md.resync_max_sectors) {
+ /*
+ * Sync complete.
+ */
array_in_sync = 1;
sync = rs->md.resync_max_sectors;
+ } else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
+ /*
+ * If "check" or "repair" is occurring, the array has
+ * undergone an initial sync and the health characters
+ * should not be 'a' anymore.
+ */
+ array_in_sync = 1;
} else {
/*
* The array may be doing an initial sync, or it may
@@ -1311,6 +1346,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
array_in_sync = 1;
}
+
/*
* Status characters:
* 'D' = Dead/Failed device
@@ -1339,6 +1375,21 @@ static void raid_status(struct dm_target *ti, status_type_t type,
(unsigned long long) sync,
(unsigned long long) rs->md.resync_max_sectors);
+ /*
+ * Sync action:
+ * See Documentation/device-mapper/dm-raid.txt for
+ * information on each of these states.
+ */
+ DMEMIT(" %s", decipher_sync_action(&rs->md));
+
+ /*
+ * resync_mismatches/mismatch_cnt
+ * This field shows the number of discrepancies found when
+ * performing a "check" of the array.
+ */
+ DMEMIT(" %llu",
+ (unsigned long long)
+ atomic64_read(&rs->md.resync_mismatches));
break;
case STATUSTYPE_TABLE:
/* The string you would use to construct this array */
@@ -1425,7 +1476,62 @@ static void raid_status(struct dm_target *ti, status_type_t type,
}
}
-static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
+static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+ struct raid_set *rs = ti->private;
+ struct mddev *mddev = &rs->md;
+
+ if (!strcasecmp(argv[0], "reshape")) {
+ DMERR("Reshape not supported.");
+ return -EINVAL;
+ }
+
+ if (!mddev->pers || !mddev->pers->sync_request)
+ return -EINVAL;
+
+ if (!strcasecmp(argv[0], "frozen"))
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ else
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+ if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
+ if (mddev->sync_thread) {
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ md_reap_sync_thread(mddev);
+ }
+ } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ return -EBUSY;
+ else if (!strcasecmp(argv[0], "resync"))
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ else if (!strcasecmp(argv[0], "recover")) {
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ } else {
+ if (!strcasecmp(argv[0], "check"))
+ set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ else if (!!strcasecmp(argv[0], "repair"))
+ return -EINVAL;
+ set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+ set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ }
+ if (mddev->ro == 2) {
+ /* A write to sync_action is enough to justify
+ * canceling read-auto mode
+ */
+ mddev->ro = 0;
+ if (!mddev->suspended)
+ md_wakeup_thread(mddev->sync_thread);
+ }
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ if (!mddev->suspended)
+ md_wakeup_thread(mddev->thread);
+
+ return 0;
+}
+
+static int raid_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
{
struct raid_set *rs = ti->private;
unsigned i;
@@ -1482,12 +1588,13 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 4, 2},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
.map = raid_map,
.status = raid_status,
+ .message = raid_message,
.iterate_devices = raid_iterate_devices,
.io_hints = raid_io_hints,
.presuspend = raid_presuspend,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7e469260fe5e..d5370a94b2c1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -339,7 +339,7 @@ out:
return md ? 0 : -ENXIO;
}
-static int dm_blk_close(struct gendisk *disk, fmode_t mode)
+static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
struct mapped_device *md = disk->private_data;
@@ -349,8 +349,6 @@ static int dm_blk_close(struct gendisk *disk, fmode_t mode)
dm_put(md);
spin_unlock(&_minor_lock);
-
- return 0;
}
int dm_open_count(struct mapped_device *md)
@@ -611,6 +609,7 @@ static void dec_pending(struct dm_io *io, int error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
+ trace_block_bio_complete(md->queue, bio, io_error);
bio_endio(bio, io_error);
}
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1d03ebde40b5..681d1099a2d5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
+static int remove_and_add_spares(struct mddev *mddev,
+ struct md_rdev *this);
+
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
/*
@@ -1555,8 +1558,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
sector, count, 1) == 0)
return -EINVAL;
}
- } else if (sb->bblog_offset == 0)
- rdev->badblocks.shift = -1;
+ } else if (sb->bblog_offset != 0)
+ rdev->badblocks.shift = 0;
if (!refdev) {
ret = 1;
@@ -2402,6 +2405,11 @@ static void md_update_sb(struct mddev * mddev, int force_change)
int nospares = 0;
int any_badblocks_changed = 0;
+ if (mddev->ro) {
+ if (force_change)
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ return;
+ }
repeat:
/* First make sure individual recovery_offsets are correct */
rdev_for_each(rdev, mddev) {
@@ -2791,12 +2799,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
/* personality does all needed checks */
if (rdev->mddev->pers->hot_remove_disk == NULL)
return -EINVAL;
- err = rdev->mddev->pers->
- hot_remove_disk(rdev->mddev, rdev);
- if (err)
- return err;
- sysfs_unlink_rdev(rdev->mddev, rdev);
- rdev->raid_disk = -1;
+ clear_bit(Blocked, &rdev->flags);
+ remove_and_add_spares(rdev->mddev, rdev);
+ if (rdev->raid_disk >= 0)
+ return -EBUSY;
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
md_wakeup_thread(rdev->mddev->thread);
} else if (rdev->mddev->pers) {
@@ -3212,7 +3218,7 @@ int md_rdev_init(struct md_rdev *rdev)
* be used - I wonder if that matters
*/
rdev->badblocks.count = 0;
- rdev->badblocks.shift = 0;
+ rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
seqlock_init(&rdev->badblocks.lock);
if (rdev->badblocks.page == NULL)
@@ -3284,9 +3290,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
goto abort_free;
}
}
- if (super_format == -1)
- /* hot-add for 0.90, or non-persistent: so no badblocks */
- rdev->badblocks.shift = -1;
return rdev;
@@ -4216,8 +4219,6 @@ action_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", type);
}
-static void reap_sync_thread(struct mddev *mddev);
-
static ssize_t
action_store(struct mddev *mddev, const char *page, size_t len)
{
@@ -4232,7 +4233,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- reap_sync_thread(mddev);
+ md_reap_sync_thread(mddev);
}
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -5270,7 +5271,7 @@ static void __md_stop_writes(struct mddev *mddev)
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- reap_sync_thread(mddev);
+ md_reap_sync_thread(mddev);
}
del_timer_sync(&mddev->safemode_timer);
@@ -5278,7 +5279,8 @@ static void __md_stop_writes(struct mddev *mddev)
bitmap_flush(mddev);
md_super_wait(mddev);
- if (!mddev->in_sync || mddev->flags) {
+ if (mddev->ro == 0 &&
+ (!mddev->in_sync || mddev->flags)) {
/* mark array as shutdown cleanly */
mddev->in_sync = 1;
md_update_sb(mddev, 1);
@@ -5801,7 +5803,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
else
sysfs_notify_dirent_safe(rdev->sysfs_state);
- md_update_sb(mddev, 1);
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
if (mddev->degraded)
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5868,6 +5870,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev)
if (!rdev)
return -ENXIO;
+ clear_bit(Blocked, &rdev->flags);
+ remove_and_add_spares(mddev, rdev);
+
if (rdev->raid_disk >= 0)
goto busy;
@@ -6481,6 +6486,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
err = md_set_readonly(mddev, bdev);
goto done_unlock;
+ case HOT_REMOVE_DISK:
+ err = hot_remove_disk(mddev, new_decode_dev(arg));
+ goto done_unlock;
+
+ case ADD_NEW_DISK:
+ /* We can support ADD_NEW_DISK on read-only arrays
+ * only if we are re-adding a preexisting device.
+ * So require mddev->pers and MD_DISK_SYNC.
+ */
+ if (mddev->pers) {
+ mdu_disk_info_t info;
+ if (copy_from_user(&info, argp, sizeof(info)))
+ err = -EFAULT;
+ else if (!(info.state & (1<<MD_DISK_SYNC)))
+ /* Need to clear read-only for this */
+ break;
+ else
+ err = add_new_disk(mddev, &info);
+ goto done_unlock;
+ }
+ break;
+
case BLKROSET:
if (get_user(ro, (int __user *)(arg))) {
err = -EFAULT;
@@ -6551,10 +6578,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
goto done_unlock;
}
- case HOT_REMOVE_DISK:
- err = hot_remove_disk(mddev, new_decode_dev(arg));
- goto done_unlock;
-
case HOT_ADD_DISK:
err = hot_add_disk(mddev, new_decode_dev(arg));
goto done_unlock;
@@ -6642,15 +6665,13 @@ static int md_open(struct block_device *bdev, fmode_t mode)
return err;
}
-static int md_release(struct gendisk *disk, fmode_t mode)
+static void md_release(struct gendisk *disk, fmode_t mode)
{
struct mddev *mddev = disk->private_data;
BUG_ON(!mddev);
atomic_dec(&mddev->openers);
mddev_put(mddev);
-
- return 0;
}
static int md_media_changed(struct gendisk *disk)
@@ -7635,14 +7656,16 @@ void md_do_sync(struct md_thread *thread)
}
EXPORT_SYMBOL_GPL(md_do_sync);
-static int remove_and_add_spares(struct mddev *mddev)
+static int remove_and_add_spares(struct mddev *mddev,
+ struct md_rdev *this)
{
struct md_rdev *rdev;
int spares = 0;
int removed = 0;
rdev_for_each(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
+ if ((this == NULL || rdev == this) &&
+ rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
(test_bit(Faulty, &rdev->flags) ||
! test_bit(In_sync, &rdev->flags)) &&
@@ -7657,74 +7680,52 @@ static int remove_and_add_spares(struct mddev *mddev)
if (removed && mddev->kobj.sd)
sysfs_notify(&mddev->kobj, NULL, "degraded");
+ if (this)
+ goto no_add;
+
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags))
spares++;
- if (rdev->raid_disk < 0
- && !test_bit(Faulty, &rdev->flags)) {
- rdev->recovery_offset = 0;
- if (mddev->pers->
- hot_add_disk(mddev, rdev) == 0) {
- if (sysfs_link_rdev(mddev, rdev))
- /* failure here is OK */;
- spares++;
- md_new_event(mddev);
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
- }
+ if (rdev->raid_disk >= 0)
+ continue;
+ if (test_bit(Faulty, &rdev->flags))
+ continue;
+ if (mddev->ro &&
+ rdev->saved_raid_disk < 0)
+ continue;
+
+ rdev->recovery_offset = 0;
+ if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
+ spin_lock_irq(&mddev->write_lock);
+ if (mddev->in_sync)
+ /* OK, this device, which is in_sync,
+ * will definitely be noticed before
+ * the next write, so recovery isn't
+ * needed.
+ */
+ rdev->recovery_offset = mddev->recovery_cp;
+ spin_unlock_irq(&mddev->write_lock);
+ }
+ if (mddev->ro && rdev->recovery_offset != MaxSector)
+ /* not safe to add this disk now */
+ continue;
+ if (mddev->pers->
+ hot_add_disk(mddev, rdev) == 0) {
+ if (sysfs_link_rdev(mddev, rdev))
+ /* failure here is OK */;
+ spares++;
+ md_new_event(mddev);
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
}
}
+no_add:
if (removed)
set_bit(MD_CHANGE_DEVS, &mddev->flags);
return spares;
}
-static void reap_sync_thread(struct mddev *mddev)
-{
- struct md_rdev *rdev;
-
- /* resync has finished, collect result */
- md_unregister_thread(&mddev->sync_thread);
- if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
- /* success...*/
- /* activate any spares */
- if (mddev->pers->spare_active(mddev)) {
- sysfs_notify(&mddev->kobj, NULL,
- "degraded");
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
- }
- }
- if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
- mddev->pers->finish_reshape)
- mddev->pers->finish_reshape(mddev);
-
- /* If array is no-longer degraded, then any saved_raid_disk
- * information must be scrapped. Also if any device is now
- * In_sync we must scrape the saved_raid_disk for that device
- * do the superblock for an incrementally recovered device
- * written out.
- */
- rdev_for_each(rdev, mddev)
- if (!mddev->degraded ||
- test_bit(In_sync, &rdev->flags))
- rdev->saved_raid_disk = -1;
-
- md_update_sb(mddev, 1);
- clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
- clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
- clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
- /* flag recovery needed just to double check */
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- sysfs_notify_dirent_safe(mddev->sysfs_action);
- md_new_event(mddev);
- if (mddev->event_work.func)
- queue_work(md_misc_wq, &mddev->event_work);
-}
-
/*
* This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update.
@@ -7780,22 +7781,16 @@ void md_check_recovery(struct mddev *mddev)
int spares = 0;
if (mddev->ro) {
- /* Only thing we do on a ro array is remove
- * failed devices.
+ /* On a read-only array we can:
+ * - remove failed devices
+ * - add already-in_sync devices if the array itself
+ * is in-sync.
+ * As we only add devices that are already in-sync,
+ * we can activate the spares immediately.
*/
- struct md_rdev *rdev;
- rdev_for_each(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- !test_bit(Blocked, &rdev->flags) &&
- test_bit(Faulty, &rdev->flags) &&
- atomic_read(&rdev->nr_pending)==0) {
- if (mddev->pers->hot_remove_disk(
- mddev, rdev) == 0) {
- sysfs_unlink_rdev(mddev, rdev);
- rdev->raid_disk = -1;
- }
- }
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ remove_and_add_spares(mddev, NULL);
+ mddev->pers->spare_active(mddev);
goto unlock;
}
@@ -7827,7 +7822,7 @@ void md_check_recovery(struct mddev *mddev)
goto unlock;
}
if (mddev->sync_thread) {
- reap_sync_thread(mddev);
+ md_reap_sync_thread(mddev);
goto unlock;
}
/* Set RUNNING before clearing NEEDED to avoid
@@ -7858,7 +7853,7 @@ void md_check_recovery(struct mddev *mddev)
goto unlock;
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
- } else if ((spares = remove_and_add_spares(mddev))) {
+ } else if ((spares = remove_and_add_spares(mddev, NULL))) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@ -7908,6 +7903,51 @@ void md_check_recovery(struct mddev *mddev)
}
}
+void md_reap_sync_thread(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+
+ /* resync has finished, collect result */
+ md_unregister_thread(&mddev->sync_thread);
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ /* success...*/
+ /* activate any spares */
+ if (mddev->pers->spare_active(mddev)) {
+ sysfs_notify(&mddev->kobj, NULL,
+ "degraded");
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ }
+ }
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ mddev->pers->finish_reshape)
+ mddev->pers->finish_reshape(mddev);
+
+ /* If array is no longer degraded, then any saved_raid_disk
+ * information must be scrapped. Also if any device is now
+ * In_sync we must scrap the saved_raid_disk for that device
+ * so the superblock for an incrementally recovered device
+ * is written out.
+ */
+ rdev_for_each(rdev, mddev)
+ if (!mddev->degraded ||
+ test_bit(In_sync, &rdev->flags))
+ rdev->saved_raid_disk = -1;
+
+ md_update_sb(mddev, 1);
+ clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+ clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+ clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ /* flag recovery needed just to double check */
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ sysfs_notify_dirent_safe(mddev->sysfs_action);
+ md_new_event(mddev);
+ if (mddev->event_work.func)
+ queue_work(md_misc_wq, &mddev->event_work);
+}
+
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -8633,6 +8673,7 @@ EXPORT_SYMBOL(md_register_thread);
EXPORT_SYMBOL(md_unregister_thread);
EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_check_recovery);
+EXPORT_SYMBOL(md_reap_sync_thread);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d90fb1a879e1..653f992b687a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -567,6 +567,7 @@ extern struct md_thread *md_register_thread(
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
+extern void md_reap_sync_thread(struct mddev *mddev);
extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index aeb4e3f74791..55951182af73 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -971,7 +971,12 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
bio->bi_next = NULL;
- generic_make_request(bio);
+ if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+ !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ /* Just ignore it */
+ bio_endio(bio, 0);
+ else
+ generic_make_request(bio);
bio = next;
}
kfree(plug);
@@ -2854,6 +2859,7 @@ static int stop(struct mddev *mddev)
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
+ safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
kfree(conf);
mddev->private = NULL;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e32e8b1042f8..59d4daa5f4c7 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1133,7 +1133,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
bio->bi_next = NULL;
- generic_make_request(bio);
+ if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+ !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ /* Just ignore it */
+ bio_endio(bio, 0);
+ else
+ generic_make_request(bio);
bio = next;
}
kfree(plug);
@@ -2888,6 +2893,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
if (init_resync(conf))
return 0;
+ /*
+ * Allow skipping a full rebuild for incremental assembly
+ * of a clean array, like RAID1 does.
+ */
+ if (mddev->bitmap == NULL &&
+ mddev->recovery_cp == MaxSector &&
+ !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+ conf->fullsync == 0) {
+ *skipped = 1;
+ max_sector = mddev->dev_sectors;
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+ max_sector = mddev->resync_max_sectors;
+ return max_sector - sector_nr;
+ }
+
skipped:
max_sector = mddev->dev_sectors;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
@@ -3778,6 +3799,7 @@ static int stop(struct mddev *mddev)
if (conf->r10bio_pool)
mempool_destroy(conf->r10bio_pool);
+ safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf);
mddev->private = NULL;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2fefb9f2198e..9359828ffe26 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -184,6 +184,8 @@ static void return_io(struct bio *return_bi)
return_bi = bi->bi_next;
bi->bi_next = NULL;
bi->bi_size = 0;
+ trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+ bi, 0);
bio_endio(bi, 0);
bi = return_bi;
}
@@ -1884,8 +1886,15 @@ static void raid5_end_write_request(struct bio *bi, int error)
&rdev->mddev->recovery);
} else if (is_badblock(rdev, sh->sector,
STRIPE_SECTORS,
- &first_bad, &bad_sectors))
+ &first_bad, &bad_sectors)) {
set_bit(R5_MadeGood, &sh->dev[i].flags);
+ if (test_bit(R5_ReadError, &sh->dev[i].flags))
+ /* That was a successful write so make
+ * sure it looks like we already did
+ * a re-write.
+ */
+ set_bit(R5_ReWrite, &sh->dev[i].flags);
+ }
}
rdev_dec_pending(rdev, conf->mddev);
@@ -3913,6 +3922,8 @@ static void raid5_align_endio(struct bio *bi, int error)
rdev_dec_pending(rdev, conf->mddev);
if (!error && uptodate) {
+ trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
+ raid_bi, 0);
bio_endio(raid_bi, 0);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
@@ -4381,6 +4392,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ( rw == WRITE )
md_write_end(mddev);
+ trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+ bi, 0);
bio_endio(bi, 0);
}
}
@@ -4665,9 +4678,10 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
*skipped = 1;
return rv;
}
- if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
- !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
- !conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
+ if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+ !conf->fullsync &&
+ !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
+ sync_blocks >= STRIPE_SECTORS) {
/* we can skip this block, and probably more */
sync_blocks /= STRIPE_SECTORS;
*skipped = 1;
@@ -4757,8 +4771,11 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
handled++;
}
remaining = raid5_dec_bi_active_stripes(raid_bio);
- if (remaining == 0)
+ if (remaining == 0) {
+ trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
+ raid_bio, 0);
bio_endio(raid_bio, 0);
+ }
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
return handled;