Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--  drivers/md/md.c  |  122
1 file changed, 81 insertions(+), 41 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 866825f10b4c..67642bacd597 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -285,7 +285,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
*/
sectors = bio_sectors(bio);
/* bio could be mergeable after passing to underlayer */
- bio->bi_rw &= ~REQ_NOMERGE;
+ bio->bi_opf &= ~REQ_NOMERGE;
mddev->pers->make_request(mddev, bio);
cpu = part_stat_lock();
@@ -394,8 +394,9 @@ static void submit_flushes(struct work_struct *ws)
bi->bi_end_io = md_end_flush;
bi->bi_private = rdev;
bi->bi_bdev = rdev->bdev;
+ bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH);
atomic_inc(&mddev->flush_pending);
- submit_bio(WRITE_FLUSH, bi);
+ submit_bio(bi);
rcu_read_lock();
rdev_dec_pending(rdev, mddev);
}
@@ -413,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws)
/* an empty barrier - all done */
bio_endio(bio);
else {
- bio->bi_rw &= ~REQ_FLUSH;
+ bio->bi_opf &= ~REQ_PREFLUSH;
mddev->pers->make_request(mddev, bio);
}
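
These two hunks track the 4.8-era block layer split: the operation and its
flags now live in the bio itself (bi_opf), submit_bio() no longer takes an
rw argument, and the flush flag is spelled REQ_PREFLUSH. A minimal sketch of
the resulting submission pattern; the helper name is invented, not part of
this patch:

static void example_issue_flush(struct block_device *bdev)
{
	/* an empty flush bio, like the ones submit_flushes() sends per rdev */
	struct bio *bio = bio_alloc(GFP_NOIO, 0);

	bio->bi_bdev = bdev;
	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); /* op + flags -> bi_opf */
	submit_bio(bio);                                  /* no rw argument */
}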
@@ -742,9 +743,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
bio->bi_end_io = super_written;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
atomic_inc(&mddev->pending_writes);
- submit_bio(WRITE_FLUSH_FUA, bio);
+ submit_bio(bio);
}
void md_super_wait(struct mddev *mddev)
@@ -754,13 +756,14 @@ void md_super_wait(struct mddev *mddev)
}
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
- struct page *page, int rw, bool metadata_op)
+ struct page *page, int op, int op_flags, bool metadata_op)
{
struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
int ret;
bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
rdev->meta_bdev : rdev->bdev;
+ bio_set_op_attrs(bio, op, op_flags);
if (metadata_op)
bio->bi_iter.bi_sector = sector + rdev->sb_start;
else if (rdev->mddev->reshape_position != MaxSector &&
@@ -770,7 +773,8 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
else
bio->bi_iter.bi_sector = sector + rdev->data_offset;
bio_add_page(bio, page, size, 0);
- submit_bio_wait(rw, bio);
+
+ submit_bio_wait(bio);
ret = !bio->bi_error;
bio_put(bio);
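
sync_page_io() now takes the REQ_OP_* opcode and the op flags as separate
arguments instead of one combined rw word. A before/after sketch for a
typical caller, assuming rdev, sector and page are in scope:

	/* before: direction and flags packed into rw */
	ok = sync_page_io(rdev, sector, PAGE_SIZE, page, READ, true);

	/* after: opcode first, flags (none here) second */
	ok = sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ, 0, true);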
@@ -785,7 +789,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
if (rdev->sb_loaded)
return 0;
- if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
+ if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
goto fail;
rdev->sb_loaded = 1;
return 0;
@@ -1471,7 +1475,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
return -EINVAL;
bb_sector = (long long)offset;
if (!sync_page_io(rdev, bb_sector, sectors << 9,
- rdev->bb_page, READ, true))
+ rdev->bb_page, REQ_OP_READ, 0, true))
return -EIO;
bbp = (u64 *)page_address(rdev->bb_page);
rdev->badblocks.shift = sb->bblog_shift;
@@ -1600,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_chunk_sectors = mddev->chunk_sectors;
}
- if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
+ if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
set_bit(MD_HAS_JOURNAL, &mddev->flags);
- if (mddev->recovery_cp == MaxSector)
- set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
- }
} else if (mddev->pers == NULL) {
/* Insist on a good event counter while assembling, except for
* spares (which don't need an event count) */
@@ -2478,8 +2479,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
if (add_journal)
mddev_resume(mddev);
if (err) {
- unbind_rdev_from_array(rdev);
- export_rdev(rdev);
+ md_kick_rdev_from_array(rdev);
return err;
}
}
@@ -2596,6 +2596,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
else
err = -EBUSY;
} else if (cmd_match(buf, "remove")) {
+ if (rdev->mddev->pers) {
+ clear_bit(Blocked, &rdev->flags);
+ remove_and_add_spares(rdev->mddev, rdev);
+ }
if (rdev->raid_disk >= 0)
err = -EBUSY;
else {
@@ -3172,8 +3176,7 @@ int md_rdev_init(struct md_rdev *rdev)
rdev->data_offset = 0;
rdev->new_data_offset = 0;
rdev->sb_events = 0;
- rdev->last_read_error.tv_sec = 0;
- rdev->last_read_error.tv_nsec = 0;
+ rdev->last_read_error = 0;
rdev->sb_loaded = 0;
rdev->bb_page = NULL;
atomic_set(&rdev->nr_pending, 0);
@@ -3579,6 +3582,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->to_remove = &md_redundancy_group;
}
+ module_put(oldpers->owner);
+
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk < 0)
continue;
@@ -3936,6 +3941,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
} else
err = -EBUSY;
}
+ if (!err)
+ sysfs_notify_dirent_safe(mddev->sysfs_state);
spin_unlock(&mddev->lock);
return err ?: len;
}
@@ -4187,7 +4194,8 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
return err;
if (mddev->pers) {
err = update_size(mddev, sectors);
- md_update_sb(mddev, 1);
+ if (err == 0)
+ md_update_sb(mddev, 1);
} else {
if (mddev->dev_sectors == 0 ||
mddev->dev_sectors > sectors)
@@ -5840,6 +5848,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
working++;
if (test_bit(In_sync, &rdev->flags))
insync++;
+ else if (test_bit(Journal, &rdev->flags))
+ /* TODO: add journal count to md_u.h */
+ ;
else
spare++;
}
@@ -7809,6 +7820,7 @@ void md_do_sync(struct md_thread *thread)
if (ret)
goto skip;
+ set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
@@ -7850,6 +7862,7 @@ void md_do_sync(struct md_thread *thread)
*/
do {
+ int mddev2_minor = -1;
mddev->curr_resync = 2;
try_again:
@@ -7879,10 +7892,14 @@ void md_do_sync(struct md_thread *thread)
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev2->curr_resync >= mddev->curr_resync) {
- printk(KERN_INFO "md: delaying %s of %s"
- " until %s has finished (they"
- " share one or more physical units)\n",
- desc, mdname(mddev), mdname(mddev2));
+ if (mddev2_minor != mddev2->md_minor) {
+ mddev2_minor = mddev2->md_minor;
+ printk(KERN_INFO "md: delaying %s of %s"
+ " until %s has finished (they"
+ " share one or more physical units)\n",
+ desc, mdname(mddev),
+ mdname(mddev2));
+ }
mddev_put(mddev2);
if (signal_pending(current))
flush_signals(current);
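
The mddev2_minor check prints the "delaying" message only when the blocking
array changes, rather than on every wakeup of the wait loop. The same
log-once-per-subject shape as a standalone sketch, with invented names:

static void wait_logging_once(int (*blocker)(void))
{
	int logged = -1;			/* nothing logged yet */
	int who;

	while ((who = blocker()) >= 0) {	/* < 0: nobody blocking */
		if (who != logged) {		/* log each new blocker once */
			logged = who;
			printk(KERN_INFO "md: delaying, blocked by md%d\n", who);
		}
		schedule_timeout_interruptible(HZ);
	}
}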
@@ -8147,18 +8164,11 @@ void md_do_sync(struct md_thread *thread)
}
}
skip:
- if (mddev_is_clustered(mddev) &&
- ret == 0) {
- /* set CHANGE_PENDING here since maybe another
- * update is needed, so other nodes are informed */
- set_mask_bits(&mddev->flags, 0,
- BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
- md_wakeup_thread(mddev->thread);
- wait_event(mddev->sb_wait,
- !test_bit(MD_CHANGE_PENDING, &mddev->flags));
- md_cluster_ops->resync_finish(mddev);
- } else
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ /* set CHANGE_PENDING here since maybe another update is needed,
+ * so other nodes are informed. It should be harmless for normal
+ * raid */
+ set_mask_bits(&mddev->flags, 0,
+ BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
spin_lock(&mddev->lock);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
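
set_mask_bits(ptr, mask, bits) clears mask and sets bits in one atomic
cmpxchg loop; with mask == 0, as here, it is an atomic OR of several flags,
which is why MD_CHANGE_PENDING and MD_CHANGE_DEVS can be raised together
without taking mddev->lock. Purely for illustration (this hunk only uses
the mask == 0 form), a nonzero mask clears and sets in one step:

	/* illustrative only: atomically clear CLEAN while setting the others */
	set_mask_bits(&mddev->flags, BIT(MD_CHANGE_CLEAN),
		      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));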
@@ -8184,15 +8194,34 @@ static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *rdev;
int spares = 0;
int removed = 0;
+ bool remove_some = false;
- rdev_for_each(rdev, mddev)
+ rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) &&
rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
- (test_bit(Faulty, &rdev->flags) ||
+ test_bit(Faulty, &rdev->flags) &&
+ atomic_read(&rdev->nr_pending)==0) {
+ /* Faulty non-Blocked devices with nr_pending == 0
+ * never get nr_pending incremented,
+ * never get Faulty cleared, and never get Blocked set.
+ * So we can synchronize_rcu now rather than once per device
+ */
+ remove_some = true;
+ set_bit(RemoveSynchronized, &rdev->flags);
+ }
+ }
+
+ if (remove_some)
+ synchronize_rcu();
+ rdev_for_each(rdev, mddev) {
+ if ((this == NULL || rdev == this) &&
+ rdev->raid_disk >= 0 &&
+ !test_bit(Blocked, &rdev->flags) &&
+ ((test_bit(RemoveSynchronized, &rdev->flags) ||
(!test_bit(In_sync, &rdev->flags) &&
!test_bit(Journal, &rdev->flags))) &&
- atomic_read(&rdev->nr_pending)==0) {
+ atomic_read(&rdev->nr_pending)==0)) {
if (mddev->pers->hot_remove_disk(
mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
@@ -8200,6 +8229,10 @@ static int remove_and_add_spares(struct mddev *mddev,
removed++;
}
}
+ if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
+ clear_bit(RemoveSynchronized, &rdev->flags);
+ }
+
if (removed && mddev->kobj.sd)
sysfs_notify(&mddev->kobj, NULL, "degraded");
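
remove_and_add_spares() now walks the rdev list twice: pass one marks
Faulty, unblocked, idle devices with RemoveSynchronized, a single
synchronize_rcu() then covers all of them (safe because, as the comment
says, such devices can never regain pending IO, lose Faulty, or become
Blocked), and pass two performs the actual hot_remove_disk(). The same
mark / wait-once / reap shape as a self-contained sketch with invented
types:

#include <linux/list.h>
#include <linux/rcupdate.h>

struct victim {				/* hypothetical element type */
	struct list_head node;
	bool faulty;
	bool marked;
};

static void reap_faulty(struct list_head *head)
{
	struct victim *v;
	bool marked = false;

	list_for_each_entry(v, head, node)
		if (v->faulty) {	/* pass 1: mark only */
			v->marked = true;
			marked = true;
		}
	if (marked)
		synchronize_rcu();	/* one grace period for the whole batch */
	list_for_each_entry(v, head, node)
		if (v->marked) {	/* pass 2: no RCU reader can still see v */
			v->marked = false;
			/* ... actual teardown of v goes here ... */
		}
}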
@@ -8247,16 +8280,13 @@ no_add:
static void md_start_sync(struct work_struct *ws)
{
struct mddev *mddev = container_of(ws, struct mddev, del_work);
- int ret = 0;
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
"resync");
if (!mddev->sync_thread) {
- if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
- printk(KERN_ERR "%s: could not start resync"
- " thread...\n",
- mdname(mddev));
+ printk(KERN_ERR "%s: could not start resync thread...\n",
+ mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
@@ -8502,6 +8532,11 @@ void md_reap_sync_thread(struct mddev *mddev)
rdev->saved_raid_disk = -1;
md_update_sb(mddev, 1);
+ /* MD_CHANGE_PENDING should be cleared by md_update_sb, so we can
+ * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
+ * clustered raid */
+ if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
+ md_cluster_ops->resync_finish(mddev);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
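
Together with the md_do_sync() hunk above, this completes a set/clear pair:
MD_CLUSTER_RESYNC_LOCKED is recorded once resync_start() succeeds, and
resync_finish() runs exactly once here, after md_update_sb() has cleared
MD_CHANGE_PENDING. Condensed from the two hunks (control flow elided):

	/* md_do_sync(): take the cluster resync lock and remember it */
	ret = md_cluster_ops->resync_start(mddev);
	if (ret)
		goto skip;
	set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);

	/* md_reap_sync_thread(): release at most once, after the sb write */
	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
		md_cluster_ops->resync_finish(mddev);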
@@ -8799,6 +8834,7 @@ EXPORT_SYMBOL(md_reload_sb);
* at boot time.
*/
+static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
struct list_head list;
@@ -8812,7 +8848,9 @@ void md_autodetect_dev(dev_t dev)
node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
if (node_detected_dev) {
node_detected_dev->dev = dev;
+ mutex_lock(&detected_devices_mutex);
list_add_tail(&node_detected_dev->list, &all_detected_devices);
+ mutex_unlock(&detected_devices_mutex);
} else {
printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
@@ -8831,6 +8869,7 @@ static void autostart_arrays(int part)
printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
+ mutex_lock(&detected_devices_mutex);
while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
i_scanned++;
node_detected_dev = list_entry(all_detected_devices.next,
@@ -8849,6 +8888,7 @@ static void autostart_arrays(int part)
list_add(&rdev->same_set, &pending_raid_disks);
i_passed++;
}
+ mutex_unlock(&detected_devices_mutex);
printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
i_scanned, i_passed);