diff options
-rw-r--r-- | Documentation/md/md-cluster.txt | 2 | ||||
-rw-r--r-- | drivers/md/md-cluster.c | 76 | ||||
-rw-r--r-- | drivers/md/md-cluster.h | 1 | ||||
-rw-r--r-- | drivers/md/md.c | 21 |
4 files changed, 94 insertions, 6 deletions
diff --git a/Documentation/md/md-cluster.txt b/Documentation/md/md-cluster.txt index 38883276d31c..2663d49dd8a0 100644 --- a/Documentation/md/md-cluster.txt +++ b/Documentation/md/md-cluster.txt @@ -321,4 +321,4 @@ The algorithm is: There are somethings which are not supported by cluster MD yet. -- update size and change array_sectors. +- change array_sectors. diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 92c3c51ede4d..b21ef58819f6 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1151,6 +1151,81 @@ int cluster_check_sync_size(struct mddev *mddev) return (my_sync_size == sync_size) ? 0 : -1; } +/* + * Update the size for cluster raid is a little more complex, we perform it + * by the steps: + * 1. hold token lock and update superblock in initiator node. + * 2. send METADATA_UPDATED msg to other nodes. + * 3. The initiator node continues to check each bitmap's sync_size, if all + * bitmaps have the same value of sync_size, then we can set capacity and + * let other nodes to perform it. If one node can't update sync_size + * accordingly, we need to revert to previous value. + */ +static void update_size(struct mddev *mddev, sector_t old_dev_sectors) +{ + struct md_cluster_info *cinfo = mddev->cluster_info; + struct cluster_msg cmsg; + struct md_rdev *rdev; + int ret = 0; + int raid_slot = -1; + + md_update_sb(mddev, 1); + lock_comm(cinfo, 1); + + memset(&cmsg, 0, sizeof(cmsg)); + cmsg.type = cpu_to_le32(METADATA_UPDATED); + rdev_for_each(rdev, mddev) + if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags)) { + raid_slot = rdev->desc_nr; + break; + } + if (raid_slot >= 0) { + cmsg.raid_slot = cpu_to_le32(raid_slot); + /* + * We can only change capiticy after all the nodes can do it, + * so need to wait after other nodes already received the msg + * and handled the change + */ + ret = __sendmsg(cinfo, &cmsg); + if (ret) { + pr_err("%s:%d: failed to send METADATA_UPDATED msg\n", + __func__, __LINE__); + unlock_comm(cinfo); + return; + } + } else { + pr_err("md-cluster: No good device id found to send\n"); + unlock_comm(cinfo); + return; + } + + /* + * check the sync_size from other node's bitmap, if sync_size + * have already updated in other nodes as expected, send an + * empty metadata msg to permit the change of capacity + */ + if (cluster_check_sync_size(mddev) == 0) { + memset(&cmsg, 0, sizeof(cmsg)); + cmsg.type = cpu_to_le32(CHANGE_CAPACITY); + ret = __sendmsg(cinfo, &cmsg); + if (ret) + pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", + __func__, __LINE__); + set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + } else { + /* revert to previous sectors */ + ret = mddev->pers->resize(mddev, old_dev_sectors); + if (!ret) + revalidate_disk(mddev->gendisk); + ret = __sendmsg(cinfo, &cmsg); + if (ret) + pr_err("%s:%d: failed to send METADATA_UPDATED msg\n", + __func__, __LINE__); + } + unlock_comm(cinfo); +} + static int resync_start(struct mddev *mddev) { struct md_cluster_info *cinfo = mddev->cluster_info; @@ -1396,6 +1471,7 @@ static struct md_cluster_operations cluster_ops = { .gather_bitmaps = gather_bitmaps, .lock_all_bitmaps = lock_all_bitmaps, .unlock_all_bitmaps = unlock_all_bitmaps, + .update_size = update_size, }; static int __init cluster_init(void) diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index e765499ba591..274016177983 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h @@ -27,6 +27,7 @@ struct md_cluster_operations { int (*gather_bitmaps)(struct md_rdev *rdev); int (*lock_all_bitmaps)(struct mddev *mddev); void (*unlock_all_bitmaps)(struct mddev *mddev); + void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors); }; #endif /* _MD_CLUSTER_H */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 83f325077dc8..72ef3f18ac9a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6493,10 +6493,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) struct md_rdev *rdev; int rv; int fit = (num_sectors == 0); - - /* cluster raid doesn't support update size */ - if (mddev_is_clustered(mddev)) - return -EINVAL; + sector_t old_dev_sectors = mddev->dev_sectors; if (mddev->pers->resize == NULL) return -EINVAL; @@ -6525,7 +6522,9 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) } rv = mddev->pers->resize(mddev, num_sectors); if (!rv) { - if (mddev->queue) { + if (mddev_is_clustered(mddev)) + md_cluster_ops->update_size(mddev, old_dev_sectors); + else if (mddev->queue) { set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); } @@ -8743,6 +8742,18 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) int role, ret; char b[BDEVNAME_SIZE]; + /* + * If size is changed in another node then we need to + * do resize as well. + */ + if (mddev->dev_sectors != le64_to_cpu(sb->size)) { + ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size)); + if (ret) + pr_info("md-cluster: resize failed\n"); + else + bitmap_update_sb(mddev->bitmap); + } + /* Check for change of roles in the active devices */ rdev_for_each(rdev2, mddev) { if (test_bit(Faulty, &rdev2->flags)) |