diff options
Diffstat (limited to 'drivers/md/md-cluster.c')
-rw-r--r-- | drivers/md/md-cluster.c | 234 |
1 files changed, 158 insertions, 76 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 0b2af6e74fc3..8dff19d5502e 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -33,13 +33,6 @@ struct dlm_lock_resource { int mode; }; -struct suspend_info { - int slot; - sector_t lo; - sector_t hi; - struct list_head list; -}; - struct resync_info { __le64 lo; __le64 hi; @@ -80,7 +73,13 @@ struct md_cluster_info { struct dlm_lock_resource **other_bitmap_lockres; struct dlm_lock_resource *resync_lockres; struct list_head suspend_list; + spinlock_t suspend_lock; + /* record the region which write should be suspended */ + sector_t suspend_lo; + sector_t suspend_hi; + int suspend_from; /* the slot which broadcast suspend_lo/hi */ + struct md_thread *recovery_thread; unsigned long recovery_map; /* communication loc resources */ @@ -105,6 +104,7 @@ enum msg_type { RE_ADD, BITMAP_NEEDS_SYNC, CHANGE_CAPACITY, + BITMAP_RESIZE, }; struct cluster_msg { @@ -270,25 +270,22 @@ static void add_resync_info(struct dlm_lock_resource *lockres, ri->hi = cpu_to_le64(hi); } -static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres) +static int read_resync_info(struct mddev *mddev, + struct dlm_lock_resource *lockres) { struct resync_info ri; - struct suspend_info *s = NULL; - sector_t hi = 0; + struct md_cluster_info *cinfo = mddev->cluster_info; + int ret = 0; dlm_lock_sync(lockres, DLM_LOCK_CR); memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info)); - hi = le64_to_cpu(ri.hi); - if (hi > 0) { - s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL); - if (!s) - goto out; - s->hi = hi; - s->lo = le64_to_cpu(ri.lo); + if (le64_to_cpu(ri.hi) > 0) { + cinfo->suspend_hi = le64_to_cpu(ri.hi); + cinfo->suspend_lo = le64_to_cpu(ri.lo); + ret = 1; } dlm_unlock_sync(lockres); -out: - return s; + return ret; } static void recover_bitmaps(struct md_thread *thread) @@ -298,7 +295,6 @@ static void recover_bitmaps(struct md_thread *thread) struct dlm_lock_resource *bm_lockres; char str[64]; int slot, ret; - struct suspend_info *s, *tmp; sector_t lo, hi; while (cinfo->recovery_map) { @@ -325,13 +321,17 @@ static void recover_bitmaps(struct md_thread *thread) /* Clear suspend_area associated with the bitmap */ spin_lock_irq(&cinfo->suspend_lock); - list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) - if (slot == s->slot) { - list_del(&s->list); - kfree(s); - } + cinfo->suspend_hi = 0; + cinfo->suspend_lo = 0; + cinfo->suspend_from = -1; spin_unlock_irq(&cinfo->suspend_lock); + /* Kick off a reshape if needed */ + if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) && + test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + mddev->reshape_position != MaxSector) + md_wakeup_thread(mddev->sync_thread); + if (hi > 0) { if (lo < mddev->recovery_cp) mddev->recovery_cp = lo; @@ -434,34 +434,23 @@ static void ack_bast(void *arg, int mode) } } -static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot) -{ - struct suspend_info *s, *tmp; - - list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) - if (slot == s->slot) { - list_del(&s->list); - kfree(s); - break; - } -} - static void remove_suspend_info(struct mddev *mddev, int slot) { struct md_cluster_info *cinfo = mddev->cluster_info; mddev->pers->quiesce(mddev, 1); spin_lock_irq(&cinfo->suspend_lock); - __remove_suspend_info(cinfo, slot); + cinfo->suspend_hi = 0; + cinfo->suspend_lo = 0; spin_unlock_irq(&cinfo->suspend_lock); mddev->pers->quiesce(mddev, 0); } - static void process_suspend_info(struct mddev *mddev, int slot, sector_t lo, sector_t hi) { struct md_cluster_info *cinfo = mddev->cluster_info; - struct suspend_info *s; + struct mdp_superblock_1 *sb = NULL; + struct md_rdev *rdev; if (!hi) { /* @@ -475,6 +464,12 @@ static void process_suspend_info(struct mddev *mddev, return; } + rdev_for_each(rdev, mddev) + if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) { + sb = page_address(rdev->sb_page); + break; + } + /* * The bitmaps are not same for different nodes * if RESYNCING is happening in one node, then @@ -487,26 +482,26 @@ static void process_suspend_info(struct mddev *mddev, * sync_low/hi is used to record the region which * arrived in the previous RESYNCING message, * - * Call bitmap_sync_with_cluster to clear - * NEEDED_MASK and set RESYNC_MASK since - * resync thread is running in another node, - * so we don't need to do the resync again - * with the same section */ - md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, cinfo->sync_hi, lo, hi); + * Call md_bitmap_sync_with_cluster to clear NEEDED_MASK + * and set RESYNC_MASK since resync thread is running + * in another node, so we don't need to do the resync + * again with the same section. + * + * Skip md_bitmap_sync_with_cluster in case reshape + * happening, because reshaping region is small and + * we don't want to trigger lots of WARN. + */ + if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) + md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, + cinfo->sync_hi, lo, hi); cinfo->sync_low = lo; cinfo->sync_hi = hi; - s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL); - if (!s) - return; - s->slot = slot; - s->lo = lo; - s->hi = hi; mddev->pers->quiesce(mddev, 1); spin_lock_irq(&cinfo->suspend_lock); - /* Remove existing entry (if exists) before adding */ - __remove_suspend_info(cinfo, slot); - list_add(&s->list, &cinfo->suspend_list); + cinfo->suspend_from = slot; + cinfo->suspend_lo = lo; + cinfo->suspend_hi = hi; spin_unlock_irq(&cinfo->suspend_lock); mddev->pers->quiesce(mddev, 0); } @@ -612,6 +607,11 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) case BITMAP_NEEDS_SYNC: __recover_slot(mddev, le32_to_cpu(msg->slot)); break; + case BITMAP_RESIZE: + if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0)) + ret = md_bitmap_resize(mddev->bitmap, + le64_to_cpu(msg->high), 0, 0); + break; default: ret = -1; pr_warn("%s:%d Received unknown message from %d\n", @@ -800,7 +800,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) struct md_cluster_info *cinfo = mddev->cluster_info; int i, ret = 0; struct dlm_lock_resource *bm_lockres; - struct suspend_info *s; char str[64]; sector_t lo, hi; @@ -819,16 +818,13 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) bm_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (ret == -EAGAIN) { - s = read_resync_info(mddev, bm_lockres); - if (s) { + if (read_resync_info(mddev, bm_lockres)) { pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n", __func__, __LINE__, - (unsigned long long) s->lo, - (unsigned long long) s->hi, i); - spin_lock_irq(&cinfo->suspend_lock); - s->slot = i; - list_add(&s->list, &cinfo->suspend_list); - spin_unlock_irq(&cinfo->suspend_lock); + (unsigned long long) cinfo->suspend_lo, + (unsigned long long) cinfo->suspend_hi, + i); + cinfo->suspend_from = i; } ret = 0; lockres_free(bm_lockres); @@ -1001,10 +997,17 @@ static int leave(struct mddev *mddev) if (!cinfo) return 0; - /* BITMAP_NEEDS_SYNC message should be sent when node + /* + * BITMAP_NEEDS_SYNC message should be sent when node * is leaving the cluster with dirty bitmap, also we - * can only deliver it when dlm connection is available */ - if (cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) + * can only deliver it when dlm connection is available. + * + * Also, we should send BITMAP_NEEDS_SYNC message in + * case reshaping is interrupted. + */ + if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) || + (mddev->reshape_position != MaxSector && + test_bit(MD_CLOSING, &mddev->flags))) resync_bitmap(mddev); set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); @@ -1102,6 +1105,80 @@ static void metadata_update_cancel(struct mddev *mddev) unlock_comm(cinfo); } +static int update_bitmap_size(struct mddev *mddev, sector_t size) +{ + struct md_cluster_info *cinfo = mddev->cluster_info; + struct cluster_msg cmsg = {0}; + int ret; + + cmsg.type = cpu_to_le32(BITMAP_RESIZE); + cmsg.high = cpu_to_le64(size); + ret = sendmsg(cinfo, &cmsg, 0); + if (ret) + pr_err("%s:%d: failed to send BITMAP_RESIZE message (%d)\n", + __func__, __LINE__, ret); + return ret; +} + +static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize) +{ + struct bitmap_counts *counts; + char str[64]; + struct dlm_lock_resource *bm_lockres; + struct bitmap *bitmap = mddev->bitmap; + unsigned long my_pages = bitmap->counts.pages; + int i, rv; + + /* + * We need to ensure all the nodes can grow to a larger + * bitmap size before make the reshaping. + */ + rv = update_bitmap_size(mddev, newsize); + if (rv) + return rv; + + for (i = 0; i < mddev->bitmap_info.nodes; i++) { + if (i == md_cluster_ops->slot_number(mddev)) + continue; + + bitmap = get_bitmap_from_slot(mddev, i); + if (IS_ERR(bitmap)) { + pr_err("can't get bitmap from slot %d\n", i); + goto out; + } + counts = &bitmap->counts; + + /* + * If we can hold the bitmap lock of one node then + * the slot is not occupied, update the pages. + */ + snprintf(str, 64, "bitmap%04d", i); + bm_lockres = lockres_init(mddev, str, NULL, 1); + if (!bm_lockres) { + pr_err("Cannot initialize %s lock\n", str); + goto out; + } + bm_lockres->flags |= DLM_LKF_NOQUEUE; + rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); + if (!rv) + counts->pages = my_pages; + lockres_free(bm_lockres); + + if (my_pages != counts->pages) + /* + * Let's revert the bitmap size if one node + * can't resize bitmap + */ + goto out; + } + + return 0; +out: + md_bitmap_free(bitmap); + update_bitmap_size(mddev, oldsize); + return -1; +} + /* * return 0 if all the bitmaps have the same sync_size */ @@ -1243,6 +1320,16 @@ static int resync_start(struct mddev *mddev) return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev); } +static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi) +{ + struct md_cluster_info *cinfo = mddev->cluster_info; + + spin_lock_irq(&cinfo->suspend_lock); + *lo = cinfo->suspend_lo; + *hi = cinfo->suspend_hi; + spin_unlock_irq(&cinfo->suspend_lock); +} + static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) { struct md_cluster_info *cinfo = mddev->cluster_info; @@ -1295,21 +1382,14 @@ static int area_resyncing(struct mddev *mddev, int direction, { struct md_cluster_info *cinfo = mddev->cluster_info; int ret = 0; - struct suspend_info *s; if ((direction == READ) && test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state)) return 1; spin_lock_irq(&cinfo->suspend_lock); - if (list_empty(&cinfo->suspend_list)) - goto out; - list_for_each_entry(s, &cinfo->suspend_list, list) - if (hi > s->lo && lo < s->hi) { - ret = 1; - break; - } -out: + if (hi > cinfo->suspend_lo && lo < cinfo->suspend_hi) + ret = 1; spin_unlock_irq(&cinfo->suspend_lock); return ret; } @@ -1482,6 +1562,7 @@ static struct md_cluster_operations cluster_ops = { .resync_start = resync_start, .resync_finish = resync_finish, .resync_info_update = resync_info_update, + .resync_info_get = resync_info_get, .metadata_update_start = metadata_update_start, .metadata_update_finish = metadata_update_finish, .metadata_update_cancel = metadata_update_cancel, @@ -1492,6 +1573,7 @@ static struct md_cluster_operations cluster_ops = { .remove_disk = remove_disk, .load_bitmaps = load_bitmaps, .gather_bitmaps = gather_bitmaps, + .resize_bitmaps = resize_bitmaps, .lock_all_bitmaps = lock_all_bitmaps, .unlock_all_bitmaps = unlock_all_bitmaps, .update_size = update_size, |