diff options
Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 158 |
1 files changed, 139 insertions, 19 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 93d1f9b469d4..be2d2da9cdba 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -684,6 +684,9 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) rv = SS_NO_REMOTE_DISK; + else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + else if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S || ns.conn == C_SYNC_SOURCE || @@ -840,7 +843,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state break; case C_WF_BITMAP_S: case C_PAUSED_SYNC_S: - ns.pdsk = D_OUTDATED; + /* remap any consistent state to D_OUTDATED, + * but disallow "upgrade" of not even consistent states. + */ + ns.pdsk = + (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED) + ? os.pdsk : D_OUTDATED; break; case C_SYNC_SOURCE: ns.pdsk = D_INCONSISTENT; @@ -1205,21 +1213,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, && (ns.pdsk < D_INCONSISTENT || ns.pdsk == D_UNKNOWN || ns.pdsk == D_OUTDATED)) { - kfree(mdev->p_uuid); - mdev->p_uuid = NULL; if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); - } + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE && + !atomic_read(&mdev->new_c_uuid)) + atomic_set(&mdev->new_c_uuid, 2); put_ldev(mdev); } } if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) - drbd_uuid_new_current(mdev); + /* Diskless peer becomes primary or got connected do diskless, primary peer. */ + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 && + !atomic_read(&mdev->new_c_uuid)) + atomic_set(&mdev->new_c_uuid, 2); /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) @@ -1232,7 +1239,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ - drbd_send_sizes(mdev, 0); /* to start sync... */ + drbd_send_sizes(mdev, 0, 0); /* to start sync... */ drbd_send_uuids(mdev); drbd_send_state(mdev); } @@ -1343,6 +1350,24 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_md_sync(mdev); } +static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + if (get_ldev(mdev)) { + if (mdev->ldev->md.uuid[UI_BITMAP] == 0) { + drbd_uuid_new_current(mdev); + if (get_net_conf(mdev)) { + drbd_send_uuids(mdev); + put_net_conf(mdev); + } + drbd_md_sync(mdev); + } + put_ldev(mdev); + } + atomic_dec(&mdev->new_c_uuid); + wake_up(&mdev->misc_wait); + + return 1; +} static int drbd_thread_setup(void *arg) { @@ -1755,7 +1780,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) (struct p_header *)&p, sizeof(p)); } -int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) +int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) { struct p_sizes p; sector_t d_size, u_size; @@ -1767,7 +1792,6 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) d_size = drbd_get_max_capacity(mdev->ldev); u_size = mdev->ldev->dc.disk_size; q_order_type = drbd_queue_order_type(mdev); - p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); put_ldev(mdev); } else { d_size = 0; @@ -1779,7 +1803,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) p.u_size = cpu_to_be64(u_size); p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); - p.queue_order_type = cpu_to_be32(q_order_type); + p.queue_order_type = cpu_to_be16(q_order_type); + p.dds_flags = cpu_to_be16(flags); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, (struct p_header *)&p, sizeof(p)); @@ -2180,6 +2205,43 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) return ok; } +static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds) +{ + struct p_delay_probe dp; + int offset, ok = 0; + struct timeval now; + + mutex_lock(&ds->mutex); + if (likely(ds->socket)) { + do_gettimeofday(&now); + offset = now.tv_usec - mdev->dps_time.tv_usec + + (now.tv_sec - mdev->dps_time.tv_sec) * 1000000; + dp.seq_num = cpu_to_be32(mdev->delay_seq); + dp.offset = cpu_to_be32(offset); + + ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE, + (struct p_header *)&dp, sizeof(dp), 0); + } + mutex_unlock(&ds->mutex); + + return ok; +} + +static int drbd_send_delay_probes(struct drbd_conf *mdev) +{ + int ok; + + mdev->delay_seq++; + do_gettimeofday(&mdev->dps_time); + ok = drbd_send_delay_probe(mdev, &mdev->meta); + ok = ok && drbd_send_delay_probe(mdev, &mdev->data); + + mdev->dp_volume_last = mdev->send_cnt; + mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10); + + return ok; +} + /* called on sndtimeo * returns FALSE if we should retry, * TRUE if we think connection is dead @@ -2309,6 +2371,44 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) return 1; } +static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +{ + struct page *page = e->pages; + unsigned len = e->size; + page_chain_for_each(page) { + unsigned l = min_t(unsigned, len, PAGE_SIZE); + if (!_drbd_send_page(mdev, page, 0, l)) + return 0; + len -= l; + } + return 1; +} + +static void consider_delay_probes(struct drbd_conf *mdev) +{ + if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93) + return; + + if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt) + drbd_send_delay_probes(mdev); +} + +static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + if (!cancel && mdev->state.conn == C_SYNC_SOURCE) + drbd_send_delay_probes(mdev); + + return 1; +} + +static void delay_probe_timer_fn(unsigned long data) +{ + struct drbd_conf *mdev = (struct drbd_conf *) data; + + if (list_empty(&mdev->delay_probe_work.list)) + drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work); +} + /* Used to send write requests * R_PRIMARY -> Peer (P_DATA) */ @@ -2360,7 +2460,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); if (ok && dgs) { dgb = mdev->int_dig_out; - drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); + drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); } if (ok) { @@ -2371,6 +2471,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) } drbd_put_data_sock(mdev); + + if (ok) + consider_delay_probes(mdev); + return ok; } @@ -2409,13 +2513,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, sizeof(p), MSG_MORE); if (ok && dgs) { dgb = mdev->int_dig_out; - drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb); + drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); } if (ok) - ok = _drbd_send_zc_bio(mdev, e->private_bio); + ok = _drbd_send_zc_ee(mdev, e); drbd_put_data_sock(mdev); + + if (ok) + consider_delay_probes(mdev); + return ok; } @@ -2600,6 +2708,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->net_cnt, 0); atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); + atomic_set(&mdev->new_c_uuid, 0); mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->data.mutex); @@ -2628,16 +2737,26 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->unplug_work.list); INIT_LIST_HEAD(&mdev->md_sync_work.list); INIT_LIST_HEAD(&mdev->bm_io_work.w.list); + INIT_LIST_HEAD(&mdev->delay_probes); + INIT_LIST_HEAD(&mdev->delay_probe_work.list); + INIT_LIST_HEAD(&mdev->uuid_work.list); + mdev->resync_work.cb = w_resync_inactive; mdev->unplug_work.cb = w_send_write_hint; mdev->md_sync_work.cb = w_md_sync; mdev->bm_io_work.w.cb = w_bitmap_io; + mdev->delay_probe_work.cb = w_delay_probes; + mdev->uuid_work.cb = w_new_current_uuid; init_timer(&mdev->resync_timer); init_timer(&mdev->md_sync_timer); + init_timer(&mdev->delay_probe_timer); mdev->resync_timer.function = resync_timer_fn; mdev->resync_timer.data = (unsigned long) mdev; mdev->md_sync_timer.function = md_sync_timer_fn; mdev->md_sync_timer.data = (unsigned long) mdev; + mdev->delay_probe_timer.function = delay_probe_timer_fn; + mdev->delay_probe_timer.data = (unsigned long) mdev; + init_waitqueue_head(&mdev->misc_wait); init_waitqueue_head(&mdev->state_wait); @@ -2680,7 +2799,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) drbd_set_my_capacity(mdev, 0); if (mdev->bitmap) { /* maybe never allocated. */ - drbd_bm_resize(mdev, 0); + drbd_bm_resize(mdev, 0, 1); drbd_bm_cleanup(mdev); } @@ -3129,7 +3248,7 @@ int __init drbd_init(void) if (err) goto Enomem; - drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops); + drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: unable to register proc file\n"); goto Enomem; @@ -3660,7 +3779,8 @@ _drbd_fault_str(unsigned int type) { [DRBD_FAULT_DT_RD] = "Data read", [DRBD_FAULT_DT_RA] = "Data read ahead", [DRBD_FAULT_BM_ALLOC] = "BM allocation", - [DRBD_FAULT_AL_EE] = "EE allocation" + [DRBD_FAULT_AL_EE] = "EE allocation", + [DRBD_FAULT_RECEIVE] = "receive data corruption", }; return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; |