1 files changed, 444 insertions, 269 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 29cd0dc9fe4f..5b525c179f39 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -85,7 +85,8 @@ MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_VERSION(REL_VERSION);
 MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
 MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
 
 #include <linux/moduleparam.h>
@@ -115,7 +116,7 @@ module_param(fault_devs, int, 0644);
 #endif
 
 /* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 int disable_sendpage;
 int allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
@@ -335,6 +336,7 @@ bail:
 	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 }
 
+
 /**
  * _tl_restart() - Walks the transfer log, and applies an action to all requests
  * @mdev:	DRBD device.
@@ -456,7 +458,7 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 }
 
 /**
- * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
  * @mdev:	DRBD device.
  * @os:		old (current) state.
  * @ns:		new (wanted) state.
@@ -473,12 +475,13 @@ static int cl_wide_st_chg(struct drbd_conf *mdev,
 		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
 }
 
-int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-		      union drbd_state mask, union drbd_state val)
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+		  union drbd_state mask, union drbd_state val)
 {
 	unsigned long flags;
 	union drbd_state os, ns;
-	int rv;
+	enum drbd_state_rv rv;
 
 	spin_lock_irqsave(&mdev->req_lock, flags);
 	os = mdev->state;
@@ -502,20 +505,22 @@ void drbd_force_state(struct drbd_conf *mdev,
 	drbd_change_state(mdev, CS_HARD, mask, val);
 }
 
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
-static int is_valid_state_transition(struct drbd_conf *,
-				     union drbd_state, union drbd_state);
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
+						    union drbd_state,
+						    union drbd_state);
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
 				       union drbd_state ns, const char **warn_sync_abort);
 int drbd_send_state_req(struct drbd_conf *,
 			union drbd_state, union drbd_state);
 
-static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
-				    union drbd_state mask, union drbd_state val)
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+	     union drbd_state val)
 {
 	union drbd_state os, ns;
 	unsigned long flags;
-	int rv;
+	enum drbd_state_rv rv;
 
 	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
 		return SS_CW_SUCCESS;
@@ -536,7 +541,7 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
 		if (rv == SS_SUCCESS) {
 			rv = is_valid_state_transition(mdev, ns, os);
 			if (rv == SS_SUCCESS)
-				rv = 0; /* cont waiting, otherwise fail. */
+				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
 		}
 	}
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
@@ -554,14 +559,14 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
  * Should not be called directly, use drbd_request_state() or
  * _drbd_request_state().
  */
-static int drbd_req_state(struct drbd_conf *mdev,
-			  union drbd_state mask, union drbd_state val,
-			  enum chg_state_flags f)
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+	       union drbd_state val, enum chg_state_flags f)
 {
 	struct completion done;
 	unsigned long flags;
 	union drbd_state os, ns;
-	int rv;
+	enum drbd_state_rv rv;
 
 	init_completion(&done);
 
@@ -636,10 +641,11 @@ abort:
  * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
  * flag, or when logging of failed state change requests is not desired.
  */
-int _drbd_request_state(struct drbd_conf *mdev,	union drbd_state mask,
-			union drbd_state val,	enum chg_state_flags f)
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+		    union drbd_state val, enum chg_state_flags f)
 {
-	int rv;
+	enum drbd_state_rv rv;
 
 	wait_event(mdev->state_wait,
 		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
@@ -663,8 +669,8 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
 	    );
 }
 
-void print_st_err(struct drbd_conf *mdev,
-	union drbd_state os, union drbd_state ns, int err)
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+	          union drbd_state ns, enum drbd_state_rv err)
 {
 	if (err == SS_IN_TRANSIENT_STATE)
 		return;
@@ -674,32 +680,18 @@ void print_st_err(struct drbd_conf *mdev,
 }
 
 
-#define drbd_peer_str drbd_role_str
-#define drbd_pdsk_str drbd_disk_str
-
-#define drbd_susp_str(A)     ((A) ? "1" : "0")
-#define drbd_aftr_isp_str(A) ((A) ? "1" : "0")
-#define drbd_peer_isp_str(A) ((A) ? "1" : "0")
-#define drbd_user_isp_str(A) ((A) ? "1" : "0")
-
-#define PSC(A) \
-	({ if (ns.A != os.A) { \
-		pbp += sprintf(pbp, #A "( %s -> %s ) ", \
-			      drbd_##A##_str(os.A), \
-			      drbd_##A##_str(ns.A)); \
-	} })
-
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:	DRBD device.
  * @ns:		State to consider.
  */
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 {
 	/* See drbd_state_sw_errors in drbd_strings.c */
 
 	enum drbd_fencing_p fp;
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
@@ -762,10 +754,11 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
  * @ns:		new state.
  * @os:		old state.
  */
-static int is_valid_state_transition(struct drbd_conf *mdev,
-				     union drbd_state ns, union drbd_state os)
+static enum drbd_state_rv
+is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
+			  union drbd_state os)
 {
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 
 	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
 	    os.conn > C_CONNECTED)
@@ -800,6 +793,10 @@ static int is_valid_state_transition(struct drbd_conf *mdev,
 	    os.conn < C_CONNECTED)
 		rv = SS_NEED_CONNECTION;
 
+	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+	    && os.conn < C_WF_REPORT_PARAMS)
+		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
 	return rv;
 }
 
@@ -817,6 +814,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 				       union drbd_state ns, const char **warn_sync_abort)
 {
 	enum drbd_fencing_p fp;
+	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
@@ -869,56 +867,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		ns.conn = C_CONNECTED;
 	}
 
-	if (ns.conn >= C_CONNECTED &&
-	    ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) ||
-	     (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) {
-		switch (ns.conn) {
-		case C_WF_BITMAP_T:
-		case C_PAUSED_SYNC_T:
-			ns.disk = D_OUTDATED;
-			break;
-		case C_CONNECTED:
-		case C_WF_BITMAP_S:
-		case C_SYNC_SOURCE:
-		case C_PAUSED_SYNC_S:
-			ns.disk = D_UP_TO_DATE;
-			break;
-		case C_SYNC_TARGET:
-			ns.disk = D_INCONSISTENT;
-			dev_warn(DEV, "Implicitly set disk state Inconsistent!\n");
-			break;
-		}
-		if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE)
-			dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n");
-	}
-
-	if (ns.conn >= C_CONNECTED &&
-	    (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) {
-		switch (ns.conn) {
-		case C_CONNECTED:
-		case C_WF_BITMAP_T:
-		case C_PAUSED_SYNC_T:
-		case C_SYNC_TARGET:
-			ns.pdsk = D_UP_TO_DATE;
-			break;
-		case C_WF_BITMAP_S:
-		case C_PAUSED_SYNC_S:
-			/* remap any consistent state to D_OUTDATED,
-			 * but disallow "upgrade" of not even consistent states.
-			 */
-			ns.pdsk =
-				(D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
-				? os.pdsk : D_OUTDATED;
-			break;
-		case C_SYNC_SOURCE:
-			ns.pdsk = D_INCONSISTENT;
-			dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n");
-			break;
-		}
-		if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE)
-			dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n");
-	}
-
 	/* Connection breaks down before we finished "Negotiating" */
 	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
 	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -933,6 +881,94 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		put_ldev(mdev);
 	}
 
+	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+			ns.disk = D_UP_TO_DATE;
+		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+			ns.pdsk = D_UP_TO_DATE;
+	}
+
+	/* Implications of the connection stat on the disk states */
+	disk_min = D_DISKLESS;
+	disk_max = D_UP_TO_DATE;
+	pdsk_min = D_INCONSISTENT;
+	pdsk_max = D_UNKNOWN;
+	switch ((enum drbd_conns)ns.conn) {
+	case C_WF_BITMAP_T:
+	case C_PAUSED_SYNC_T:
+	case C_STARTING_SYNC_T:
+	case C_WF_SYNC_UUID:
+	case C_BEHIND:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_OUTDATED;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_VERIFY_S:
+	case C_VERIFY_T:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_CONNECTED:
+		disk_min = D_DISKLESS;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_DISKLESS;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_WF_BITMAP_S:
+	case C_PAUSED_SYNC_S:
+	case C_STARTING_SYNC_S:
+	case C_AHEAD:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
+		break;
+	case C_SYNC_TARGET:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_INCONSISTENT;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_SYNC_SOURCE:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_INCONSISTENT;
+		break;
+	case C_STANDALONE:
+	case C_DISCONNECTING:
+	case C_UNCONNECTED:
+	case C_TIMEOUT:
+	case C_BROKEN_PIPE:
+	case C_NETWORK_FAILURE:
+	case C_PROTOCOL_ERROR:
+	case C_TEAR_DOWN:
+	case C_WF_CONNECTION:
+	case C_WF_REPORT_PARAMS:
+	case C_MASK:
+		break;
+	}
+	if (ns.disk > disk_max)
+		ns.disk = disk_max;
+
+	if (ns.disk < disk_min) {
+		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
+			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+		ns.disk = disk_min;
+	}
+	if (ns.pdsk > pdsk_max)
+		ns.pdsk = pdsk_max;
+
+	if (ns.pdsk < pdsk_min) {
+		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
+			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+		ns.pdsk = pdsk_min;
+	}
+
 	if (fp == FP_STONITH &&
 	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
 	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
@@ -961,6 +997,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 /* helper for __drbd_set_state */
 static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
 {
+	if (mdev->agreed_pro_version < 90)
+		mdev->ov_start_sector = 0;
+	mdev->rs_total = drbd_bm_bits(mdev);
+	mdev->ov_position = 0;
 	if (cs == C_VERIFY_T) {
 		/* starting online verify from an arbitrary position
 		 * does not fit well into the existing protocol.
@@ -970,11 +1010,15 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
 		mdev->ov_start_sector = ~(sector_t)0;
 	} else {
 		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
-		if (bit >= mdev->rs_total)
+		if (bit >= mdev->rs_total) {
 			mdev->ov_start_sector =
 				BM_BIT_TO_SECT(mdev->rs_total - 1);
+			mdev->rs_total = 1;
+		} else
+			mdev->rs_total -= bit;
 		mdev->ov_position = mdev->ov_start_sector;
 	}
+	mdev->ov_left = mdev->rs_total;
 }
 
 static void drbd_resume_al(struct drbd_conf *mdev)
@@ -992,12 +1036,12 @@ static void drbd_resume_al(struct drbd_conf *mdev)
  *
  * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
  */
-int __drbd_set_state(struct drbd_conf *mdev,
-		    union drbd_state ns, enum chg_state_flags flags,
-		    struct completion *done)
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+	         enum chg_state_flags flags, struct completion *done)
 {
 	union drbd_state os;
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 	const char *warn_sync_abort = NULL;
 	struct after_state_chg_work *ascw;
 
@@ -1033,22 +1077,46 @@ int __drbd_set_state(struct drbd_conf *mdev,
 		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
 
 	{
-		char *pbp, pb[300];
-		pbp = pb;
-		*pbp = 0;
-		PSC(role);
-		PSC(peer);
-		PSC(conn);
-		PSC(disk);
-		PSC(pdsk);
-		if (is_susp(ns) != is_susp(os))
-			pbp += sprintf(pbp, "susp( %s -> %s ) ",
-				       drbd_susp_str(is_susp(os)),
-				       drbd_susp_str(is_susp(ns)));
-		PSC(aftr_isp);
-		PSC(peer_isp);
-		PSC(user_isp);
-		dev_info(DEV, "%s\n", pb);
+	char *pbp, pb[300];
+	pbp = pb;
+	*pbp = 0;
+	if (ns.role != os.role)
+		pbp += sprintf(pbp, "role( %s -> %s ) ",
+			       drbd_role_str(os.role),
+			       drbd_role_str(ns.role));
+	if (ns.peer != os.peer)
+		pbp += sprintf(pbp, "peer( %s -> %s ) ",
+			       drbd_role_str(os.peer),
+			       drbd_role_str(ns.peer));
+	if (ns.conn != os.conn)
+		pbp += sprintf(pbp, "conn( %s -> %s ) ",
+			       drbd_conn_str(os.conn),
+			       drbd_conn_str(ns.conn));
+	if (ns.disk != os.disk)
+		pbp += sprintf(pbp, "disk( %s -> %s ) ",
+			       drbd_disk_str(os.disk),
+			       drbd_disk_str(ns.disk));
+	if (ns.pdsk != os.pdsk)
+		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
+			       drbd_disk_str(os.pdsk),
+			       drbd_disk_str(ns.pdsk));
+	if (is_susp(ns) != is_susp(os))
+		pbp += sprintf(pbp, "susp( %d -> %d ) ",
+			       is_susp(os),
+			       is_susp(ns));
+	if (ns.aftr_isp != os.aftr_isp)
+		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
+			       os.aftr_isp,
+			       ns.aftr_isp);
+	if (ns.peer_isp != os.peer_isp)
+		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
+			       os.peer_isp,
+			       ns.peer_isp);
+	if (ns.user_isp != os.user_isp)
+		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
+			       os.user_isp,
+			       ns.user_isp);
+	dev_info(DEV, "%s\n", pb);
 	}
 
 	/* solve the race between becoming unconfigured,
@@ -1074,6 +1142,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
 		atomic_inc(&mdev->local_cnt);
 
 	mdev->state = ns;
+
+	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+		drbd_print_uuids(mdev, "attached to UUIDs");
+
 	wake_up(&mdev->misc_wait);
 	wake_up(&mdev->state_wait);
 
@@ -1081,7 +1153,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
 	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
 	    ns.conn < C_CONNECTED) {
 		mdev->ov_start_sector =
-			BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left);
+			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
 		dev_info(DEV, "Online Verify reached sector %llu\n",
 			(unsigned long long)mdev->ov_start_sector);
 	}
@@ -1106,14 +1178,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
 		unsigned long now = jiffies;
 		int i;
 
-		mdev->ov_position = 0;
-		mdev->rs_total = drbd_bm_bits(mdev);
-		if (mdev->agreed_pro_version >= 90)
-			set_ov_position(mdev, ns.conn);
-		else
-			mdev->ov_start_sector = 0;
-		mdev->ov_left = mdev->rs_total
-			      - BM_SECT_TO_BIT(mdev->ov_position);
+		set_ov_position(mdev, ns.conn);
 		mdev->rs_start = now;
 		mdev->rs_last_events = 0;
 		mdev->rs_last_sect_ev = 0;
@@ -1121,10 +1186,12 @@ int __drbd_set_state(struct drbd_conf *mdev,
 		mdev->ov_last_oos_start = 0;
 
 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
-			mdev->rs_mark_left[i] = mdev->rs_total;
+			mdev->rs_mark_left[i] = mdev->ov_left;
 			mdev->rs_mark_time[i] = now;
 		}
 
+		drbd_rs_controller_reset(mdev);
+
 		if (ns.conn == C_VERIFY_S) {
 			dev_info(DEV, "Starting Online Verify from sector %llu\n",
 					(unsigned long long)mdev->ov_position);
@@ -1228,6 +1295,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
 	}
 }
 
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags)
+{
+	int rv;
+
+	D_ASSERT(current == mdev->worker.task);
+
+	/* open coded non-blocking drbd_suspend_io(mdev); */
+	set_bit(SUSPEND_IO, &mdev->flags);
+
+	drbd_bm_lock(mdev, why, flags);
+	rv = io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	drbd_resume_io(mdev);
+
+	return rv;
+}
+
 /**
  * after_state_ch() - Perform after state change actions that may sleep
  * @mdev:	DRBD device.
@@ -1266,16 +1353,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
 	nsm.i = -1;
 	if (ns.susp_nod) {
-		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
-			if (ns.conn == C_CONNECTED)
-				what = resend, nsm.susp_nod = 0;
-			else /* ns.conn > C_CONNECTED */
-				dev_err(DEV, "Unexpected Resynd going on!\n");
-		}
+		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+			what = resend;
 
 		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
-			what = restart_frozen_disk_io, nsm.susp_nod = 0;
+			what = restart_frozen_disk_io;
 
+		if (what != nothing)
+			nsm.susp_nod = 0;
 	}
 
 	if (ns.susp_fen) {
@@ -1306,13 +1391,30 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		spin_unlock_irq(&mdev->req_lock);
 	}
 
+	/* Became sync source.  With protocol >= 96, we still need to send out
+	 * the sync uuid now. Need to do that before any drbd_send_state, or
+	 * the other side may go "paused sync" before receiving the sync uuids,
+	 * which is unexpected. */
+	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+	    mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+		drbd_gen_and_send_sync_uuid(mdev);
+		put_ldev(mdev);
+	}
+
 	/* Do not change the order of the if above and the two below... */
 	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
 		drbd_send_uuids(mdev);
 		drbd_send_state(mdev);
 	}
-	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
-		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+	/* No point in queuing send_bitmap if we don't have a connection
+	 * anymore, so check also the _current_ state, not only the new state
+	 * at the time this work was queued. */
+	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+	    mdev->state.conn == C_WF_BITMAP_S)
+		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+				"send_bitmap (WFBitMapS)",
+				BM_LOCKED_TEST_ALLOWED);
 
 	/* Lost contact to peer's copy of the data */
 	if ((os.pdsk >= D_INCONSISTENT &&
@@ -1343,7 +1445,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
-			drbd_al_to_on_disk_bm(mdev);
+			/* We may still be Primary ourselves.
+			 * No harm done if the bitmap still changes,
+			 * redirtied pages will follow later. */
+			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	/* Write out all changed bits on demote.
+	 * Though, no need to da that just yet
+	 * if there is a resync going on still */
+	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+		/* No changes to the bitmap expected this time, so assert that,
+		 * even though no harm was done if it did change. */
+		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote", BM_LOCKED_TEST_ALLOWED);
 		put_ldev(mdev);
 	}
 
@@ -1371,15 +1489,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
 		drbd_send_state(mdev);
 
+	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
+		drbd_send_state(mdev);
+
 	/* We are in the progress to start a full sync... */
 	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
 	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
-		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+		/* no other bitmap changes expected during this phase */
+		drbd_queue_bitmap_io(mdev,
+			&drbd_bmio_set_n_write, &abw_start_sync,
+			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
 
 	/* We are invalidating our self... */
 	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
 	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
-		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+		/* other bitmap operation expected during this phase */
+		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+			"set_n_write from invalidate", BM_LOCKED_MASK);
 
 	/* first half of local IO error, failure to attach,
 	 * or administrative detach */
@@ -1434,10 +1560,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
 		if (drbd_send_state(mdev))
 			dev_warn(DEV, "Notified peer that I'm now diskless.\n");
-		else
-			dev_err(DEV, "Sending state for being diskless failed\n");
 		/* corresponding get_ldev in __drbd_set_state
-		 * this may finaly trigger drbd_ldev_destroy. */
+		 * this may finally trigger drbd_ldev_destroy. */
 		put_ldev(mdev);
 	}
 
@@ -1459,6 +1583,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
 		drbd_send_state(mdev);
 
+	/* This triggers bitmap writeout of potentially still unwritten pages
+	 * if the resync finished cleanly, or aborted because of peer disk
+	 * failure, or because of connection loss.
+	 * For resync aborted because of local disk failure, we cannot do
+	 * any bitmap writeout anymore.
+	 * No harm done if some bits change during this phase.
+	 */
+	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
+			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
+		put_ldev(mdev);
+	}
+
 	/* free tl_hash if we Got thawed and are C_STANDALONE */
 	if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
 		drbd_free_tl_hash(mdev);
@@ -1559,7 +1696,7 @@ int drbd_thread_start(struct drbd_thread *thi)
 		if (!try_module_get(THIS_MODULE)) {
 			dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
 			spin_unlock_irqrestore(&thi->t_lock, flags);
-			return FALSE;
+			return false;
 		}
 
 		init_completion(&thi->stop);
@@ -1576,7 +1713,7 @@ int drbd_thread_start(struct drbd_thread *thi)
 			dev_err(DEV, "Couldn't start thread\n");
 
 			module_put(THIS_MODULE);
-			return FALSE;
+			return false;
 		}
 		spin_lock_irqsave(&thi->t_lock, flags);
 		thi->task = nt;
@@ -1596,7 +1733,7 @@ int drbd_thread_start(struct drbd_thread *thi)
 		break;
 	}
 
-	return TRUE;
+	return true;
 }
 
 
@@ -1694,8 +1831,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
 {
 	int sent, ok;
 
-	ERR_IF(!h) return FALSE;
-	ERR_IF(!size) return FALSE;
+	ERR_IF(!h) return false;
+	ERR_IF(!size) return false;
 
 	h->magic   = BE_DRBD_MAGIC;
 	h->command = cpu_to_be16(cmd);
@@ -1704,8 +1841,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
 	sent = drbd_send(mdev, sock, h, size, msg_flags);
 
 	ok = (sent == size);
-	if (!ok)
-		dev_err(DEV, "short sent %s size=%d sent=%d\n",
+	if (!ok && !signal_pending(current))
+		dev_warn(DEV, "short sent %s size=%d sent=%d\n",
 		    cmdname(cmd), (int)size, sent);
 	return ok;
 }
@@ -1840,7 +1977,7 @@ int drbd_send_protocol(struct drbd_conf *mdev)
 		else {
 			dev_err(DEV, "--dry-run is not supported by peer");
 			kfree(p);
-			return 0;
+			return -1;
 		}
 	}
 	p->conn_flags    = cpu_to_be32(cf);
@@ -1888,12 +2025,36 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
 	return _drbd_send_uuids(mdev, 8);
 }
 
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{
+	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		u64 *uuid = mdev->ldev->md.uuid;
+		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+		     text,
+		     (unsigned long long)uuid[UI_CURRENT],
+		     (unsigned long long)uuid[UI_BITMAP],
+		     (unsigned long long)uuid[UI_HISTORY_START],
+		     (unsigned long long)uuid[UI_HISTORY_END]);
+		put_ldev(mdev);
+	} else {
+		dev_info(DEV, "%s effective data uuid: %016llX\n",
+				text,
+				(unsigned long long)mdev->ed_uuid);
+	}
+}
 
-int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
 {
 	struct p_rs_uuid p;
+	u64 uuid;
+
+	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
 
-	p.uuid = cpu_to_be64(val);
+	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+	drbd_uuid_set(mdev, UI_BITMAP, uuid);
+	drbd_print_uuids(mdev, "updated sync UUID");
+	drbd_md_sync(mdev);
+	p.uuid = cpu_to_be64(uuid);
 
 	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
 			     (struct p_header80 *)&p, sizeof(p));
@@ -1921,7 +2082,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
 	p.d_size = cpu_to_be64(d_size);
 	p.u_size = cpu_to_be64(u_size);
 	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
-	p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
+	p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
 	p.queue_order_type = cpu_to_be16(q_order_type);
 	p.dds_flags = cpu_to_be16(flags);
 
@@ -1972,7 +2133,7 @@ int drbd_send_state_req(struct drbd_conf *mdev,
 			     (struct p_header80 *)&p, sizeof(p));
 }
 
-int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
+int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
 {
 	struct p_req_state_reply p;
 
@@ -2076,9 +2237,15 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
 	return len;
 }
 
-enum { OK, FAILED, DONE }
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
 send_bitmap_rle_or_plain(struct drbd_conf *mdev,
-	struct p_header80 *h, struct bm_xfer_ctx *c)
+			 struct p_header80 *h, struct bm_xfer_ctx *c)
 {
 	struct p_compressed_bm *p = (void*)h;
 	unsigned long num_words;
@@ -2088,7 +2255,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
 	len = fill_bitmap_rle_bits(mdev, p, c);
 
 	if (len < 0)
-		return FAILED;
+		return -EIO;
 
 	if (len) {
 		DCBP_set_code(p, RLE_VLI_Bits);
@@ -2118,11 +2285,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
 		if (c->bit_offset > c->bm_bits)
 			c->bit_offset = c->bm_bits;
 	}
-	ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
-
-	if (ok == DONE)
-		INFO_bm_xfer_stats(mdev, "send", c);
-	return ok;
+	if (ok) {
+		if (len == 0) {
+			INFO_bm_xfer_stats(mdev, "send", c);
+			return 0;
+		} else
+			return 1;
+	}
+	return -EIO;
 }
 
 /* See the comment at receive_bitmap() */
@@ -2130,16 +2300,16 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
 {
 	struct bm_xfer_ctx c;
 	struct p_header80 *p;
-	int ret;
+	int err;
 
-	ERR_IF(!mdev->bitmap) return FALSE;
+	ERR_IF(!mdev->bitmap) return false;
 
 	/* maybe we should use some per thread scratch page,
 	 * and allocate that during initial device creation? */
 	p = (struct p_header80 *) __get_free_page(GFP_NOIO);
 	if (!p) {
 		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
-		return FALSE;
+		return false;
 	}
 
 	if (get_ldev(mdev)) {
@@ -2165,11 +2335,11 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
 	};
 
 	do {
-		ret = send_bitmap_rle_or_plain(mdev, p, &c);
-	} while (ret == OK);
+		err = send_bitmap_rle_or_plain(mdev, p, &c);
+	} while (err > 0);
 
 	free_page((unsigned long) p);
-	return (ret == DONE);
+	return err == 0;
 }
 
 int drbd_send_bitmap(struct drbd_conf *mdev)
@@ -2192,7 +2362,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
 	p.set_size = cpu_to_be32(set_size);
 
 	if (mdev->state.conn < C_CONNECTED)
-		return FALSE;
+		return false;
 	ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
 			(struct p_header80 *)&p, sizeof(p));
 	return ok;
@@ -2220,7 +2390,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
 	p.seq_num  = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
 
 	if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
-		return FALSE;
+		return false;
 	ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
 				(struct p_header80 *)&p, sizeof(p));
 	return ok;
@@ -2326,8 +2496,8 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
 }
 
 /* called on sndtimeo
- * returns FALSE if we should retry,
- * TRUE if we think connection is dead
+ * returns false if we should retry,
+ * true if we think connection is dead
  */
 static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
 {
@@ -2340,7 +2510,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
 		|| mdev->state.conn < C_CONNECTED;
 
 	if (drop_it)
-		return TRUE;
+		return true;
 
 	drop_it = !--mdev->ko_count;
 	if (!drop_it) {
@@ -2477,12 +2647,11 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
 {
 	if (mdev->agreed_pro_version >= 95)
 		return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
-			(bi_rw & REQ_UNPLUG ? DP_UNPLUG : 0) |
 			(bi_rw & REQ_FUA ? DP_FUA : 0) |
 			(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
 			(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
 	else
-		return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0;
+		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }
 
 /* Used to send write requests
@@ -2532,13 +2701,39 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+		ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok) {
-		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+		/* For protocol A, we have to memcpy the payload into
+		 * socket buffers, as we may complete right away
+		 * as soon as we handed it over to tcp, at which point the data
+		 * pages may become invalid.
+		 *
+		 * For data-integrity enabled, we copy it as well, so we can be
+		 * sure that even if the bio pages may still be modified, it
+		 * won't change the data on the wire, thus if the digest checks
+		 * out ok after sending on this side, but does not fit on the
+		 * receiving side, we sure have detected corruption elsewhere.
+		 */
+		if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
 			ok = _drbd_send_bio(mdev, req->master_bio);
 		else
 			ok = _drbd_send_zc_bio(mdev, req->master_bio);
+
+		/* double check digest, sometimes buffers have been modified in flight. */
+		if (dgs > 0 && dgs <= 64) {
+			/* 64 byte, 512 bit, is the larges digest size
+			 * currently supported in kernel crypto. */
+			unsigned char digest[64];
+			drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
+			if (memcmp(mdev->int_dig_out, digest, dgs)) {
+				dev_warn(DEV,
+					"Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
+					(unsigned long long)req->sector, req->size);
+			}
+		} /* else if (dgs > 64) {
+		     ... Be noisy about digest too large ...
+		} */
 	}
 
 	drbd_put_data_sock(mdev);
@@ -2588,7 +2783,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+		ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok)
 		ok = _drbd_send_zc_ee(mdev, e);
@@ -2598,6 +2793,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 	return ok;
 }
 
+int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	struct p_block_desc p;
+
+	p.sector  = cpu_to_be64(req->sector);
+	p.blksize = cpu_to_be32(req->size);
+
+	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+}
+
 /*
   drbd_send distinguishes two cases:
 
@@ -2719,35 +2924,6 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
 	return 0;
 }
 
-static void drbd_unplug_fn(struct request_queue *q)
-{
-	struct drbd_conf *mdev = q->queuedata;
-
-	/* unplug FIRST */
-	spin_lock_irq(q->queue_lock);
-	blk_remove_plug(q);
-	spin_unlock_irq(q->queue_lock);
-
-	/* only if connected */
-	spin_lock_irq(&mdev->req_lock);
-	if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
-		D_ASSERT(mdev->state.role == R_PRIMARY);
-		if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
-			/* add to the data.work queue,
-			 * unless already queued.
-			 * XXX this might be a good addition to drbd_queue_work
-			 * anyways, to detect "double queuing" ... */
-			if (list_empty(&mdev->unplug_work.list))
-				drbd_queue_work(&mdev->data.work,
-						&mdev->unplug_work);
-		}
-	}
-	spin_unlock_irq(&mdev->req_lock);
-
-	if (mdev->state.disk >= D_INCONSISTENT)
-		drbd_kick_lo(mdev);
-}
-
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
 	/* This way we get a compile error when sync_conf grows,
@@ -2800,6 +2976,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->pp_in_use_by_net, 0);
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
+	atomic_set(&mdev->ap_in_flight, 0);
 
 	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
@@ -2828,19 +3005,27 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	INIT_LIST_HEAD(&mdev->unplug_work.list);
 	INIT_LIST_HEAD(&mdev->go_diskless.list);
 	INIT_LIST_HEAD(&mdev->md_sync_work.list);
+	INIT_LIST_HEAD(&mdev->start_resync_work.list);
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
 
-	mdev->resync_work.cb  = w_resync_inactive;
+	mdev->resync_work.cb  = w_resync_timer;
 	mdev->unplug_work.cb  = w_send_write_hint;
 	mdev->go_diskless.cb  = w_go_diskless;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
+	mdev->start_resync_work.cb = w_start_resync;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
+	init_timer(&mdev->start_resync_timer);
+	init_timer(&mdev->request_timer);
 	mdev->resync_timer.function = resync_timer_fn;
 	mdev->resync_timer.data = (unsigned long) mdev;
 	mdev->md_sync_timer.function = md_sync_timer_fn;
 	mdev->md_sync_timer.data = (unsigned long) mdev;
+	mdev->start_resync_timer.function = start_resync_timer_fn;
+	mdev->start_resync_timer.data = (unsigned long) mdev;
+	mdev->request_timer.function = request_timer_fn;
+	mdev->request_timer.data = (unsigned long) mdev;
 
 	init_waitqueue_head(&mdev->misc_wait);
 	init_waitqueue_head(&mdev->state_wait);
@@ -2911,6 +3096,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
 	D_ASSERT(list_empty(&mdev->resync_work.list));
 	D_ASSERT(list_empty(&mdev->unplug_work.list));
 	D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+	drbd_set_defaults(mdev);
 }
 
 
@@ -2953,7 +3140,7 @@ static void drbd_destroy_mempools(void)
 static int drbd_create_mempools(void)
 {
 	struct page *page;
-	const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count;
+	const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
 	int i;
 
 	/* prepare our caches and mempools */
@@ -3117,11 +3304,20 @@ static void drbd_cleanup(void)
 
 	unregister_reboot_notifier(&drbd_notifier);
 
+	/* first remove proc,
+	 * drbdsetup uses it's presence to detect
+	 * whether DRBD is loaded.
+	 * If we would get stuck in proc removal,
+	 * but have netlink already deregistered,
+	 * some drbdsetup commands may wait forever
+	 * for an answer.
+	 */
+	if (drbd_proc)
+		remove_proc_entry("drbd", NULL);
+
 	drbd_nl_cleanup();
 
 	if (minor_table) {
-		if (drbd_proc)
-			remove_proc_entry("drbd", NULL);
 		i = minor_count;
 		while (i--)
 			drbd_delete_device(i);
@@ -3149,7 +3345,7 @@ static int drbd_congested(void *congested_data, int bdi_bits)
 	char reason = '-';
 	int r = 0;
 
-	if (!__inc_ap_bio_cond(mdev)) {
+	if (!may_inc_ap_bio(mdev)) {
 		/* DRBD has frozen IO */
 		r = bdi_bits;
 		reason = 'd';
@@ -3202,7 +3398,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
 		goto out_no_disk;
 	mdev->vdisk = disk;
 
-	set_disk_ro(disk, TRUE);
+	set_disk_ro(disk, true);
 
 	disk->queue = q;
 	disk->major = DRBD_MAJOR;
@@ -3218,13 +3414,11 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
 	q->backing_dev_info.congested_fn = drbd_congested;
 	q->backing_dev_info.congested_data = mdev;
 
-	blk_queue_make_request(q, drbd_make_request_26);
-	blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
+	blk_queue_make_request(q, drbd_make_request);
+	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	blk_queue_merge_bvec(q, drbd_merge_bvec);
-	q->queue_lock = &mdev->req_lock; /* needed since we use */
-		/* plugging on a queue, that actually has no requests! */
-	q->unplug_fn = drbd_unplug_fn;
+	q->queue_lock = &mdev->req_lock;
 
 	mdev->md_io_page = alloc_page(GFP_KERNEL);
 	if (!mdev->md_io_page)
@@ -3283,6 +3477,7 @@ void drbd_free_mdev(struct drbd_conf *mdev)
 	put_disk(mdev->vdisk);
 	blk_cleanup_queue(mdev->rq_queue);
 	free_cpumask_var(mdev->cpu_mask);
+	drbd_free_tl_hash(mdev);
 	kfree(mdev);
 }
 
@@ -3298,7 +3493,7 @@ int __init drbd_init(void)
 		return -EINVAL;
 	}
 
-	if (1 > minor_count || minor_count > 255) {
+	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
 		printk(KERN_ERR
 			"drbd: invalid minor_count (%d)\n", minor_count);
 #ifdef MODULE
@@ -3480,7 +3675,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
 		/* this was a try anyways ... */
 		dev_err(DEV, "meta data update failed!\n");
-		drbd_chk_io_error(mdev, 1, TRUE);
+		drbd_chk_io_error(mdev, 1, true);
 	}
 
 	/* Update mdev->ldev->md.la_size_sect,
@@ -3496,7 +3691,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
  * @mdev:	DRBD device.
  * @bdev:	Device from which the meta data should be read in.
  *
- * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
  * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
@@ -3511,7 +3706,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
 
 	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
-		/* NOTE: cant do normal error processing here as this is
+		/* NOTE: can't do normal error processing here as this is
 		   called BEFORE disk is attached */
 		dev_err(DEV, "Error while reading metadata.\n");
 		rv = ERR_IO_MD_DISK;
@@ -3566,28 +3761,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	return rv;
 }
 
-static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
-{
-	static char *uuid_str[UI_EXTENDED_SIZE] = {
-		[UI_CURRENT] = "CURRENT",
-		[UI_BITMAP] = "BITMAP",
-		[UI_HISTORY_START] = "HISTORY_START",
-		[UI_HISTORY_END] = "HISTORY_END",
-		[UI_SIZE] = "SIZE",
-		[UI_FLAGS] = "FLAGS",
-	};
-
-	if (index >= UI_EXTENDED_SIZE) {
-		dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
-		return;
-	}
-
-	dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
-		 uuid_str[index],
-		 (unsigned long long)mdev->ldev->md.uuid[index]);
-}
-
-
 /**
  * drbd_md_mark_dirty() - Mark meta data super block as dirty
  * @mdev:	DRBD device.
@@ -3617,10 +3790,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
 {
 	int i;
 
-	for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+	for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
 		mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
-		debug_drbd_uuid(mdev, i+1);
-	}
 }
 
 void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
@@ -3635,7 +3806,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 	}
 
 	mdev->ldev->md.uuid[idx] = val;
-	debug_drbd_uuid(mdev, idx);
 	drbd_md_mark_dirty(mdev);
 }
 
@@ -3645,7 +3815,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 	if (mdev->ldev->md.uuid[idx]) {
 		drbd_uuid_move_history(mdev);
 		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
-		debug_drbd_uuid(mdev, UI_HISTORY_START);
 	}
 	_drbd_uuid_set(mdev, idx, val);
 }
@@ -3660,14 +3829,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
 {
 	u64 val;
+	unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+
+	if (bm_uuid)
+		dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
-	dev_info(DEV, "Creating new current UUID\n");
-	D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
 	mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
-	debug_drbd_uuid(mdev, UI_BITMAP);
 
 	get_random_bytes(&val, sizeof(u64));
 	_drbd_uuid_set(mdev, UI_CURRENT, val);
+	drbd_print_uuids(mdev, "new current UUID");
 	/* get it to stable storage _now_ */
 	drbd_md_sync(mdev);
 }
@@ -3681,16 +3852,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
 		drbd_uuid_move_history(mdev);
 		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
 		mdev->ldev->md.uuid[UI_BITMAP] = 0;
-		debug_drbd_uuid(mdev, UI_HISTORY_START);
-		debug_drbd_uuid(mdev, UI_BITMAP);
 	} else {
-		if (mdev->ldev->md.uuid[UI_BITMAP])
-			dev_warn(DEV, "bm UUID already set");
+		unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+		if (bm_uuid)
+			dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
-		mdev->ldev->md.uuid[UI_BITMAP] = val;
-		mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
-
-		debug_drbd_uuid(mdev, UI_BITMAP);
+		mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
 	}
 	drbd_md_mark_dirty(mdev);
 }
@@ -3746,15 +3913,19 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
 static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 {
 	struct bm_io_work *work = container_of(w, struct bm_io_work, w);
-	int rv;
+	int rv = -EIO;
 
 	D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
 
-	drbd_bm_lock(mdev, work->why);
-	rv = work->io_fn(mdev);
-	drbd_bm_unlock(mdev);
+	if (get_ldev(mdev)) {
+		drbd_bm_lock(mdev, work->why, work->flags);
+		rv = work->io_fn(mdev);
+		drbd_bm_unlock(mdev);
+		put_ldev(mdev);
+	}
 
 	clear_bit(BITMAP_IO, &mdev->flags);
+	smp_mb__after_clear_bit();
 	wake_up(&mdev->misc_wait);
 
 	if (work->done)
@@ -3762,6 +3933,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 
 	clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
 	work->why = NULL;
+	work->flags = 0;
 
 	return 1;
 }
@@ -3816,7 +3988,7 @@ void drbd_go_diskless(struct drbd_conf *mdev)
 void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 			  int (*io_fn)(struct drbd_conf *),
 			  void (*done)(struct drbd_conf *, int),
-			  char *why)
+			  char *why, enum bm_flag flags)
 {
 	D_ASSERT(current == mdev->worker.task);
 
@@ -3830,15 +4002,15 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 	mdev->bm_io_work.io_fn = io_fn;
 	mdev->bm_io_work.done = done;
 	mdev->bm_io_work.why = why;
+	mdev->bm_io_work.flags = flags;
 
+	spin_lock_irq(&mdev->req_lock);
 	set_bit(BITMAP_IO, &mdev->flags);
 	if (atomic_read(&mdev->ap_bio_cnt) == 0) {
-		if (list_empty(&mdev->bm_io_work.w.list)) {
-			set_bit(BITMAP_IO_QUEUED, &mdev->flags);
+		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
 			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
-		} else
-			dev_err(DEV, "FIXME avoided double queuing bm_io_work\n");
 	}
+	spin_unlock_irq(&mdev->req_lock);
 }
 
 /**
@@ -3850,19 +4022,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
  * freezes application IO while that the actual IO operations runs. This
  * functions MAY NOT be called from worker context.
  */
-int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags)
 {
 	int rv;
 
 	D_ASSERT(current != mdev->worker.task);
 
-	drbd_suspend_io(mdev);
+	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+		drbd_suspend_io(mdev);
 
-	drbd_bm_lock(mdev, why);
+	drbd_bm_lock(mdev, why, flags);
 	rv = io_fn(mdev);
 	drbd_bm_unlock(mdev);
 
-	drbd_resume_io(mdev);
+	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+		drbd_resume_io(mdev);
 
 	return rv;
 }