summaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r--drivers/block/drbd/drbd_main.c713
1 files changed, 444 insertions, 269 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 29cd0dc9fe4f..5b525c179f39 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -85,7 +85,8 @@ MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+ __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
#include <linux/moduleparam.h>
@@ -115,7 +116,7 @@ module_param(fault_devs, int, 0644);
#endif
/* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
unsigned int cn_idx = CN_IDX_DRBD;
@@ -335,6 +336,7 @@ bail:
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
}
+
/**
* _tl_restart() - Walks the transfer log, and applies an action to all requests
* @mdev: DRBD device.
@@ -456,7 +458,7 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
}
/**
- * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
* @mdev: DRBD device.
* @os: old (current) state.
* @ns: new (wanted) state.
@@ -473,12 +475,13 @@ static int cl_wide_st_chg(struct drbd_conf *mdev,
(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
}
-int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
- union drbd_state mask, union drbd_state val)
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+ union drbd_state mask, union drbd_state val)
{
unsigned long flags;
union drbd_state os, ns;
- int rv;
+ enum drbd_state_rv rv;
spin_lock_irqsave(&mdev->req_lock, flags);
os = mdev->state;
@@ -502,20 +505,22 @@ void drbd_force_state(struct drbd_conf *mdev,
drbd_change_state(mdev, CS_HARD, mask, val);
}
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
-static int is_valid_state_transition(struct drbd_conf *,
- union drbd_state, union drbd_state);
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
+ union drbd_state,
+ union drbd_state);
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
union drbd_state ns, const char **warn_sync_abort);
int drbd_send_state_req(struct drbd_conf *,
union drbd_state, union drbd_state);
-static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
- union drbd_state mask, union drbd_state val)
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+ union drbd_state val)
{
union drbd_state os, ns;
unsigned long flags;
- int rv;
+ enum drbd_state_rv rv;
if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
return SS_CW_SUCCESS;
@@ -536,7 +541,7 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
if (rv == SS_SUCCESS) {
rv = is_valid_state_transition(mdev, ns, os);
if (rv == SS_SUCCESS)
- rv = 0; /* cont waiting, otherwise fail. */
+ rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
}
}
spin_unlock_irqrestore(&mdev->req_lock, flags);
@@ -554,14 +559,14 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
* Should not be called directly, use drbd_request_state() or
* _drbd_request_state().
*/
-static int drbd_req_state(struct drbd_conf *mdev,
- union drbd_state mask, union drbd_state val,
- enum chg_state_flags f)
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+ union drbd_state val, enum chg_state_flags f)
{
struct completion done;
unsigned long flags;
union drbd_state os, ns;
- int rv;
+ enum drbd_state_rv rv;
init_completion(&done);
@@ -636,10 +641,11 @@ abort:
* Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
* flag, or when logging of failed state change requests is not desired.
*/
-int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
- union drbd_state val, enum chg_state_flags f)
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+ union drbd_state val, enum chg_state_flags f)
{
- int rv;
+ enum drbd_state_rv rv;
wait_event(mdev->state_wait,
(rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
@@ -663,8 +669,8 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
);
}
-void print_st_err(struct drbd_conf *mdev,
- union drbd_state os, union drbd_state ns, int err)
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, enum drbd_state_rv err)
{
if (err == SS_IN_TRANSIENT_STATE)
return;
@@ -674,32 +680,18 @@ void print_st_err(struct drbd_conf *mdev,
}
-#define drbd_peer_str drbd_role_str
-#define drbd_pdsk_str drbd_disk_str
-
-#define drbd_susp_str(A) ((A) ? "1" : "0")
-#define drbd_aftr_isp_str(A) ((A) ? "1" : "0")
-#define drbd_peer_isp_str(A) ((A) ? "1" : "0")
-#define drbd_user_isp_str(A) ((A) ? "1" : "0")
-
-#define PSC(A) \
- ({ if (ns.A != os.A) { \
- pbp += sprintf(pbp, #A "( %s -> %s ) ", \
- drbd_##A##_str(os.A), \
- drbd_##A##_str(ns.A)); \
- } })
-
/**
* is_valid_state() - Returns an SS_ error code if ns is not valid
* @mdev: DRBD device.
* @ns: State to consider.
*/
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
{
/* See drbd_state_sw_errors in drbd_strings.c */
enum drbd_fencing_p fp;
- int rv = SS_SUCCESS;
+ enum drbd_state_rv rv = SS_SUCCESS;
fp = FP_DONT_CARE;
if (get_ldev(mdev)) {
@@ -762,10 +754,11 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
* @ns: new state.
* @os: old state.
*/
-static int is_valid_state_transition(struct drbd_conf *mdev,
- union drbd_state ns, union drbd_state os)
+static enum drbd_state_rv
+is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
+ union drbd_state os)
{
- int rv = SS_SUCCESS;
+ enum drbd_state_rv rv = SS_SUCCESS;
if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
os.conn > C_CONNECTED)
@@ -800,6 +793,10 @@ static int is_valid_state_transition(struct drbd_conf *mdev,
os.conn < C_CONNECTED)
rv = SS_NEED_CONNECTION;
+ if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+ && os.conn < C_WF_REPORT_PARAMS)
+ rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
return rv;
}
@@ -817,6 +814,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
union drbd_state ns, const char **warn_sync_abort)
{
enum drbd_fencing_p fp;
+ enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
fp = FP_DONT_CARE;
if (get_ldev(mdev)) {
@@ -869,56 +867,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
ns.conn = C_CONNECTED;
}
- if (ns.conn >= C_CONNECTED &&
- ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) ||
- (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) {
- switch (ns.conn) {
- case C_WF_BITMAP_T:
- case C_PAUSED_SYNC_T:
- ns.disk = D_OUTDATED;
- break;
- case C_CONNECTED:
- case C_WF_BITMAP_S:
- case C_SYNC_SOURCE:
- case C_PAUSED_SYNC_S:
- ns.disk = D_UP_TO_DATE;
- break;
- case C_SYNC_TARGET:
- ns.disk = D_INCONSISTENT;
- dev_warn(DEV, "Implicitly set disk state Inconsistent!\n");
- break;
- }
- if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE)
- dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n");
- }
-
- if (ns.conn >= C_CONNECTED &&
- (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) {
- switch (ns.conn) {
- case C_CONNECTED:
- case C_WF_BITMAP_T:
- case C_PAUSED_SYNC_T:
- case C_SYNC_TARGET:
- ns.pdsk = D_UP_TO_DATE;
- break;
- case C_WF_BITMAP_S:
- case C_PAUSED_SYNC_S:
- /* remap any consistent state to D_OUTDATED,
- * but disallow "upgrade" of not even consistent states.
- */
- ns.pdsk =
- (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
- ? os.pdsk : D_OUTDATED;
- break;
- case C_SYNC_SOURCE:
- ns.pdsk = D_INCONSISTENT;
- dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n");
- break;
- }
- if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE)
- dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n");
- }
-
/* Connection breaks down before we finished "Negotiating" */
if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -933,6 +881,94 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
put_ldev(mdev);
}
+ /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+ if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+ if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+ ns.disk = D_UP_TO_DATE;
+ if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+ ns.pdsk = D_UP_TO_DATE;
+ }
+
+ /* Implications of the connection stat on the disk states */
+ disk_min = D_DISKLESS;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_INCONSISTENT;
+ pdsk_max = D_UNKNOWN;
+ switch ((enum drbd_conns)ns.conn) {
+ case C_WF_BITMAP_T:
+ case C_PAUSED_SYNC_T:
+ case C_STARTING_SYNC_T:
+ case C_WF_SYNC_UUID:
+ case C_BEHIND:
+ disk_min = D_INCONSISTENT;
+ disk_max = D_OUTDATED;
+ pdsk_min = D_UP_TO_DATE;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_VERIFY_S:
+ case C_VERIFY_T:
+ disk_min = D_UP_TO_DATE;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_UP_TO_DATE;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_CONNECTED:
+ disk_min = D_DISKLESS;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_DISKLESS;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_WF_BITMAP_S:
+ case C_PAUSED_SYNC_S:
+ case C_STARTING_SYNC_S:
+ case C_AHEAD:
+ disk_min = D_UP_TO_DATE;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_INCONSISTENT;
+ pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
+ break;
+ case C_SYNC_TARGET:
+ disk_min = D_INCONSISTENT;
+ disk_max = D_INCONSISTENT;
+ pdsk_min = D_UP_TO_DATE;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_SYNC_SOURCE:
+ disk_min = D_UP_TO_DATE;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_INCONSISTENT;
+ pdsk_max = D_INCONSISTENT;
+ break;
+ case C_STANDALONE:
+ case C_DISCONNECTING:
+ case C_UNCONNECTED:
+ case C_TIMEOUT:
+ case C_BROKEN_PIPE:
+ case C_NETWORK_FAILURE:
+ case C_PROTOCOL_ERROR:
+ case C_TEAR_DOWN:
+ case C_WF_CONNECTION:
+ case C_WF_REPORT_PARAMS:
+ case C_MASK:
+ break;
+ }
+ if (ns.disk > disk_max)
+ ns.disk = disk_max;
+
+ if (ns.disk < disk_min) {
+ dev_warn(DEV, "Implicitly set disk from %s to %s\n",
+ drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+ ns.disk = disk_min;
+ }
+ if (ns.pdsk > pdsk_max)
+ ns.pdsk = pdsk_max;
+
+ if (ns.pdsk < pdsk_min) {
+ dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
+ drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+ ns.pdsk = pdsk_min;
+ }
+
if (fp == FP_STONITH &&
(ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
!(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
@@ -961,6 +997,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
/* helper for __drbd_set_state */
static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
{
+ if (mdev->agreed_pro_version < 90)
+ mdev->ov_start_sector = 0;
+ mdev->rs_total = drbd_bm_bits(mdev);
+ mdev->ov_position = 0;
if (cs == C_VERIFY_T) {
/* starting online verify from an arbitrary position
* does not fit well into the existing protocol.
@@ -970,11 +1010,15 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
mdev->ov_start_sector = ~(sector_t)0;
} else {
unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
- if (bit >= mdev->rs_total)
+ if (bit >= mdev->rs_total) {
mdev->ov_start_sector =
BM_BIT_TO_SECT(mdev->rs_total - 1);
+ mdev->rs_total = 1;
+ } else
+ mdev->rs_total -= bit;
mdev->ov_position = mdev->ov_start_sector;
}
+ mdev->ov_left = mdev->rs_total;
}
static void drbd_resume_al(struct drbd_conf *mdev)
@@ -992,12 +1036,12 @@ static void drbd_resume_al(struct drbd_conf *mdev)
*
* Caller needs to hold req_lock, and global_state_lock. Do not call directly.
*/
-int __drbd_set_state(struct drbd_conf *mdev,
- union drbd_state ns, enum chg_state_flags flags,
- struct completion *done)
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+ enum chg_state_flags flags, struct completion *done)
{
union drbd_state os;
- int rv = SS_SUCCESS;
+ enum drbd_state_rv rv = SS_SUCCESS;
const char *warn_sync_abort = NULL;
struct after_state_chg_work *ascw;
@@ -1033,22 +1077,46 @@ int __drbd_set_state(struct drbd_conf *mdev,
dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
{
- char *pbp, pb[300];
- pbp = pb;
- *pbp = 0;
- PSC(role);
- PSC(peer);
- PSC(conn);
- PSC(disk);
- PSC(pdsk);
- if (is_susp(ns) != is_susp(os))
- pbp += sprintf(pbp, "susp( %s -> %s ) ",
- drbd_susp_str(is_susp(os)),
- drbd_susp_str(is_susp(ns)));
- PSC(aftr_isp);
- PSC(peer_isp);
- PSC(user_isp);
- dev_info(DEV, "%s\n", pb);
+ char *pbp, pb[300];
+ pbp = pb;
+ *pbp = 0;
+ if (ns.role != os.role)
+ pbp += sprintf(pbp, "role( %s -> %s ) ",
+ drbd_role_str(os.role),
+ drbd_role_str(ns.role));
+ if (ns.peer != os.peer)
+ pbp += sprintf(pbp, "peer( %s -> %s ) ",
+ drbd_role_str(os.peer),
+ drbd_role_str(ns.peer));
+ if (ns.conn != os.conn)
+ pbp += sprintf(pbp, "conn( %s -> %s ) ",
+ drbd_conn_str(os.conn),
+ drbd_conn_str(ns.conn));
+ if (ns.disk != os.disk)
+ pbp += sprintf(pbp, "disk( %s -> %s ) ",
+ drbd_disk_str(os.disk),
+ drbd_disk_str(ns.disk));
+ if (ns.pdsk != os.pdsk)
+ pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
+ drbd_disk_str(os.pdsk),
+ drbd_disk_str(ns.pdsk));
+ if (is_susp(ns) != is_susp(os))
+ pbp += sprintf(pbp, "susp( %d -> %d ) ",
+ is_susp(os),
+ is_susp(ns));
+ if (ns.aftr_isp != os.aftr_isp)
+ pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
+ os.aftr_isp,
+ ns.aftr_isp);
+ if (ns.peer_isp != os.peer_isp)
+ pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
+ os.peer_isp,
+ ns.peer_isp);
+ if (ns.user_isp != os.user_isp)
+ pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
+ os.user_isp,
+ ns.user_isp);
+ dev_info(DEV, "%s\n", pb);
}
/* solve the race between becoming unconfigured,
@@ -1074,6 +1142,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
atomic_inc(&mdev->local_cnt);
mdev->state = ns;
+
+ if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+ drbd_print_uuids(mdev, "attached to UUIDs");
+
wake_up(&mdev->misc_wait);
wake_up(&mdev->state_wait);
@@ -1081,7 +1153,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
ns.conn < C_CONNECTED) {
mdev->ov_start_sector =
- BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left);
+ BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
dev_info(DEV, "Online Verify reached sector %llu\n",
(unsigned long long)mdev->ov_start_sector);
}
@@ -1106,14 +1178,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
unsigned long now = jiffies;
int i;
- mdev->ov_position = 0;
- mdev->rs_total = drbd_bm_bits(mdev);
- if (mdev->agreed_pro_version >= 90)
- set_ov_position(mdev, ns.conn);
- else
- mdev->ov_start_sector = 0;
- mdev->ov_left = mdev->rs_total
- - BM_SECT_TO_BIT(mdev->ov_position);
+ set_ov_position(mdev, ns.conn);
mdev->rs_start = now;
mdev->rs_last_events = 0;
mdev->rs_last_sect_ev = 0;
@@ -1121,10 +1186,12 @@ int __drbd_set_state(struct drbd_conf *mdev,
mdev->ov_last_oos_start = 0;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
- mdev->rs_mark_left[i] = mdev->rs_total;
+ mdev->rs_mark_left[i] = mdev->ov_left;
mdev->rs_mark_time[i] = now;
}
+ drbd_rs_controller_reset(mdev);
+
if (ns.conn == C_VERIFY_S) {
dev_info(DEV, "Starting Online Verify from sector %llu\n",
(unsigned long long)mdev->ov_position);
@@ -1228,6 +1295,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
}
}
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ char *why, enum bm_flag flags)
+{
+ int rv;
+
+ D_ASSERT(current == mdev->worker.task);
+
+ /* open coded non-blocking drbd_suspend_io(mdev); */
+ set_bit(SUSPEND_IO, &mdev->flags);
+
+ drbd_bm_lock(mdev, why, flags);
+ rv = io_fn(mdev);
+ drbd_bm_unlock(mdev);
+
+ drbd_resume_io(mdev);
+
+ return rv;
+}
+
/**
* after_state_ch() - Perform after state change actions that may sleep
* @mdev: DRBD device.
@@ -1266,16 +1353,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
nsm.i = -1;
if (ns.susp_nod) {
- if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
- if (ns.conn == C_CONNECTED)
- what = resend, nsm.susp_nod = 0;
- else /* ns.conn > C_CONNECTED */
- dev_err(DEV, "Unexpected Resynd going on!\n");
- }
+ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+ what = resend;
if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
- what = restart_frozen_disk_io, nsm.susp_nod = 0;
+ what = restart_frozen_disk_io;
+ if (what != nothing)
+ nsm.susp_nod = 0;
}
if (ns.susp_fen) {
@@ -1306,13 +1391,30 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
spin_unlock_irq(&mdev->req_lock);
}
+ /* Became sync source. With protocol >= 96, we still need to send out
+ * the sync uuid now. Need to do that before any drbd_send_state, or
+ * the other side may go "paused sync" before receiving the sync uuids,
+ * which is unexpected. */
+ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+ (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+ mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+ drbd_gen_and_send_sync_uuid(mdev);
+ put_ldev(mdev);
+ }
+
/* Do not change the order of the if above and the two below... */
if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
drbd_send_uuids(mdev);
drbd_send_state(mdev);
}
- if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
- drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+ /* No point in queuing send_bitmap if we don't have a connection
+ * anymore, so check also the _current_ state, not only the new state
+ * at the time this work was queued. */
+ if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+ mdev->state.conn == C_WF_BITMAP_S)
+ drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+ "send_bitmap (WFBitMapS)",
+ BM_LOCKED_TEST_ALLOWED);
/* Lost contact to peer's copy of the data */
if ((os.pdsk >= D_INCONSISTENT &&
@@ -1343,7 +1445,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* D_DISKLESS Peer becomes secondary */
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
- drbd_al_to_on_disk_bm(mdev);
+ /* We may still be Primary ourselves.
+ * No harm done if the bitmap still changes,
+ * redirtied pages will follow later. */
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+ "demote diskless peer", BM_LOCKED_SET_ALLOWED);
+ put_ldev(mdev);
+ }
+
+ /* Write out all changed bits on demote.
+ * Though, no need to da that just yet
+ * if there is a resync going on still */
+ if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+ mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+ /* No changes to the bitmap expected this time, so assert that,
+ * even though no harm was done if it did change. */
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+ "demote", BM_LOCKED_TEST_ALLOWED);
put_ldev(mdev);
}
@@ -1371,15 +1489,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
drbd_send_state(mdev);
+ if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
+ drbd_send_state(mdev);
+
/* We are in the progress to start a full sync... */
if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
(os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
- drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+ /* no other bitmap changes expected during this phase */
+ drbd_queue_bitmap_io(mdev,
+ &drbd_bmio_set_n_write, &abw_start_sync,
+ "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
/* We are invalidating our self... */
if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
- drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+ /* other bitmap operation expected during this phase */
+ drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+ "set_n_write from invalidate", BM_LOCKED_MASK);
/* first half of local IO error, failure to attach,
* or administrative detach */
@@ -1434,10 +1560,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
if (drbd_send_state(mdev))
dev_warn(DEV, "Notified peer that I'm now diskless.\n");
- else
- dev_err(DEV, "Sending state for being diskless failed\n");
/* corresponding get_ldev in __drbd_set_state
- * this may finaly trigger drbd_ldev_destroy. */
+ * this may finally trigger drbd_ldev_destroy. */
put_ldev(mdev);
}
@@ -1459,6 +1583,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
drbd_send_state(mdev);
+ /* This triggers bitmap writeout of potentially still unwritten pages
+ * if the resync finished cleanly, or aborted because of peer disk
+ * failure, or because of connection loss.
+ * For resync aborted because of local disk failure, we cannot do
+ * any bitmap writeout anymore.
+ * No harm done if some bits change during this phase.
+ */
+ if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+ drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
+ "write from resync_finished", BM_LOCKED_SET_ALLOWED);
+ put_ldev(mdev);
+ }
+
/* free tl_hash if we Got thawed and are C_STANDALONE */
if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
drbd_free_tl_hash(mdev);
@@ -1559,7 +1696,7 @@ int drbd_thread_start(struct drbd_thread *thi)
if (!try_module_get(THIS_MODULE)) {
dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
spin_unlock_irqrestore(&thi->t_lock, flags);
- return FALSE;
+ return false;
}
init_completion(&thi->stop);
@@ -1576,7 +1713,7 @@ int drbd_thread_start(struct drbd_thread *thi)
dev_err(DEV, "Couldn't start thread\n");
module_put(THIS_MODULE);
- return FALSE;
+ return false;
}
spin_lock_irqsave(&thi->t_lock, flags);
thi->task = nt;
@@ -1596,7 +1733,7 @@ int drbd_thread_start(struct drbd_thread *thi)
break;
}
- return TRUE;
+ return true;
}
@@ -1694,8 +1831,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
{
int sent, ok;
- ERR_IF(!h) return FALSE;
- ERR_IF(!size) return FALSE;
+ ERR_IF(!h) return false;
+ ERR_IF(!size) return false;
h->magic = BE_DRBD_MAGIC;
h->command = cpu_to_be16(cmd);
@@ -1704,8 +1841,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
sent = drbd_send(mdev, sock, h, size, msg_flags);
ok = (sent == size);
- if (!ok)
- dev_err(DEV, "short sent %s size=%d sent=%d\n",
+ if (!ok && !signal_pending(current))
+ dev_warn(DEV, "short sent %s size=%d sent=%d\n",
cmdname(cmd), (int)size, sent);
return ok;
}
@@ -1840,7 +1977,7 @@ int drbd_send_protocol(struct drbd_conf *mdev)
else {
dev_err(DEV, "--dry-run is not supported by peer");
kfree(p);
- return 0;
+ return -1;
}
}
p->conn_flags = cpu_to_be32(cf);
@@ -1888,12 +2025,36 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
return _drbd_send_uuids(mdev, 8);
}
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ u64 *uuid = mdev->ldev->md.uuid;
+ dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+ text,
+ (unsigned long long)uuid[UI_CURRENT],
+ (unsigned long long)uuid[UI_BITMAP],
+ (unsigned long long)uuid[UI_HISTORY_START],
+ (unsigned long long)uuid[UI_HISTORY_END]);
+ put_ldev(mdev);
+ } else {
+ dev_info(DEV, "%s effective data uuid: %016llX\n",
+ text,
+ (unsigned long long)mdev->ed_uuid);
+ }
+}
-int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
struct p_rs_uuid p;
+ u64 uuid;
+
+ D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
- p.uuid = cpu_to_be64(val);
+ uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+ drbd_uuid_set(mdev, UI_BITMAP, uuid);
+ drbd_print_uuids(mdev, "updated sync UUID");
+ drbd_md_sync(mdev);
+ p.uuid = cpu_to_be64(uuid);
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
(struct p_header80 *)&p, sizeof(p));
@@ -1921,7 +2082,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
p.d_size = cpu_to_be64(d_size);
p.u_size = cpu_to_be64(u_size);
p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
- p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
+ p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
p.queue_order_type = cpu_to_be16(q_order_type);
p.dds_flags = cpu_to_be16(flags);
@@ -1972,7 +2133,7 @@ int drbd_send_state_req(struct drbd_conf *mdev,
(struct p_header80 *)&p, sizeof(p));
}
-int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
+int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
{
struct p_req_state_reply p;
@@ -2076,9 +2237,15 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
return len;
}
-enum { OK, FAILED, DONE }
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
send_bitmap_rle_or_plain(struct drbd_conf *mdev,
- struct p_header80 *h, struct bm_xfer_ctx *c)
+ struct p_header80 *h, struct bm_xfer_ctx *c)
{
struct p_compressed_bm *p = (void*)h;
unsigned long num_words;
@@ -2088,7 +2255,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
len = fill_bitmap_rle_bits(mdev, p, c);
if (len < 0)
- return FAILED;
+ return -EIO;
if (len) {
DCBP_set_code(p, RLE_VLI_Bits);
@@ -2118,11 +2285,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
if (c->bit_offset > c->bm_bits)
c->bit_offset = c->bm_bits;
}
- ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
-
- if (ok == DONE)
- INFO_bm_xfer_stats(mdev, "send", c);
- return ok;
+ if (ok) {
+ if (len == 0) {
+ INFO_bm_xfer_stats(mdev, "send", c);
+ return 0;
+ } else
+ return 1;
+ }
+ return -EIO;
}
/* See the comment at receive_bitmap() */
@@ -2130,16 +2300,16 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
{
struct bm_xfer_ctx c;
struct p_header80 *p;
- int ret;
+ int err;
- ERR_IF(!mdev->bitmap) return FALSE;
+ ERR_IF(!mdev->bitmap) return false;
/* maybe we should use some per thread scratch page,
* and allocate that during initial device creation? */
p = (struct p_header80 *) __get_free_page(GFP_NOIO);
if (!p) {
dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
- return FALSE;
+ return false;
}
if (get_ldev(mdev)) {
@@ -2165,11 +2335,11 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
};
do {
- ret = send_bitmap_rle_or_plain(mdev, p, &c);
- } while (ret == OK);
+ err = send_bitmap_rle_or_plain(mdev, p, &c);
+ } while (err > 0);
free_page((unsigned long) p);
- return (ret == DONE);
+ return err == 0;
}
int drbd_send_bitmap(struct drbd_conf *mdev)
@@ -2192,7 +2362,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
p.set_size = cpu_to_be32(set_size);
if (mdev->state.conn < C_CONNECTED)
- return FALSE;
+ return false;
ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
(struct p_header80 *)&p, sizeof(p));
return ok;
@@ -2220,7 +2390,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
- return FALSE;
+ return false;
ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
(struct p_header80 *)&p, sizeof(p));
return ok;
@@ -2326,8 +2496,8 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
}
/* called on sndtimeo
- * returns FALSE if we should retry,
- * TRUE if we think connection is dead
+ * returns false if we should retry,
+ * true if we think connection is dead
*/
static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
{
@@ -2340,7 +2510,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
|| mdev->state.conn < C_CONNECTED;
if (drop_it)
- return TRUE;
+ return true;
drop_it = !--mdev->ko_count;
if (!drop_it) {
@@ -2477,12 +2647,11 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
{
if (mdev->agreed_pro_version >= 95)
return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
- (bi_rw & REQ_UNPLUG ? DP_UNPLUG : 0) |
(bi_rw & REQ_FUA ? DP_FUA : 0) |
(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
else
- return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0;
+ return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
}
/* Used to send write requests
@@ -2532,13 +2701,39 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
if (ok && dgs) {
dgb = mdev->int_dig_out;
drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
- ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+ ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
}
if (ok) {
- if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+ /* For protocol A, we have to memcpy the payload into
+ * socket buffers, as we may complete right away
+ * as soon as we handed it over to tcp, at which point the data
+ * pages may become invalid.
+ *
+ * For data-integrity enabled, we copy it as well, so we can be
+ * sure that even if the bio pages may still be modified, it
+ * won't change the data on the wire, thus if the digest checks
+ * out ok after sending on this side, but does not fit on the
+ * receiving side, we sure have detected corruption elsewhere.
+ */
+ if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
ok = _drbd_send_bio(mdev, req->master_bio);
else
ok = _drbd_send_zc_bio(mdev, req->master_bio);
+
+ /* double check digest, sometimes buffers have been modified in flight. */
+ if (dgs > 0 && dgs <= 64) {
+ /* 64 byte, 512 bit, is the larges digest size
+ * currently supported in kernel crypto. */
+ unsigned char digest[64];
+ drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
+ if (memcmp(mdev->int_dig_out, digest, dgs)) {
+ dev_warn(DEV,
+ "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
+ (unsigned long long)req->sector, req->size);
+ }
+ } /* else if (dgs > 64) {
+ ... Be noisy about digest too large ...
+ } */
}
drbd_put_data_sock(mdev);
@@ -2588,7 +2783,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
if (ok && dgs) {
dgb = mdev->int_dig_out;
drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
- ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+ ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
}
if (ok)
ok = _drbd_send_zc_ee(mdev, e);
@@ -2598,6 +2793,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
return ok;
}
+int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+{
+ struct p_block_desc p;
+
+ p.sector = cpu_to_be64(req->sector);
+ p.blksize = cpu_to_be32(req->size);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+}
+
/*
drbd_send distinguishes two cases:
@@ -2719,35 +2924,6 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
return 0;
}
-static void drbd_unplug_fn(struct request_queue *q)
-{
- struct drbd_conf *mdev = q->queuedata;
-
- /* unplug FIRST */
- spin_lock_irq(q->queue_lock);
- blk_remove_plug(q);
- spin_unlock_irq(q->queue_lock);
-
- /* only if connected */
- spin_lock_irq(&mdev->req_lock);
- if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
- D_ASSERT(mdev->state.role == R_PRIMARY);
- if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
- /* add to the data.work queue,
- * unless already queued.
- * XXX this might be a good addition to drbd_queue_work
- * anyways, to detect "double queuing" ... */
- if (list_empty(&mdev->unplug_work.list))
- drbd_queue_work(&mdev->data.work,
- &mdev->unplug_work);
- }
- }
- spin_unlock_irq(&mdev->req_lock);
-
- if (mdev->state.disk >= D_INCONSISTENT)
- drbd_kick_lo(mdev);
-}
-
static void drbd_set_defaults(struct drbd_conf *mdev)
{
/* This way we get a compile error when sync_conf grows,
@@ -2800,6 +2976,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
atomic_set(&mdev->pp_in_use_by_net, 0);
atomic_set(&mdev->rs_sect_in, 0);
atomic_set(&mdev->rs_sect_ev, 0);
+ atomic_set(&mdev->ap_in_flight, 0);
mutex_init(&mdev->md_io_mutex);
mutex_init(&mdev->data.mutex);
@@ -2828,19 +3005,27 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
INIT_LIST_HEAD(&mdev->unplug_work.list);
INIT_LIST_HEAD(&mdev->go_diskless.list);
INIT_LIST_HEAD(&mdev->md_sync_work.list);
+ INIT_LIST_HEAD(&mdev->start_resync_work.list);
INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
- mdev->resync_work.cb = w_resync_inactive;
+ mdev->resync_work.cb = w_resync_timer;
mdev->unplug_work.cb = w_send_write_hint;
mdev->go_diskless.cb = w_go_diskless;
mdev->md_sync_work.cb = w_md_sync;
mdev->bm_io_work.w.cb = w_bitmap_io;
+ mdev->start_resync_work.cb = w_start_resync;
init_timer(&mdev->resync_timer);
init_timer(&mdev->md_sync_timer);
+ init_timer(&mdev->start_resync_timer);
+ init_timer(&mdev->request_timer);
mdev->resync_timer.function = resync_timer_fn;
mdev->resync_timer.data = (unsigned long) mdev;
mdev->md_sync_timer.function = md_sync_timer_fn;
mdev->md_sync_timer.data = (unsigned long) mdev;
+ mdev->start_resync_timer.function = start_resync_timer_fn;
+ mdev->start_resync_timer.data = (unsigned long) mdev;
+ mdev->request_timer.function = request_timer_fn;
+ mdev->request_timer.data = (unsigned long) mdev;
init_waitqueue_head(&mdev->misc_wait);
init_waitqueue_head(&mdev->state_wait);
@@ -2911,6 +3096,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
D_ASSERT(list_empty(&mdev->resync_work.list));
D_ASSERT(list_empty(&mdev->unplug_work.list));
D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+ drbd_set_defaults(mdev);
}
@@ -2953,7 +3140,7 @@ static void drbd_destroy_mempools(void)
static int drbd_create_mempools(void)
{
struct page *page;
- const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count;
+ const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
int i;
/* prepare our caches and mempools */
@@ -3117,11 +3304,20 @@ static void drbd_cleanup(void)
unregister_reboot_notifier(&drbd_notifier);
+ /* first remove proc,
+ * drbdsetup uses it's presence to detect
+ * whether DRBD is loaded.
+ * If we would get stuck in proc removal,
+ * but have netlink already deregistered,
+ * some drbdsetup commands may wait forever
+ * for an answer.
+ */
+ if (drbd_proc)
+ remove_proc_entry("drbd", NULL);
+
drbd_nl_cleanup();
if (minor_table) {
- if (drbd_proc)
- remove_proc_entry("drbd", NULL);
i = minor_count;
while (i--)
drbd_delete_device(i);
@@ -3149,7 +3345,7 @@ static int drbd_congested(void *congested_data, int bdi_bits)
char reason = '-';
int r = 0;
- if (!__inc_ap_bio_cond(mdev)) {
+ if (!may_inc_ap_bio(mdev)) {
/* DRBD has frozen IO */
r = bdi_bits;
reason = 'd';
@@ -3202,7 +3398,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
goto out_no_disk;
mdev->vdisk = disk;
- set_disk_ro(disk, TRUE);
+ set_disk_ro(disk, true);
disk->queue = q;
disk->major = DRBD_MAJOR;
@@ -3218,13 +3414,11 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
q->backing_dev_info.congested_fn = drbd_congested;
q->backing_dev_info.congested_data = mdev;
- blk_queue_make_request(q, drbd_make_request_26);
- blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
+ blk_queue_make_request(q, drbd_make_request);
+ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
blk_queue_merge_bvec(q, drbd_merge_bvec);
- q->queue_lock = &mdev->req_lock; /* needed since we use */
- /* plugging on a queue, that actually has no requests! */
- q->unplug_fn = drbd_unplug_fn;
+ q->queue_lock = &mdev->req_lock;
mdev->md_io_page = alloc_page(GFP_KERNEL);
if (!mdev->md_io_page)
@@ -3283,6 +3477,7 @@ void drbd_free_mdev(struct drbd_conf *mdev)
put_disk(mdev->vdisk);
blk_cleanup_queue(mdev->rq_queue);
free_cpumask_var(mdev->cpu_mask);
+ drbd_free_tl_hash(mdev);
kfree(mdev);
}
@@ -3298,7 +3493,7 @@ int __init drbd_init(void)
return -EINVAL;
}
- if (1 > minor_count || minor_count > 255) {
+ if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
printk(KERN_ERR
"drbd: invalid minor_count (%d)\n", minor_count);
#ifdef MODULE
@@ -3480,7 +3675,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
/* this was a try anyways ... */
dev_err(DEV, "meta data update failed!\n");
- drbd_chk_io_error(mdev, 1, TRUE);
+ drbd_chk_io_error(mdev, 1, true);
}
/* Update mdev->ldev->md.la_size_sect,
@@ -3496,7 +3691,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
* @mdev: DRBD device.
* @bdev: Device from which the meta data should be read in.
*
- * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
* something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
*/
int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
@@ -3511,7 +3706,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
- /* NOTE: cant do normal error processing here as this is
+ /* NOTE: can't do normal error processing here as this is
called BEFORE disk is attached */
dev_err(DEV, "Error while reading metadata.\n");
rv = ERR_IO_MD_DISK;
@@ -3566,28 +3761,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
return rv;
}
-static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
-{
- static char *uuid_str[UI_EXTENDED_SIZE] = {
- [UI_CURRENT] = "CURRENT",
- [UI_BITMAP] = "BITMAP",
- [UI_HISTORY_START] = "HISTORY_START",
- [UI_HISTORY_END] = "HISTORY_END",
- [UI_SIZE] = "SIZE",
- [UI_FLAGS] = "FLAGS",
- };
-
- if (index >= UI_EXTENDED_SIZE) {
- dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
- return;
- }
-
- dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
- uuid_str[index],
- (unsigned long long)mdev->ldev->md.uuid[index]);
-}
-
-
/**
* drbd_md_mark_dirty() - Mark meta data super block as dirty
* @mdev: DRBD device.
@@ -3617,10 +3790,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
{
int i;
- for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+ for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
- debug_drbd_uuid(mdev, i+1);
- }
}
void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
@@ -3635,7 +3806,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
}
mdev->ldev->md.uuid[idx] = val;
- debug_drbd_uuid(mdev, idx);
drbd_md_mark_dirty(mdev);
}
@@ -3645,7 +3815,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
if (mdev->ldev->md.uuid[idx]) {
drbd_uuid_move_history(mdev);
mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
- debug_drbd_uuid(mdev, UI_HISTORY_START);
}
_drbd_uuid_set(mdev, idx, val);
}
@@ -3660,14 +3829,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
{
u64 val;
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
- dev_info(DEV, "Creating new current UUID\n");
- D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
- debug_drbd_uuid(mdev, UI_BITMAP);
get_random_bytes(&val, sizeof(u64));
_drbd_uuid_set(mdev, UI_CURRENT, val);
+ drbd_print_uuids(mdev, "new current UUID");
/* get it to stable storage _now_ */
drbd_md_sync(mdev);
}
@@ -3681,16 +3852,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
drbd_uuid_move_history(mdev);
mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
mdev->ldev->md.uuid[UI_BITMAP] = 0;
- debug_drbd_uuid(mdev, UI_HISTORY_START);
- debug_drbd_uuid(mdev, UI_BITMAP);
} else {
- if (mdev->ldev->md.uuid[UI_BITMAP])
- dev_warn(DEV, "bm UUID already set");
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
- mdev->ldev->md.uuid[UI_BITMAP] = val;
- mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
-
- debug_drbd_uuid(mdev, UI_BITMAP);
+ mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
}
drbd_md_mark_dirty(mdev);
}
@@ -3746,15 +3913,19 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
struct bm_io_work *work = container_of(w, struct bm_io_work, w);
- int rv;
+ int rv = -EIO;
D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
- drbd_bm_lock(mdev, work->why);
- rv = work->io_fn(mdev);
- drbd_bm_unlock(mdev);
+ if (get_ldev(mdev)) {
+ drbd_bm_lock(mdev, work->why, work->flags);
+ rv = work->io_fn(mdev);
+ drbd_bm_unlock(mdev);
+ put_ldev(mdev);
+ }
clear_bit(BITMAP_IO, &mdev->flags);
+ smp_mb__after_clear_bit();
wake_up(&mdev->misc_wait);
if (work->done)
@@ -3762,6 +3933,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
work->why = NULL;
+ work->flags = 0;
return 1;
}
@@ -3816,7 +3988,7 @@ void drbd_go_diskless(struct drbd_conf *mdev)
void drbd_queue_bitmap_io(struct drbd_conf *mdev,
int (*io_fn)(struct drbd_conf *),
void (*done)(struct drbd_conf *, int),
- char *why)
+ char *why, enum bm_flag flags)
{
D_ASSERT(current == mdev->worker.task);
@@ -3830,15 +4002,15 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
mdev->bm_io_work.io_fn = io_fn;
mdev->bm_io_work.done = done;
mdev->bm_io_work.why = why;
+ mdev->bm_io_work.flags = flags;
+ spin_lock_irq(&mdev->req_lock);
set_bit(BITMAP_IO, &mdev->flags);
if (atomic_read(&mdev->ap_bio_cnt) == 0) {
- if (list_empty(&mdev->bm_io_work.w.list)) {
- set_bit(BITMAP_IO_QUEUED, &mdev->flags);
+ if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
- } else
- dev_err(DEV, "FIXME avoided double queuing bm_io_work\n");
}
+ spin_unlock_irq(&mdev->req_lock);
}
/**
@@ -3850,19 +4022,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
* freezes application IO while that the actual IO operations runs. This
* functions MAY NOT be called from worker context.
*/
-int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
+ char *why, enum bm_flag flags)
{
int rv;
D_ASSERT(current != mdev->worker.task);
- drbd_suspend_io(mdev);
+ if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+ drbd_suspend_io(mdev);
- drbd_bm_lock(mdev, why);
+ drbd_bm_lock(mdev, why, flags);
rv = io_fn(mdev);
drbd_bm_unlock(mdev);
- drbd_resume_io(mdev);
+ if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+ drbd_resume_io(mdev);
return rv;
}