From 5ab7d2c005135849cf0bb1485d954c98f2cca57c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 27 Jan 2014 15:58:22 +0100 Subject: drbd: fix resync finished detection This fixes one recent regresion, and one long existing bug. The bug: drbd_try_clear_on_disk_bm() assumed that all "count" bits have to be accounted in the resync extent corresponding to the start sector. Since we allow application requests to cross our "extent" boundaries, this assumption is no longer true, resulting in possible misaccounting, scary messages ("BAD! sector=12345s enr=6 rs_left=-7 rs_failed=0 count=58 cstate=..."), and potentially, if the last bit to be cleared during resync would reside in previously misaccounted resync extent, the resync would never be recognized as finished, but would be "stalled" forever, even though all blocks are in sync again and all bits have been cleared... The regression was introduced by drbd: get rid of atomic update on disk bitmap works For an "empty" resync (rs_total == 0), we must not "finish" the resync on the SyncSource before the SyncTarget knows all relevant information (sync uuid). We need to wait for the full round-trip, the SyncTarget will then explicitly notify us. Also for normal, non-empty resyncs (rs_total > 0), the resync-finished condition needs to be tested before the schedule() in wait_for_work, or it is likely to be missed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'drivers/block/drbd/drbd_int.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eb002a7656af..a16f9ae3c98a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -432,7 +432,11 @@ enum { * goes into C_CONNECTED state. */ CONSIDER_RESYNC, + RS_PROGRESS, /* tell worker that resync made significant progress */ + RS_DONE, /* tell worker that resync is done */ + MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ + SUSPEND_IO, /* suspend application io */ BITMAP_IO, /* suspend application io; once no more io in flight, start bitmap io */ @@ -577,6 +581,7 @@ enum { * and potentially deadlock on, this drbd worker. */ DISCONNECT_SENT, + CONN_RS_PROGRESS, /* tell worker that resync made significant progress */ }; struct drbd_resource { @@ -1106,17 +1111,21 @@ struct bm_extent { /* in which _bitmap_ extent (resp. sector) the bit for a certain * _storage_ sector is located in */ #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) +#define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) -/* how much _storage_ sectors we have per bitmap sector */ +/* first storage sector a bitmap extent corresponds to */ #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) +/* how much _storage_ sectors we have per bitmap extent */ #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) +/* how many bits are covered by one bitmap extent (resync extent) */ +#define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) + +#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1) + /* in one sector of the bitmap, we have this many activity_log extents. */ #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) -#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) -#define BM_BLOCKS_PER_BM_EXT_MASK ((1<ldev. Returns 0 if there is no ldev * @M: DRBD device. -- cgit v1.2.3