From ca9bc12b90fbc4e2b1f81360f63842c9da54bb3c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 13:47:24 +0100 Subject: drbd: Get rid of BE_DRBD_MAGIC and BE_DRBD_MAGIC_BIG Converting the constants happens at compile time. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9e5f5607eba3..d28202811672 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -334,9 +334,7 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) #define DRBD_MAGIC 0x83740267 -#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) #define DRBD_MAGIC_BIG 0x835a -#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 -- cgit v1.2.3 From 9749f30f1a387070e6e8351f35aeb829eacc3ab6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 20 Jul 2011 14:59:37 +0200 Subject: idr: idr_for_each_entry() macro Inspired by the list_for_each_entry() macro --- include/linux/idr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index 255491cf522e..52a9da295296 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id); void __init idr_init_cache(void); +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + #endif /* __IDR_H__ */ -- cgit v1.2.3 From fd340c12c98b57ec0751ebb317057eee41be0c3d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:57:39 +0100 
Subject: drbd: Use new header layout The new header layout will only be used if the peer supports it of course. For the first packet and the handshake packet the old (h80) layout is used for compatibility reasons. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 82 +++++++++++++++++--------------------- drivers/block/drbd/drbd_receiver.c | 7 +++- include/linux/drbd.h | 2 +- 4 files changed, 42 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dc669dfe5b0d..4de43481bcb9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -345,7 +345,6 @@ struct p_header95 { u16 magic; /* use DRBD_MAGIC_BIG here */ u16 command; u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */ - u8 payload[0]; } __packed; struct p_header { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 55ce48e24b8e..f8cb15c84ed8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1820,12 +1820,36 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) } #endif +static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be32(DRBD_MAGIC); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size); +} + +static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be16(DRBD_MAGIC_BIG); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); +} + +static void prepare_header(struct drbd_conf *mdev, struct p_header *h, + enum drbd_packets cmd, int size) +{ + if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + prepare_header95(mdev, &h->h95, cmd, size); + else + prepare_header80(mdev, &h->h80, cmd, size); +} + /* the appropriate socket mutex must be 
held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *hg, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags) { - struct p_header80 *h = (struct p_header80 *)hg; int sent, ok; if (!expect(h)) @@ -1833,9 +1857,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, if (!expect(size)) return false; - h->magic = cpu_to_be32(DRBD_MAGIC); - h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header80)); + prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1878,12 +1900,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct p_header80 h; + struct p_header h; int ok; - h.magic = cpu_to_be32(DRBD_MAGIC); - h.command = cpu_to_be16(cmd); - h.length = cpu_to_be16(size); + prepare_header(mdev, &h, cmd, size); if (!drbd_get_data_sock(mdev)) return 0; @@ -2456,14 +2476,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, int ok; struct p_block_req p; + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size); p.sector = cpu_to_be64(sector); p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); - mutex_lock(&mdev->tconn->data.mutex); ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); @@ -2663,22 +2680,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? 
crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(P_DATA); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(P_DATA); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } - + prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = - atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); @@ -2748,18 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(cmd); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } - + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; /* p.seq_num = 0; No sequence numbers here.. 
*/ @@ -3028,7 +3022,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); - mdev->tconn->agreed_pro_version = PRO_VERSION_MAX; + /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; @@ -3506,12 +3500,8 @@ int __init drbd_init(void) { int err; - if (sizeof(struct p_handshake) != 80) { - printk(KERN_ERR - "drbd: never change the size or layout " - "of the HandShake packet.\n"); - return -EINVAL; - } + BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); + BUILD_BUG_ON(sizeof(struct p_handshake) != 80); if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9393fe482efc..8f5a241fe20a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -761,6 +761,9 @@ static int drbd_connect(struct drbd_conf *mdev) return -2; clear_bit(DISCARD_CONCURRENT, &mdev->flags); + mdev->tconn->agreed_pro_version = 99; + /* agreed_pro_version must be smaller than 100 so we send the old + header (h80) in the first packet and in the handshake packet. */ sock = NULL; msock = NULL; @@ -935,12 +938,12 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi return false; } - if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) { + if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { *cmd = be16_to_cpu(h->h95.command); - *packet_size = be32_to_cpu(h->h95.length); + *packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff; } else { dev_err(DEV, "magic?? 
on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d28202811672..35fc08a0a552 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 100 enum drbd_io_error_p { -- cgit v1.2.3 From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:55 +0100 Subject: drbd: use clear_bit_unlock() where appropriate Some open-coded clear_bit(); smp_mb__after_clear_bit(); should in fact have been smp_mb__before_clear_bit(); clear_bit(); Instead, use clear_bit_unlock() to annotate the intention, and have it do the right thing. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_main.c | 3 +-- include/linux/lru_cache.h | 3 +-- lib/lru_cache.c | 10 ++++------ 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e8d652f197c3..4be737055718 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) { struct drbd_bitmap *b = mdev->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); - clear_bit(BM_PAGE_IO_LOCK, addr); - smp_mb__after_clear_bit(); + clear_bit_unlock(BM_PAGE_IO_LOCK, addr); wake_up(&mdev->bitmap->bm_io_wait); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 592f0c949fd0..c77e51a40926 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) put_ldev(mdev); } - clear_bit(BITMAP_IO, &mdev->flags); - 
smp_mb__after_clear_bit(); + clear_bit_unlock(BITMAP_IO, &mdev->flags); wake_up(&mdev->misc_wait); if (work->done) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 7a71ffad037c..4cceafb0732d 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); } static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index a07e7268d7ed..9f353f7f41ca 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); } while (0) #define RETURN(x...) do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); lc->changing_element = NULL; lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); RETURN(); } @@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } -- cgit v1.2.3 From 46a15bc3ec425b546d140581c28192ab7877ddc4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:01 +0100 Subject: lru_cache: allow multiple changes per transaction Allow multiple changes to the active set of elements in lru_cache. 
The only current user of lru_cache, drbd, is driving this generalisation. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 50 ++------ drivers/block/drbd/drbd_nl.c | 4 +- include/linux/lru_cache.h | 68 +++++++---- lib/lru_cache.c | 243 +++++++++++++++++++++++++++------------ 4 files changed, 225 insertions(+), 140 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1ce3de6eed1b..44097c87fed7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) { struct lc_element *al_ext; struct lc_element *tmp; - unsigned long al_flags = 0; int wake; spin_lock_irq(&mdev->al_lock); @@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) return NULL; } } - al_ext = lc_get(mdev->act_log, enr); - al_flags = mdev->act_log->flags; + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - - /* - if (!al_ext) { - if (al_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); - if (al_flags & LC_DIRTY) - dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); - } - */ - return al_ext; } @@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) mdev->al_writ_cnt++; spin_lock_irq(&mdev->al_lock); - lc_changed(mdev->act_log, al_ext); + lc_committed(mdev->act_log); spin_unlock_irq(&mdev->al_lock); wake_up(&mdev->al_wait); } @@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) struct lc_element *al_ext; int i; - D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags)); for (i = 0; i < mdev->act_log->nr_elements; i++) { al_ext = lc_element_by_index(mdev->act_log, i); @@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, } 
ext->rs_left = rs_left; ext->rs_failed = success ? 0 : count; - lc_changed(mdev->resync, &ext->lce); + /* we don't keep a persistent log of the resync lru, + * we can commit any change right away. */ + lc_committed(mdev->resync); } lc_put(mdev->resync, &ext->lce); /* no race, we are within the al_lock! */ @@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wakeup = 1; } if (bm_ext->lce.refcnt == 1) @@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); } return bm_ext; @@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; - int rv = 0; + int rv; spin_lock_irq(&mdev->al_lock); - if (unlikely(enr == mdev->act_log->new_number)) - rv = 1; - else { - al_ext = lc_find(mdev->act_log, enr); - if (al_ext) { - if (al_ext->refcnt) - rv = 1; - } - } + rv = lc_is_used(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - /* - if (unlikely(rv)) { - dev_info(DEV, "Delaying sync read until app's write is done\n"); - } - */ return rv; } @@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); goto try_again; } if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wake_up(&mdev->al_wait); D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) 
== 0); } @@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) } check_al: for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - if (unlikely(al_enr+i == mdev->act_log->new_number)) - goto try_again; if (lc_is_used(mdev->act_log, al_enr+i)) goto try_again; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ae8f42e38e4f..0a92f5226c2a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, + n = lc_create("act_log", drbd_al_ext_cache, 1, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } resync_lru = lc_create("resync", drbd_bm_ext_cache, - 61, sizeof(struct bm_extent), + 1, 61, sizeof(struct bm_extent), offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 4cceafb0732d..cbafae40c649 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -166,9 +166,11 @@ struct lc_element { /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ unsigned lc_number; - /* special label when on free list */ #define LC_FREE (~0U) + + /* for pending changes */ + unsigned lc_new_number; }; struct lru_cache { @@ -176,6 +178,7 @@ struct lru_cache { struct list_head lru; struct list_head free; struct list_head in_use; + struct list_head to_be_changed; /* the pre-created kmem cache to allocate the objects from */ struct kmem_cache *lc_cache; @@ -186,7 +189,7 @@ struct lru_cache { size_t element_off; /* number of elements (indices) */ - unsigned int nr_elements; + unsigned int nr_elements; /* Arbitrary limit on maximum tracked objects. 
Practical limit is much * lower due to allocation failures, probably. For typical use cases, * nr_elements should be a few thousand at most. @@ -194,18 +197,19 @@ struct lru_cache { * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) + /* allow to accumulate a few (index:label) changes, + * but no more than max_pending_changes */ + unsigned int max_pending_changes; + /* number of elements currently on to_be_changed list */ + unsigned int pending_changes; + /* statistics */ - unsigned used; /* number of lelements currently on in_use list */ - unsigned long hits, misses, starving, dirty, changed; + unsigned used; /* number of elements currently on in_use list */ + unsigned long hits, misses, starving, locked, changed; /* see below: flag-bits for lru_cache */ unsigned long flags; - /* when changing the label of an index element */ - unsigned int new_number; - - /* for paranoia when changing the label of an index element */ - struct lc_element *changing_element; void *lc_private; const char *name; @@ -221,10 +225,15 @@ enum { /* debugging aid, to catch concurrent access early. * user needs to guarantee exclusive access by proper locking! */ __LC_PARANOIA, - /* if we need to change the set, but currently there is a changing - * transaction pending, we are "dirty", and must deferr further - * changing requests */ + + /* annotate that the set is "dirty", possibly accumulating further + * changes, until a transaction is finally triggered */ __LC_DIRTY, + + /* Locked, no further changes allowed. + * Also used to serialize changing transactions. 
__LC_LOCKED, + /* if we need to change the set, but currently there is no free nor * unused element available, we are "starving", and must not give out * further references, to guarantee that eventually some refcnt will @@ -236,9 +245,11 @@ enum { }; #define LC_PARANOIA (1<<__LC_PARANOIA) #define LC_DIRTY (1<<__LC_DIRTY) +#define LC_LOCKED (1<<__LC_LOCKED) #define LC_STARVING (1<<__LC_STARVING) extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); @@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); +extern void lc_committed(struct lru_cache *lc); struct seq_file; extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); @@ -258,31 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char void (*detail) (struct seq_file *, struct lc_element *)); /** - * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set * @lc: the lru cache to operate on * - * Note that the reference counts and order on the active and lru lists may - * still change. Returns true if we acquired the lock. + * Allows (expects) the set to be "dirty". Note that the reference counts and + * order on the active and lru lists may still change. Used to serialize + * changing transactions. Returns true if we acquired the lock.
*/ -static inline int lc_try_lock(struct lru_cache *lc) +static inline int lc_try_lock_for_transaction(struct lru_cache *lc) { - return !test_and_set_bit(__LC_DIRTY, &lc->flags); + return !test_and_set_bit(__LC_LOCKED, &lc->flags); } +/** + * lc_try_lock - variant to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Note that the reference counts and order on the active and lru lists may + * still change. Only works on a "clean" set. Returns true if we acquired the + * lock, which means there are no pending changes, and any further attempt to + * change the set will not succeed until the next lc_unlock(). + */ +extern int lc_try_lock(struct lru_cache *lc); + /** * lc_unlock - unlock @lc, allow lc_get() to change the set again * @lc: the lru cache to operate on */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit_unlock(__LC_DIRTY, &lc->flags); + clear_bit(__LC_DIRTY, &lc->flags); + clear_bit_unlock(__LC_LOCKED, &lc->flags); } -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} +extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 17621684758a..d71d89498943 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); BUG_ON(i >= lc_->nr_elements); \ BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/* We need to atomically + * - try to grab the lock (set LC_LOCKED) + * - only if there is no pending transaction + * (neither LC_DIRTY nor LC_STARVING is set) + * Because of PARANOIA_ENTRY() above abusing lc->flags as well, + * it is not sufficient to just say + * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); + */ +int lc_try_lock(struct lru_cache *lc) +{ + unsigned long val; + do { + val = cmpxchg(&lc->flags, 0, LC_LOCKED); + } while (unlikely (val ==
LC_PARANOIA)); + /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ + return 0 == val; +#if 0 + /* Alternative approach, spin in case someone enters or leaves a + * PARANOIA_ENTRY()/RETURN() section. */ + unsigned long old, new, val; + do { + old = lc->flags & LC_PARANOIA; + new = old | LC_LOCKED; + val = cmpxchg(&lc->flags, old, new); + } while (unlikely (val == (old ^ LC_PARANOIA))); + return old == val; +#endif +} + /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects * @e_off: offset to the &struct lc_element member in a tracked object @@ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); * or NULL on (allocation) failure. */ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off) { struct hlist_head *slot = NULL; @@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->name = name; lc->element_size = e_size; lc->element_off = e_off; lc->nr_elements = e_count; - lc->new_number = LC_FREE; + lc->max_pending_changes = max_pending_changes; lc->lc_cache = cache; lc->lc_element = element; lc->lc_slot = slot; @@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, e = p + e_off; e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); element[i] = e; } @@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->used = 0; lc->hits = 
0; lc->misses = 0; lc->starving = 0; - lc->dirty = 0; + lc->locked = 0; lc->changed = 0; + lc->pending_changes = 0; lc->flags = 0; - lc->changing_element = NULL; - lc->new_number = LC_FREE; memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); for (i = 0; i < lc->nr_elements; i++) { @@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) /* re-init it */ e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); } } @@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) /* NOTE: * total calls to lc_get are * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in + * misses include "locked" count (update from an other thread in * progress) and "changed", when this in fact lead to an successful * update of the cache. */ return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) @@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) } -/** - * lc_find - find element by label, if present in the hash table - * @lc: The lru_cache object - * @enr: element number - * - * Returns the pointer to an element, if the element with the requested - * "label" or element number is present in the hash table, - * or NULL if not found. Does not change the refcnt. 
- */ -struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, + bool include_changing) { struct hlist_node *n; struct lc_element *e; @@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) BUG_ON(!lc); BUG_ON(!lc->nr_elements); hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { - if (e->lc_number == enr) + /* "about to be changed" elements, pending transaction commit, + * are hashed by their "new number". "Normal" elements have + * lc_number == lc_new_number. */ + if (e->lc_new_number != enr) + continue; + if (e->lc_new_number == e->lc_number || include_changing) return e; + break; } return NULL; } -/* returned element will be "recycled" immediately */ -static struct lc_element *lc_evict(struct lru_cache *lc) +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + * Ignores elements that are "about to be used", i.e. not yet in the active + * set, but still pending transaction commit. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) { - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - PARANOIA_LC_ELEMENT(lc, e); + return __lc_find(lc, enr, 0); +} - list_del(&e->list); - hlist_del(&e->colision); - return e; +/** + * lc_is_used - find element by label + * @lc: The lru_cache object + * @enr: element number + * + * Returns true, if the element with the requested "label" or element number is + * present in the hash table, and is used (refcnt > 0). + * Also finds elements that are not _currently_ used but only "about to be + * used", i.e. 
on the "to_be_changed" list, pending transaction commit. + */ +bool lc_is_used(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e = __lc_find(lc, enr, 1); + return e && e->refcnt; } /** @@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - e->lc_number = LC_FREE; + e->lc_number = e->lc_new_number = LC_FREE; hlist_del_init(&e->colision); list_move(&e->list, &lc->free); RETURN(); } -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) { struct list_head *n; + struct lc_element *e; + + if (!list_empty(&lc->free)) + n = lc->free.next; + else if (!list_empty(&lc->lru)) + n = lc->lru.prev; + else + return NULL; + + e = list_entry(n, struct lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); - if (list_empty(&lc->free)) - return lc_evict(lc); + e->lc_new_number = new_number; + if (!hlist_unhashed(&e->colision)) + __hlist_del(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + list_move(&e->list, &lc->to_be_changed); - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); + return e; } static int lc_unused_element_available(struct lru_cache *lc) @@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - e = lc_find(lc, enr); - if (e) { + e = __lc_find(lc, enr, 1); + /* if lc_new_number != lc_number, + * this enr is currently being pulled in already, + * and will be available once the pending transaction + * has been committed. */ + if (e && e->lc_new_number == e->lc_number) { ++lc->hits; if (e->refcnt++ == 0) lc->used++; @@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool if (!may_change) RETURN(NULL); + /* It has been found above, but on the "to_be_changed" list, not yet + * committed. 
Don't pull it in twice, wait for the transaction, then + * try again */ + if (e) + RETURN(NULL); + + /* To avoid races with lc_try_lock(), first, mark us dirty + * (using test_and_set_bit, as it implies memory barriers), ... */ + test_and_set_bit(__LC_DIRTY, &lc->flags); + + /* ... only then check if it is locked anyways. If lc_unlock clears + * the dirty bit again, that's not a problem, we will come here again. + */ + if (test_bit(__LC_LOCKED, &lc->flags)) { + ++lc->locked; + RETURN(NULL); + } + /* In case there is nothing available and we can not kick out * the LRU element, we have to wait ... */ @@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. - * we serialize on flags & LC_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; + /* It was not present in the active set. We are going to recycle an + * unused (or even "free") element, but we won't accumulate more than + * max_pending_changes changes. */ + if (lc->pending_changes >= lc->max_pending_changes) RETURN(NULL); - } - e = lc_get_unused_element(lc); + e = lc_prepare_for_change(lc, enr); BUG_ON(!e); clear_bit(__LC_STARVING, &lc->flags); BUG_ON(++e->refcnt != 1); lc->used++; - - lc->changing_element = e; - lc->new_number = enr; + lc->pending_changes++; RETURN(e); } @@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool * pointer to an UNUSED element with some different element number, * where that different number may also be %LC_FREE. * - * In this case, the cache is marked %LC_DIRTY (blocking further changes), - * and the returned element pointer is removed from the lru list and - * hash collision chains. The user now should do whatever housekeeping - * is necessary. 
- * Then he must call lc_changed(lc,element_pointer), to finish - * the change. + * In this case, the cache is marked %LC_DIRTY, + * so lc_try_lock() will no longer succeed. + * The returned element pointer is moved to the "to_be_changed" list, + * and registered with the new element number on the hash collision chains, + * so it is possible to pick it up from lc_is_used(). + * Up to "max_pending_changes" (see lc_create()) can be accumulated. + * The user now should do whatever housekeeping is necessary, + * typically serialize on lc_try_lock_for_transaction(), then call + * lc_committed(lc) and lc_unlock(), to finish the change. * * NOTE: The user needs to check the lc_number on EACH use, so he recognizes * any cache set change. @@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) } /** - * lc_changed - tell @lc that the change has been recorded + * lc_committed - tell @lc that pending changes have been recorded * @lc: the lru cache to operate on - * @e: the element pending label change + * + * User is expected to serialize on explicit lc_try_lock_for_transaction() + * before the transaction is started, and later needs to lc_unlock() explicitly + * as well. 
*/ -void lc_changed(struct lru_cache *lc, struct lc_element *e) +void lc_committed(struct lru_cache *lc) { + struct lc_element *e, *tmp; + PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - PARANOIA_LC_ELEMENT(lc, e); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = LC_FREE; - clear_bit_unlock(__LC_DIRTY, &lc->flags); + list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { + /* count number of changes, not number of transactions */ + ++lc->changed; + e->lc_number = e->lc_new_number; + list_move(&e->list, &lc->in_use); + } + lc->pending_changes = 0; RETURN(); } @@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) PARANOIA_ENTRY(); PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); + BUG_ON(e->lc_number != e->lc_new_number); if (--e->refcnt == 0) { /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); @@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) void lc_set(struct lru_cache *lc, unsigned int enr, int index) { struct lc_element *e; + struct list_head *lh; if (index < 0 || index >= lc->nr_elements) return; e = lc_element_by_index(lc, index); - e->lc_number = enr; + BUG_ON(e->lc_number != e->lc_new_number); + BUG_ON(e->refcnt != 0); + e->lc_number = e->lc_new_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - list_move(&e->list, e->refcnt ? 
&lc->in_use : &lc->lru); + if (enr == LC_FREE) + lh = &lc->free; + else { + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + lh = &lc->lru; + } + list_move(&e->list, lh); } /** @@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); EXPORT_SYMBOL(lc_find); EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); -EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_committed); EXPORT_SYMBOL(lc_element_by_index); EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); +EXPORT_SYMBOL(lc_try_lock); +EXPORT_SYMBOL(lc_is_used); -- cgit v1.2.3 From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:03 +0100 Subject: drbd: new on-disk activity log transaction format Use a new on-disk transaction format for the activity log, which allows for multiple changes to the active set per transaction. Using 4k transaction blocks, we can now get rid of the work-around code to deal with devices not supporting 512 byte logical block size. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 409 ++++++++++++++++++++++++--------------- drivers/block/drbd/drbd_int.h | 44 +++-- drivers/block/drbd/drbd_main.c | 4 - drivers/block/drbd/drbd_nl.c | 42 +--- include/linux/drbd.h | 4 + include/linux/drbd_limits.h | 8 +- 6 files changed, 302 insertions(+), 209 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 44097c87fed7..ea3895de4e6d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -24,21 +24,67 @@ */ #include <linux/slab.h> +#include <linux/crc32c.h> #include <linux/drbd.h> +#include <linux/drbd_limits.h> +#include <linux/dynamic_debug.h> #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial checksum in our on disk activity log. - * With that we can ensure correct operation even when the storage - * device might do a partial (last) sector write while losing power.
- */ -struct __packed al_transaction { - u32 magic; - u32 tr_number; - struct __packed { - u32 pos; - u32 extent; } updates[1 + AL_EXTENTS_PT]; - u32 xor_sum; +/* all fields on disc in big endian */ +struct __packed al_transaction_on_disk { + /* don't we all like magic */ + __be32 magic; + + /* to identify the most recent transaction block + * in the on disk ring buffer */ + __be32 tr_number; + + /* checksum on the full 4k block, with this field set to 0. */ + __be32 crc32c; + + /* type of transaction, special transaction types like: + * purge-all, set-all-idle, set-all-active, ... to-be-defined */ + __be16 transaction_type; + + /* we currently allow only a few thousand extents, + * so 16bit will be enough for the slot number. */ + + /* how many updates in this transaction */ + __be16 n_updates; + + /* maximum slot number, "al-extents" in drbd.conf speak. + * Having this in each transaction should make reconfiguration + * of that parameter easier. */ + __be16 context_size; + + /* slot number the context starts with */ + __be16 context_start_slot_nr; + + /* Some reserved bytes. Expected usage is a 64bit counter of + * sectors-written since device creation, and other data generation tag + * supporting usage */ + __be32 __reserved[4]; + + /* --- 36 byte used --- */ + + /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes + * in one transaction, then use the remaining byte in the 4k block for + * context information. "Flexible" number of updates per transaction + * does not help, as we have to account for the case when all update + * slots are used anyways, so it would only complicate code without + * additional benefit. 
+ */ + __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION]; + + /* but the extent number is 32bit, which at an extent size of 4 MiB + * allows to cover device sizes of up to 2**54 Byte (16 PiB) */ + __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION]; + + /* --- 420 bytes used (36 + 64*6) --- */ + + /* 4096 - 420 = 3676 = 919 * 4 */ + __be32 context[AL_CONTEXT_PER_TRANSACTION]; }; struct update_odbm_work { @@ -48,11 +94,8 @@ struct update_odbm_work { struct update_al_work { struct drbd_work w; - struct lc_element *al_ext; struct completion event; - unsigned int enr; - /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ - unsigned int old_enr; + int err; }; struct drbd_atodb_wait { @@ -107,67 +150,30 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int logical_block_size, mask, ok; - int offset = 0; + int ok; struct page *iop = mdev->md_io_page; D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); BUG_ON(!bdev->md_bdev); - logical_block_size = bdev_logical_block_size(bdev->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - /* in case logical_block_size != 512 [ s390 only? 
] */ - if (logical_block_size != MD_SECTOR_SIZE) { - mask = (logical_block_size / MD_SECTOR_SIZE) - 1; - D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); - offset = sector & mask; - sector = sector & ~mask; - iop = mdev->md_io_tmpp; - - if (rw & WRITE) { - /* these are GFP_KERNEL pages, pre-allocated - * on device initialization */ - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, - READ, logical_block_size); - - if (unlikely(!ok)) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [logical_block_size!=512]) failed!\n", - (unsigned long long)sector); - return 0; - } - - memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); - } - } + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", + current->comm, current->pid, __func__, + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); if (sector < drbd_md_first_sector(bdev) || - sector > drbd_md_last_sector(bdev)) + sector + 7 > drbd_md_last_sector(bdev)) dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); return 0; } - if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); - } - return ok; } @@ -211,20 +217,34 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) * current->bio_tail list now. * we have to delegate updates to the activity log * to the worker thread. 
*/ - init_completion(&al_work.event); - al_work.al_ext = al_ext; - al_work.enr = enr; - al_work.old_enr = al_ext->lc_number; - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); - wait_for_completion(&al_work.event); - mdev->al_writ_cnt++; + /* Serialize multiple transactions. + * This uses test_and_set_bit, memory barrier is implicit. + * Optimization potential: + * first check for transaction number > old transaction number, + * so not all waiters have to lock/unlock. */ + wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log)); - spin_lock_irq(&mdev->al_lock); - lc_committed(mdev->act_log); - spin_unlock_irq(&mdev->al_lock); + /* Double check: it may have been committed by someone else, + * while we have been waiting for the lock. */ + if (al_ext->lc_number != enr) { + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + /* FIXME + if (al_work.err) + we need an "lc_cancel" here; + */ + lc_committed(mdev->act_log); + spin_unlock_irq(&mdev->al_lock); + } + lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); } } @@ -283,95 +303,118 @@ w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); struct drbd_conf *mdev = w->mdev; - struct lc_element *updated = aw->al_ext; - const unsigned int new_enr = aw->enr; - const unsigned int evicted = aw->old_enr; - struct al_transaction *buffer; + struct al_transaction_on_disk *buffer; + struct lc_element *e; sector_t sector; - int i, n, mx; - unsigned int extent_nr; - u32 xor_sum = 0; + int i, mx; + unsigned extent_nr; + unsigned crc = 0; if (!get_ldev(mdev)) { - dev_err(DEV, - "disk is %s, cannot start al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), 
evicted, new_enr); + dev_err(DEV, "disk is %s, cannot start al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); return 1; } - /* do we have to do a bitmap write, first? - * TODO reduce maximum latency: - * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. - * For now, we must not write the transaction, - * if we cannot write out the bitmap of the evicted extent. */ - if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) - drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { dev_err(DEV, - "disk is %s, cannot write al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + "disk is %s, cannot write al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = (struct al_transaction *)page_address(mdev->md_io_page); + buffer = page_address(mdev->md_io_page); - buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); + memset(buffer, 0, sizeof(*buffer)); + buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); - n = lc_index_of(mdev->act_log, updated); + i = 0; + + /* Even though no one can start to change this list + * once we set the LC_LOCKED -- from drbd_al_begin_io(), + * lc_try_lock_for_transaction() --, someone may still + * be in the process of changing it. 
*/ + spin_lock_irq(&mdev->al_lock); + list_for_each_entry(e, &mdev->act_log->to_be_changed, list) { + if (i == AL_UPDATES_PER_TRANSACTION) { + i++; + break; + } + buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); + buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); + if (e->lc_number != LC_FREE) + drbd_bm_mark_for_writeout(mdev, + al_extent_to_bm_page(e->lc_number)); + i++; + } + spin_unlock_irq(&mdev->al_lock); + BUG_ON(i > AL_UPDATES_PER_TRANSACTION); - buffer->updates[0].pos = cpu_to_be32(n); - buffer->updates[0].extent = cpu_to_be32(new_enr); + buffer->n_updates = cpu_to_be16(i); + for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { + buffer->update_slot_nr[i] = cpu_to_be16(-1); + buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); + } - xor_sum ^= new_enr; + buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle); - mx = min_t(int, AL_EXTENTS_PT, + mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, mdev->act_log->nr_elements - mdev->al_tr_cycle); for (i = 0; i < mx; i++) { unsigned idx = mdev->al_tr_cycle + i; extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; - buffer->updates[i+1].pos = cpu_to_be32(idx); - buffer->updates[i+1].extent = cpu_to_be32(extent_nr); - xor_sum ^= extent_nr; + buffer->context[i] = cpu_to_be32(extent_nr); } - for (; i < AL_EXTENTS_PT; i++) { - buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); - buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); - xor_sum ^= LC_FREE; - } - mdev->al_tr_cycle += AL_EXTENTS_PT; + for (; i < AL_CONTEXT_PER_TRANSACTION; i++) + buffer->context[i] = cpu_to_be32(LC_FREE); + + mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; - buffer->xor_sum = cpu_to_be32(xor_sum); - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset + mdev->al_tr_pos; - - if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) - 
drbd_chk_io_error(mdev, 1, true); + + mdev->ldev->md.al_offset + + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + crc = crc32c(0, buffer, 4096); + buffer->crc32c = cpu_to_be32(crc); - D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); - mdev->al_tr_number++; + if (drbd_bm_write_hinted(mdev)) + aw->err = -EIO; + /* drbd_chk_io_error done already */ + else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + aw->err = -EIO; + drbd_chk_io_error(mdev, 1, true); + } else { + /* advance ringbuffer position and transaction counter */ + mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); + mdev->al_tr_number++; + } mutex_unlock(&mdev->md_io_mutex); - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } +/* FIXME + * reading of the activity log, + * and potentially dirtying of the affected bitmap regions, + * should be done from userland only. + * DRBD would simply always attach with an empty activity log, + * and refuse to attach to something that looks like a crashed primary. + */ + /** * drbd_al_read_tr() - Read a single transaction from the on disk activity log * @mdev: DRBD device. @@ -383,27 +426,39 @@ w_al_write_transaction(struct drbd_work *w, int unused) */ static int drbd_al_read_tr(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - struct al_transaction *b, int index) { + struct al_transaction_on_disk *b = page_address(mdev->md_io_page); sector_t sector; - int rv, i; - u32 xor_sum = 0; + u32 crc; - sector = bdev->md.md_offset + bdev->md.al_offset + index; + sector = bdev->md.md_offset + + bdev->md.al_offset + + index * (MD_BLOCK_SIZE>>9); /* Dont process error normally, * as this is done before disk is attached! 
*/ if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; - rv = (b->magic == cpu_to_be32(DRBD_MAGIC)); + if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) + return 0; + + if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) + return 0; - for (i = 0; i < AL_EXTENTS_PT + 1; i++) - xor_sum ^= be32_to_cpu(b->updates[i].extent); - rv &= (xor_sum == be32_to_cpu(b->xor_sum)); + if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) + return 0; - return rv; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) + return 0; + + crc = be32_to_cpu(b->crc32c); + b->crc32c = 0; + if (!expect(crc == crc32c(0, b, 4096))) + return 0; + + return 1; } /** @@ -415,7 +470,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, */ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { - struct al_transaction *buffer; + struct al_transaction_on_disk *b; int i; int rv; int mx; @@ -428,25 +483,36 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) u32 to_tnr = 0; u32 cnr; - mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); + /* Note that this is expected to be called with a newly created, + * clean and all unused activity log of the "expected size". + */ /* lock out all other meta data io for now, * and make sure the page is mapped. */ mutex_lock(&mdev->md_io_mutex); - buffer = page_address(mdev->md_io_page); + b = page_address(mdev->md_io_page); + + /* Always use the full ringbuffer space for now. + * possible optimization: read in all of it, + * then scan the in-memory pages. 
*/ + + mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* Find the valid transaction in the log */ - for (i = 0; i <= mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + for (i = 0; i < mx; i++) { + rv = drbd_al_read_tr(mdev, bdev, i); + /* invalid data in that block */ if (rv == 0) continue; + + /* IO error */ if (rv == -1) { mutex_unlock(&mdev->md_io_mutex); return 0; } - cnr = be32_to_cpu(buffer->tr_number); + cnr = be32_to_cpu(b->tr_number); if (++found_valid == 1) { from = i; to = i; @@ -454,8 +520,11 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) to_tnr = cnr; continue; } + + D_ASSERT(cnr != to_tnr); + D_ASSERT(cnr != from_tnr); if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx+1); + D_ASSERT(from_tnr - cnr + i - from == mx); from = i; from_tnr = cnr; } @@ -476,11 +545,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ i = from; while (1) { - int j, pos; - unsigned int extent_nr; - unsigned int trn; + struct lc_element *e; + unsigned j, n, slot, extent_nr; - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + rv = drbd_al_read_tr(mdev, bdev, i); if (!expect(rv != 0)) goto cancel; if (rv == -1) { @@ -488,23 +556,55 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) return 0; } - trn = be32_to_cpu(buffer->tr_number); + /* deal with different transaction types. + * not yet implemented */ + if (!expect(b->transaction_type == 0)) + goto cancel; - spin_lock_irq(&mdev->al_lock); + /* on the fly re-create/resize activity log? + * will be a special transaction type flag. */ + if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) + goto cancel; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) + goto cancel; - /* This loop runs backwards because in the cyclic - elements there might be an old version of the - updated element (in slot 0). 
So the element in slot 0 - can overwrite old versions. */ - for (j = AL_EXTENTS_PT; j >= 0; j--) { - pos = be32_to_cpu(buffer->updates[j].pos); - extent_nr = be32_to_cpu(buffer->updates[j].extent); + /* We are the only user of the activity log right now, + * don't actually need to take that lock. */ + spin_lock_irq(&mdev->al_lock); - if (extent_nr == LC_FREE) - continue; + /* first, apply the context, ... */ + for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); + j < AL_CONTEXT_PER_TRANSACTION && + slot < mdev->act_log->nr_elements; j++, slot++) { + extent_nr = be32_to_cpu(b->context[j]); + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); + } - lc_set(mdev->act_log, extent_nr, pos); - active_extents++; + /* ... then apply the updates, + * which override the context information. + * drbd_al_read_tr already did the rangecheck + * on n <= AL_UPDATES_PER_TRANSACTION */ + n = be16_to_cpu(b->n_updates); + for (j = 0; j < n; j++) { + slot = be16_to_cpu(b->update_slot_nr[j]); + extent_nr = be32_to_cpu(b->update_extent_nr[j]); + if (!expect(slot < mdev->act_log->nr_elements)) + break; + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); } spin_unlock_irq(&mdev->al_lock); @@ -514,15 +614,12 @@ cancel: if (i == to) break; i++; - if (i > mx) + if (i >= mx) i = 0; } mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = to; - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index edfdeb62c18f..3213808a898a 100644 --- 
a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1069,7 +1069,6 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for logical_block_size != 512 */ struct mutex md_io_mutex; /* protects the md_io_buffer */ spinlock_t al_lock; wait_queue_head_t al_wait; @@ -1259,22 +1258,39 @@ extern void drbd_ldev_destroy(struct drbd_conf *mdev); * either at the end of the backing device * or on a separate meta data device. */ -#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ /* The following numbers are sectors */ -#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ -#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ -/* Allows up to about 3.8TB */ -#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) - -/* Since the smalles IO unit is usually 512 byte */ -#define MD_SECTOR_SHIFT 9 -#define MD_SECTOR_SIZE (1<ldev); mdev->ldev = NULL;); - if (mdev->md_io_tmpp) { - __free_page(mdev->md_io_tmpp); - mdev->md_io_tmpp = NULL; - } clear_bit(GO_DISKLESS, &mdev->flags); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0a92f5226c2a..90d731723205 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -527,7 +527,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, case DRBD_MD_INDEX_FLEX_INT: bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still fixed */ - bdev->md.al_offset = -MD_AL_MAX_SIZE; + bdev->md.al_offset = -MD_AL_SECTORS; /* we need (slightly less than) ~ this much bitmap sectors: */ md_size_sect = drbd_get_capacity(bdev->backing_bdev); md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); @@ -751,8 +751,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev) unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= 7)) - mdev->sync_conf.al_extents = 127; 
+ if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) + mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && mdev->act_log->nr_elements == mdev->sync_conf.al_extents) @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, 1, + n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -932,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp union drbd_state ns, os; enum drbd_state_rv rv; int cp_discovered = 0; - int logical_block_size; drbd_reconfig_start(mdev); @@ -1087,25 +1086,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_set_sector_offsets(mdev, nbc); - /* allocate a second IO page if logical_block_size != 512 */ - logical_block_size = bdev_logical_block_size(nbc->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - if (logical_block_size != MD_SECTOR_SIZE) { - if (!mdev->md_io_tmpp) { - struct page *page = alloc_page(GFP_NOIO); - if (!page) - goto force_diskless_dec; - - dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", - logical_block_size, MD_SECTOR_SIZE); - dev_warn(DEV, "Workaround engaged (has performance impact).\n"); - - mdev->md_io_tmpp = page; - } - } - if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = ERR_NOMEM; @@ -1804,14 +1784,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.rate >= 1)) sc.rate = 1; - if (!expect(sc.al_extents >= 7)) - sc.al_extents = 127; /* arbitrary minimum */ -#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) - if (sc.al_extents > AL_MAX) { - dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); - sc.al_extents = AL_MAX; - } -#undef AL_MAX + + /* clip to allowed range */ + if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) + 
sc.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) + sc.al_extents = DRBD_AL_EXTENTS_MAX; /* to avoid spurious errors when configuring minors before configuring * the minors they depend on: if necessary, first create the minor we diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 35fc08a0a552..70a688b92c1b 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -336,6 +336,10 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +/* how I came up with this magic? + * base64 decode "actlog==" ;) */ +#define DRBD_AL_MAGIC 0x69cb65a2 + /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 #define DRBD_MD_INDEX_FLEX_EXT -2 diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 447c36752385..75f05af33725 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -102,10 +102,12 @@ #define DRBD_RATE_DEF 250 /* kb/second */ /* less than 7 would hit performance unnecessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 +#define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 #define DRBD_AFTER_MIN -1 -- cgit v1.2.3 From 1aba4d7fcfabe999e0c99683b394aa76d5c42842 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 15:38:08 +0100 Subject: drbd: Preparing the connector interface to operator on connections Up to now it only operated on minor numbers. Now it can work also on named connections. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 15 +++++++ drivers/block/drbd/drbd_nl.c | 96 +++++++++++++++++++++++++++--------------- include/linux/drbd.h | 19 +++++++-- 4 files changed, 94 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 48367e53a7a5..033af1995867 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1479,6 +1479,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); +struct drbd_tconn *conn_by_name(const char *name); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cbec5ff2cc74..4761426f9ad7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2196,6 +2196,21 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq) INIT_LIST_HEAD(&wq->q); } +struct drbd_tconn *conn_by_name(const char *name) +{ + struct drbd_tconn *tconn; + + write_lock_irq(&global_state_lock); + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (!strcmp(tconn->name, name)) + goto found; + } + tconn = NULL; +found: + write_unlock_irq(&global_state_lock); + return tconn; +} + struct drbd_tconn *drbd_new_tconn(char *name) { struct drbd_tconn *tconn; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b141f891f643..27a43d138f6b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2184,42 +2184,57 @@ out: return 0; } +enum cn_handler_type { + CHT_MINOR, + CHT_CONN, + CHT_CTOR, + /* CHT_RES, later */ +}; + struct cn_handler_struct { - int (*function)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); + enum cn_handler_type type; + union { + int (*minor_based)(struct drbd_conf *, + struct 
drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*conn_based)(struct drbd_tconn *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*constructor)(struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + }; int reply_body_size; }; static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { &drbd_nl_primary, 0 }, - [ P_secondary ] = { &drbd_nl_secondary, 0 }, - [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 }, - [ P_detach ] = { &drbd_nl_detach, 0 }, - [ P_net_conf ] = { &drbd_nl_net_conf, 0 }, - [ P_disconnect ] = { &drbd_nl_disconnect, 0 }, - [ P_resize ] = { &drbd_nl_resize, 0 }, - [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 }, - [ P_invalidate ] = { &drbd_nl_invalidate, 0 }, - [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 }, - [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 }, - [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 }, - [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 }, - [ P_resume_io ] = { &drbd_nl_resume_io, 0 }, - [ P_outdate ] = { &drbd_nl_outdate, 0 }, - [ P_get_config ] = { &drbd_nl_get_config, + [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, + [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, + [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, + [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, + [ P_net_conf ] = { CHT_MINOR, { &drbd_nl_net_conf }, 0 }, + [ P_disconnect ] = { CHT_MINOR, { &drbd_nl_disconnect }, 0 }, + [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, + [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, + [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, + [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, + [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, + [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, + [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, + [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, + [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, + [ 
P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, sizeof(struct syncer_conf_tag_len_struct) + sizeof(struct disk_conf_tag_len_struct) + sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { &drbd_nl_get_state, + [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, sizeof(struct get_state_tag_len_struct) + sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { &drbd_nl_get_uuids, + [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, sizeof(struct get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag, + [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { &drbd_nl_start_ov, 0 }, - [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, + [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, + [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) @@ -2229,6 +2244,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; struct drbd_conf *mdev; + struct drbd_tconn *tconn; int retcode, rr; int reply_size = sizeof(struct cn_msg) + sizeof(struct drbd_nl_cfg_reply) @@ -2244,13 +2260,6 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms goto fail; } - mdev = ensure_mdev(nlp->drbd_minor, - (nlp->flags & DRBD_NL_CREATE_DEVICE)); - if (!mdev) { - retcode = ERR_MINOR_INVALID; - goto fail; - } - if (nlp->packet_type >= P_nl_after_last_packet || nlp->packet_type == P_return_code_only) { retcode = ERR_PACKET_NR; @@ -2260,7 +2269,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms cm = cnd_table + nlp->packet_type; /* This may happen if packet number is 0: */ - if (cm->function == NULL) { + if (cm->minor_based == NULL) { retcode = ERR_PACKET_NR; goto fail; } @@ -2281,7 +2290,28 @@ static void 
drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ /* reply->tag_list; might be modified by cm->function. */ - rr = cm->function(mdev, nlp, reply); + retcode = ERR_MINOR_INVALID; + rr = 0; + switch (cm->type) { + case CHT_MINOR: + mdev = minor_to_mdev(nlp->drbd_minor); + if (!mdev) + goto fail; + rr = cm->minor_based(mdev, nlp, reply); + break; + case CHT_CONN: + tconn = conn_by_name(nlp->obj_name); + if (!tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto fail; + } + rr = cm->conn_based(tconn, nlp, reply); + break; + case CHT_CTOR: + rr = cm->constructor(nlp, reply); + break; + /* case CHT_RES: */ + } cn_reply->id = req->id; cn_reply->seq = req->seq; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 70a688b92c1b..7683b4ab6583 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,6 +155,7 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, + ERR_CONN_NOT_KNOWN = 158, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -347,8 +348,11 @@ enum drbd_timeout_flag { /* Start of the new netlink/connector stuff */ -#define DRBD_NL_CREATE_DEVICE 0x01 -#define DRBD_NL_SET_DEFAULTS 0x02 +enum drbd_ncr_flags { + DRBD_NL_CREATE_DEVICE = 0x01, + DRBD_NL_SET_DEFAULTS = 0x02, +}; +#define DRBD_NL_OBJ_NAME_LEN 32 /* For searching a vacant cn_idx value */ @@ -356,8 +360,15 @@ enum drbd_timeout_flag { struct drbd_nl_cfg_req { int packet_type; - unsigned int drbd_minor; - int flags; + union { + struct { + unsigned int drbd_minor; + enum drbd_ncr_flags flags; + }; + struct { + char obj_name[DRBD_NL_OBJ_NAME_LEN]; + }; + }; unsigned short tag_list[]; }; -- cgit v1.2.3 From 774b305518a68a50df4f479bcf79da2add724e6e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Feb 2011 02:07:03 -0500 Subject: drbd: Implemented new commands to create/delete connections/minors Signed-off-by: Philipp Reisner Signed-off-by: 
Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 68 ++++++++++++++------------ drivers/block/drbd/drbd_nl.c | 106 +++++++++++++++++++++++++---------------- include/linux/drbd.h | 3 ++ include/linux/drbd_nl.h | 12 +++++ 5 files changed, 120 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a27e2a4e038d..535d503886d8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1258,7 +1258,6 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); - /* Meta data layout We reserve a 128MB Block (4k aligned) * either at the end of the backing device @@ -1476,8 +1475,9 @@ extern wait_queue_head_t drbd_pp_wait; extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); -extern struct drbd_conf *drbd_new_device(unsigned int minor); +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_free_mdev(struct drbd_conf *mdev); +extern void drbd_delete_device(unsigned int minor); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2bfd63058f40..ec7d0d98657c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -614,13 +614,16 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas return thi ? thi->name : task->comm; } -#ifdef CONFIG_SMP int conn_lowest_minor(struct drbd_tconn *tconn) { int minor = 0; - idr_get_next(&tconn->volumes, &minor); + + if (!idr_get_next(&tconn->volumes, &minor)) + return -1; return minor; } + +#ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * @mdev: DRBD device. 
@@ -2078,15 +2081,16 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev) dev_err(DEV, "%d EEs in net list found!\n", rr); } -/* caution. no locking. - * currently only used from module cleanup code. */ -static void drbd_delete_device(unsigned int minor) +/* caution. no locking. */ +void drbd_delete_device(unsigned int minor) { struct drbd_conf *mdev = minor_to_mdev(minor); if (!mdev) return; + idr_remove(&mdev->tconn->volumes, minor); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2101,7 +2105,6 @@ static void drbd_delete_device(unsigned int minor) bdput(mdev->this_bdev); drbd_free_resources(mdev); - drbd_free_tconn(mdev->tconn); drbd_release_ee_lists(mdev); @@ -2223,6 +2226,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) + goto fail; + if (!tl_init(tconn)) goto fail; @@ -2252,6 +2258,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) fail: tl_cleanup(tconn); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn); @@ -2265,6 +2272,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_unlock_irq(&global_state_lock); idr_destroy(&tconn->volumes); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); @@ -2272,32 +2280,31 @@ void drbd_free_tconn(struct drbd_tconn *tconn) kfree(tconn); } -struct drbd_conf *drbd_new_device(unsigned int minor) +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr) { struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; - char conn_name[9]; /* drbd1234N */ - int vnr; + int vnr_got = vnr; + + mdev = minor_to_mdev(minor); + if (mdev) + return ERR_MINOR_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) - return NULL; - sprintf(conn_name, "drbd%d", minor); - mdev->tconn = 
drbd_new_tconn(conn_name); - if (!mdev->tconn) - goto out_no_tconn; - if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL)) - goto out_no_cpumask; - if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr)) - goto out_no_cpumask; - if (vnr != 0) { - dev_err(DEV, "vnr = %d\n", vnr); - goto out_no_cpumask; - } - if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) - goto out_no_cpumask; + return ERR_NOMEM; + + mdev->tconn = tconn; + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_no_idr; + if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) + goto out_no_idr; + if (vnr_got != vnr) { + dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); + goto out_no_q; + } mdev->minor = minor; @@ -2354,7 +2361,10 @@ struct drbd_conf *drbd_new_device(unsigned int minor) INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - return mdev; + minor_table[minor] = mdev; + add_disk(disk); + + return NO_ERROR; /* out_whatever_else: kfree(mdev->current_epoch); */ @@ -2367,12 +2377,10 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->tconn->cpu_mask); -out_no_cpumask: - drbd_free_tconn(mdev->tconn); -out_no_tconn: + idr_remove(&tconn->volumes, vnr_got); +out_no_idr: kfree(mdev); - return NULL; + return ERR_NOMEM; } /* counterpart of drbd_new_device. 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 455a51dd364d..f2739fd188a0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -443,40 +443,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) return rv; } -static struct drbd_conf *ensure_mdev(int minor, int create) -{ - struct drbd_conf *mdev; - - if (minor >= minor_count) - return NULL; - - mdev = minor_to_mdev(minor); - - if (!mdev && create) { - struct gendisk *disk = NULL; - mdev = drbd_new_device(minor); - - spin_lock_irq(&drbd_pp_lock); - if (minor_table[minor] == NULL) { - minor_table[minor] = mdev; - disk = mdev->vdisk; - mdev = NULL; - } /* else: we lost the race */ - spin_unlock_irq(&drbd_pp_lock); - - if (disk) /* we won the race above */ - /* in case we ever add a drbd_delete_device(), - * don't forget the del_gendisk! */ - add_disk(disk); - else /* we lost the race above */ - drbd_free_mdev(mdev); - - mdev = minor_to_mdev(minor); - } - - return mdev; -} - static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { @@ -1789,12 +1755,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) sc.al_extents = DRBD_AL_EXTENTS_MAX; - /* to avoid spurious errors when configuring minors before configuring - * the minors they depend on: if necessary, first create the minor we - * depend on */ - if (sc.after >= 0) - ensure_mdev(sc.after, 1); - /* most sanity checks done, try to assign the new sync-after * dependency. need to hold the global lock in there, * to avoid a race in the dependency loop check. 
*/ @@ -2184,13 +2144,73 @@ out: return 0; } +static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_connection args; + + if (!new_connection_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = NO_ERROR; + if (!drbd_new_tconn(args.name)) + reply->ret_code = ERR_NOMEM; + + return 0; +} + +static int drbd_nl_new_minor(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_minor args; + + args.vol_nr = 0; + args.minor = 0; + + if (!new_minor_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); + + return 0; +} + +static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_MINOR_CONFIGURED; + } + return 0; +} + +static int drbd_nl_del_conn(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + if (conn_lowest_minor(tconn) < 0) { + drbd_free_tconn(tconn); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_CONN_IN_USE; + } + + return 0; +} + enum cn_handler_type { CHT_MINOR, CHT_CONN, CHT_CTOR, /* CHT_RES, later */ }; - struct cn_handler_struct { enum cn_handler_type type; union { @@ -2235,6 +2255,10 @@ static struct cn_handler_struct cnd_table[] = { sizeof(struct get_timeout_flag_tag_len_struct)}, [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, + [ P_new_connection ] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, + [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, + [ 
P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, + [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7683b4ab6583..e192167e6145 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -156,6 +156,9 @@ enum drbd_ret_code { ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, ERR_CONN_NOT_KNOWN = 158, + ERR_CONN_IN_USE = 159, + ERR_MINOR_CONFIGURED = 160, + ERR_MINOR_EXISTS = 161, /* insert new ones above this line */ AFTER_LAST_ERR_CODE diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ab6159e4fcf0..1216c7a432c5 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -152,6 +152,18 @@ NL_PACKET(new_c_uuid, 26, NL_RESPONSE(return_code_only, 27) #endif +NL_PACKET(new_connection, 28, /* CHT_CTOR */ + NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) +) + +NL_PACKET(new_minor, 29, /* CHT_CONN */ + NL_INTEGER( 86, T_MANDATORY, minor) + NL_INTEGER( 87, T_MANDATORY, vol_nr) +) + +NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ +NL_PACKET(del_connection, 31, ) /* CHT_CONN */ + #undef NL_PACKET #undef NL_INTEGER #undef NL_INT64 -- cgit v1.2.3 From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:51:43 +0100 Subject: drbd: default to detach on-io-error Old default behaviour was "pass-on", which is not useful in production at all. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 75f05af33725..22920a8af4e2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,7 +125,7 @@ #define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT -- cgit v1.2.3 From ec2c35ac1ea288f5c931e32452ecea50068e8450 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:20:08 +0100 Subject: drbd: prepare the transition from connector to genetlink This adds the new API header and helper files. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 349 +++++++++++++++++++++++++++++++ include/linux/drbd_genl_api.h | 55 +++++ include/linux/genl_magic_func.h | 417 ++++++++++++++++++++++++++++++++++++++ include/linux/genl_magic_struct.h | 260 ++++++++++++++++++++++++ 4 files changed, 1081 insertions(+) create mode 100644 include/linux/drbd_genl.h create mode 100644 include/linux/drbd_genl_api.h create mode 100644 include/linux/genl_magic_func.h create mode 100644 include/linux/genl_magic_struct.h (limited to 'include') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h new file mode 100644 index 000000000000..84e16848f7a1 --- /dev/null +++ b/include/linux/drbd_genl.h @@ -0,0 +1,349 @@ +/* + * General overview: + * full generic netlink message: + * |nlmsghdr|genlmsghdr| + * + * payload: + * |optional fixed size family header| + * + * sequence of netlink attributes: + * I chose to have all "top level" attributes NLA_NESTED, + * corresponding to some real struct. 
+ * So we have a sequence of |tla, len| + * + * nested nla sequence: + * may be empty, or contain a sequence of netlink attributes + * representing the struct fields. + * + * The tag number of any field (regardless of containing struct) + * will be available as T_ ## field_name, + * so you cannot have the same field name in two different structs. + * + * The tag numbers themselves are per struct, though, + * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, + * which we won't use here). + * The tag numbers are used as index in the respective nla_policy array. + * + * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy + * genl_magic_struct.h + * generates the struct declaration, + * generates an entry in the tla enum, + * genl_magic_func.h + * generates an entry in the static tla policy + * with .type = NLA_NESTED + * generates the static _nl_policy definition, + * and static conversion functions + * + * genl_magic_func.h + * + * GENL_mc_group(group) + * genl_magic_struct.h + * does nothing + * genl_magic_func.h + * defines and registers the mcast group, + * and provides a send helper + * + * GENL_notification(op_name, op_num, mcast_group, tla list) + * These are notifications to userspace. + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * does nothing + * + * mcast group: the name of the mcast group this notification should be + * expected on + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + * + * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" + * These are requests from userspace. 
+ * + * _op and _notification share the same "number space", + * op_nr will be assigned to "genlmsghdr->cmd" + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * generates an entry in the static genl_ops array, + * and static register/unregister functions to + * genl_register_family_with_ops(). + * + * flags and handler: + * GENL_op_init( .doit = x, .dumpit = y, .flags = something) + * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + */ + +/* + * STRUCTS + */ + +/* this is sent kernel -> userland on various error conditions, and contains + * informational textual info, which is supposedly human readable. + * The computer relevant return code is in the drbd_genlmsghdr. + */ +GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, + /* "arbitrary" size strings, nla_policy.len = 0 */ + __str_field(1, GENLA_F_MANDATORY, info_text, 0) +) + +/* Configuration requests typically need a context to operate on. + * Possible keys are device minor (fits in the drbd_genlmsghdr), + * the replication link (aka connection) name, + * and/or the replication group (aka resource) name, + * and the volume id within the resource. 
*/ +GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, + /* currently only 256 volumes per group, + * but maybe we still change that */ + __u32_field(1, GENLA_F_MANDATORY, ctx_volume) + __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) +) + +GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, + __u64_field(1, GENLA_F_MANDATORY, disk_size) + __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) + __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) + __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) + __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) + __u32_field(7, GENLA_F_MANDATORY, fencing) + __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(11, GENLA_F_MANDATORY, no_md_flush) + __flg_field(12, GENLA_F_MANDATORY, use_bmbv) +) + +GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, + __u32_field(1, GENLA_F_MANDATORY, rate) + __u32_field(2, GENLA_F_MANDATORY, after) + __u32_field(3, GENLA_F_MANDATORY, al_extents) + __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) + __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __flg_field(7, GENLA_F_MANDATORY, use_rle) + __u32_field(8, GENLA_F_MANDATORY, on_no_data) + __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(10, GENLA_F_MANDATORY, c_delay_target) + __u32_field(11, GENLA_F_MANDATORY, c_fill_target) + __u32_field(12, GENLA_F_MANDATORY, c_max_rate) + __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +) + +GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, + __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + shared_secret, SHARED_SECRET_MAX) + __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(4, GENLA_F_REQUIRED, my_addr, 128) + __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) + 
__u32_field(6, GENLA_F_REQUIRED, wire_protocol) + __u32_field(7, GENLA_F_MANDATORY, try_connect_int) + __u32_field(8, GENLA_F_MANDATORY, timeout) + __u32_field(9, GENLA_F_MANDATORY, ping_int) + __u32_field(10, GENLA_F_MANDATORY, ping_timeo) + __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(13, GENLA_F_MANDATORY, ko_count) + __u32_field(14, GENLA_F_MANDATORY, max_buffers) + __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(17, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(20, GENLA_F_MANDATORY, rr_conflict) + __u32_field(21, GENLA_F_MANDATORY, on_congestion) + __u32_field(22, GENLA_F_MANDATORY, cong_fill) + __u32_field(23, GENLA_F_MANDATORY, cong_extents) + __flg_field(24, GENLA_F_MANDATORY, two_primaries) + __flg_field(25, GENLA_F_MANDATORY, want_lose) + __flg_field(26, GENLA_F_MANDATORY, no_cork) + __flg_field(27, GENLA_F_MANDATORY, always_asbp) + __flg_field(28, GENLA_F_MANDATORY, dry_run) +) + +GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, + __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) +) + +GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, + __u64_field(1, GENLA_F_MANDATORY, resize_size) + __flg_field(2, GENLA_F_MANDATORY, resize_force) + __flg_field(3, GENLA_F_MANDATORY, no_resync) +) + +GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, + /* the reason of the broadcast, + * if this is an event triggered broadcast. */ + __u32_field(1, GENLA_F_MANDATORY, sib_reason) + __u32_field(2, GENLA_F_REQUIRED, current_state) + __u64_field(3, GENLA_F_MANDATORY, capacity) + __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + + /* These are for broadcast from after state change work. 
+ * prev_state and new_state are from the moment the state change took + * place, new_state is not necessarily the same as current_state, + * there may have been more state changes since. Which will be + * broadcasted soon, in their respective after state change work. */ + __u32_field(5, GENLA_F_MANDATORY, prev_state) + __u32_field(6, GENLA_F_MANDATORY, new_state) + + /* if we have a local disk: */ + __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, GENLA_F_MANDATORY, disk_flags) + __u64_field(9, GENLA_F_MANDATORY, bits_total) + __u64_field(10, GENLA_F_MANDATORY, bits_oos) + /* and in case resync or online verify is active */ + __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + + /* for pre and post notifications of helper execution */ + __str_field(13, GENLA_F_MANDATORY, helper, 32) + __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) +) + +GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, + __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) +) + +GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, + __flg_field(1, GENLA_F_MANDATORY, clear_bm) +) + +GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, + __u32_field(1, GENLA_F_REQUIRED, timeout_type) +) + +GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, + __flg_field(1, GENLA_F_MANDATORY, force_disconnect) +) + +/* + * Notifications and commands (genlmsghdr->cmd) + */ +GENL_mc_group(events) + + /* kernel -> userspace announcement of changes */ +GENL_notification( + DRBD_EVENT, 1, events, + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + + /* query kernel for specific or all info */ +GENL_op( + DRBD_ADM_GET_STATUS, 2, + GENL_op_init( + .doit = 
drbd_adm_get_status, + .dumpit = drbd_adm_get_status_all, + /* anyone may ask for the status, + * it is broadcasted anyways */ + ), + /* To select the object .doit. + * Or a subset of objects in .dumpit. */ + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) +) + +#if 0 + /* TO BE DONE */ + /* create or destroy resources, aka replication groups */ +GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +#endif + + /* add DRBD minor devices as volumes to resources */ +GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* add or delete replication links to resources */ +GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on replication links */ +GENL_op(DRBD_ADM_SYNCER, 9, + GENL_doit(drbd_adm_syncer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_CONNECT, 10, + GENL_doit(drbd_adm_connect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + +GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on minors */ +GENL_op(DRBD_ADM_ATTACH, 12, + GENL_doit(drbd_adm_attach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, 
GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_RESIZE, 13, + GENL_doit(drbd_adm_resize), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) +) + + /* operates on all volumes within a resource */ +GENL_op( + DRBD_ADM_PRIMARY, 14, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_SECONDARY, 15, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_NEW_C_UUID, 16, + GENL_doit(drbd_adm_new_c_uuid), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_START_OV, 17, + GENL_doit(drbd_adm_start_ov), + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) +) + +GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) 
+GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h new file mode 100644 index 000000000000..9ef50d51e34e --- /dev/null +++ b/include/linux/drbd_genl_api.h @@ -0,0 +1,55 @@ +#ifndef DRBD_GENL_STRUCT_H +#define DRBD_GENL_STRUCT_H + +/** + * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests + * @minor: + * For admin requests (user -> kernel): which minor device to operate on. + * For (unicast) replies or informational (broadcast) messages + * (kernel -> user): which minor device the information is about. + * If we do not operate on minors, but on connections or resources, + * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT + * is used instead. + * @flags: possible operation modifiers (relevant only for user->kernel): + * DRBD_GENL_F_SET_DEFAULTS + * @volume: + * When creating a new minor (adding it to a resource), the resource needs + * to know which volume number within the resource this is supposed to be. + * The volume number corresponds to the same volume number on the remote side, + * whereas the minor number on the remote side may be different + * (union with flags). 
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags); + */ +struct drbd_genlmsghdr { + __u32 minor; + union { + __u32 flags; + __s32 ret_code; + }; +}; + +/* To be used in drbd_genlmsghdr.flags */ +enum { + DRBD_GENL_F_SET_DEFAULTS = 1, +}; + +enum drbd_state_info_bcast_reason { + SIB_GET_STATUS_REPLY = 1, + SIB_STATE_CHANGE = 2, + SIB_HELPER_PRE = 3, + SIB_HELPER_POST = 4, + SIB_SYNC_PROGRESS = 5, +}; + +/* hack around predefined gcc/cpp "linux=1", + * we cannot possibly include <1/drbd_genl.h> */ +#undef linux + +#include +#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_FAMILY drbd +#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) +#define GENL_MAGIC_INCLUDE_FILE +#include + +#endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h new file mode 100644 index 000000000000..8a86f659d363 --- /dev/null +++ b/include/linux/genl_magic_func.h @@ -0,0 +1,417 @@ +#ifndef GENL_MAGIC_FUNC_H +#define GENL_MAGIC_FUNC_H + +#include + +/* + * Extension of genl attribute validation policies {{{1 + * {{{2 + */ + +/** + * nla_is_required - return true if this attribute is required + * @nla: netlink attribute + */ +static inline int nla_is_required(const struct nlattr *nla) +{ + return nla->nla_type & GENLA_F_REQUIRED; +} + +/** + * nla_is_mandatory - return true if understanding this attribute is mandatory + * @nla: netlink attribute + * Note: REQUIRED attributes are implicitly MANDATORY as well + */ +static inline int nla_is_mandatory(const struct nlattr *nla) +{ + return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); +} + +/* Functionality to be integrated into nla_parse(), and validate_nla(), + * respectively. + * + * Enforcing the "mandatory" bit is done here, + * by rejecting unknown mandatory attributes. 
+ * + * Part of enforcing the "required" flag would mean to embed it into + * nla_policy.type, and extending validate_nla(), which currently does + * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, + * so we cannot do that. Thats why enforcing "required" is done in the + * generated assignment functions below. */ +static int nla_check_unknown(int maxtype, struct nlattr *head, int len) +{ + struct nlattr *nla; + int rem; + nla_for_each_attr(nla, head, len, rem) { + __u16 type = nla_type(nla); + if (type > maxtype && nla_is_mandatory(nla)) + return -EOPNOTSUPP; + } + return 0; +} + +/* + * Magic: declare tla policy {{{1 + * Magic: declare nested policies + * {{{2 + */ +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + [tag_name] = { .type = NLA_NESTED }, + +static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ +{ s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type }, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ + __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, + +#include GENL_MAGIC_INCLUDE_FILE + +#ifndef __KERNEL__ +#ifndef pr_info +#define pr_info(args...) fprintf(stderr, args); +#endif +#endif + +#if 1 +static void dprint_field(const char *dir, int nla_type, + const char *name, void *valp) +{ + __u64 val = valp ? 
*(__u8 *)valp : 1; + /* val starts out as the first byte only, which is all NLA_U8 and + * NLA_FLAG need and is safe for every member width; wider types are + * re-read below with their own width. Always loading 4 bytes over-read + * 1- and 2-byte struct members and picked the wrong bytes on big-endian. */ + switch (nla_type) { + case NLA_U8: + case NLA_U16: + case NLA_U32: + if (valp && nla_type == NLA_U16) + val = *(__u16 *)valp; + else if (valp && nla_type == NLA_U32) + val = *(__u32 *)valp; + pr_info("%s attr %s: %d 0x%08x\n", dir, + name, (int)val, (unsigned)val); + break; + case NLA_U64: + val = *(__u64*)valp; + pr_info("%s attr %s: %lld 0x%08llx\n", dir, + name, (long long)val, (unsigned long long)val); + break; + case NLA_FLAG: + if (val) + pr_info("%s attr %s: set\n", dir, name); + break; + } +} + +static void dprint_array(const char *dir, int nla_type, + const char *name, const char *val, unsigned len) +{ + switch (nla_type) { + case NLA_NUL_STRING: + if (len && val[len-1] == '\0') + len--; + pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); + break; + default: + /* we can always show 4 byte, + * that's what nlattr are aligned to. + * Cast to unsigned char so bytes >= 0x80 are not sign-extended. */ + pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", + dir, name, len, (unsigned char)val[0], (unsigned char)val[1], + (unsigned char)val[2], (unsigned char)val[3]); + } +} + +#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); + +/* Name is a member field name of the struct s. + * If s is NULL (only parsing, no copy requested in *_from_attrs()), + * nla is supposed to point to the attribute containing the information + * corresponding to that struct member. */ +#define DPRINT_FIELD(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_field(dir, nla_type, #name, &s->name); \ + else if (nla) \ + dprint_field(dir, nla_type, #name, \ + (nla_type == NLA_FLAG) ?
NULL \ + : nla_data(nla)); \ + } while (0) + +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_array(dir, nla_type, #name, \ + s->name, s->name ## _len); \ + else if (nla) \ + dprint_array(dir, nla_type, #name, \ + nla_data(nla), nla_len(nla)); \ + } while (0) +#else +#define DPRINT_TLA(a, op, b) do {} while (0) +#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) +#endif + +/* + * Magic: provide conversion functions {{{1 + * populate struct from attribute table: + * {{{2 + */ + +/* processing of generic netlink messages is serialized. + * use one static buffer for parsing of nested attributes */ +static struct nlattr *nested_attr_tb[128]; + +#ifndef BUILD_BUG_ON +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). 
*/ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#endif + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + /* static, potentially unused */ \ +int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +{ \ + const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ + struct nlattr *tla = tb[tag_number]; \ + struct nlattr **ntb = nested_attr_tb; \ + struct nlattr *nla; \ + int err; \ + BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \ + if (!tla) \ + return -ENOMSG; \ + DPRINT_TLA(#s_name, "<=-", #tag_name); \ + err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + if (err) \ + return err; \ + err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ + if (err) \ + return err; \ + \ + s_fields \ + return 0; \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } + +/* validate_nla() already checked nla_len <= maxlen appropriately. 
*/ +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name ## _len = \ + __get(s->name, nla, maxlen); \ + DPRINT_ARRAY("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } \ + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +/* + * Magic: define op number to op name mapping {{{1 + * {{{2 + */ +const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +{ + switch (cmd) { +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + case op_num: return #op_name; +#include GENL_MAGIC_INCLUDE_FILE + default: + return "unknown"; + } +} + +#ifdef __KERNEL__ +#include +/* + * Magic: define genl_ops {{{1 + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ +{ \ + handler \ + .cmd = op_name, \ + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ +}, + +#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +static struct genl_ops ZZZ_genl_ops[] __read_mostly = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +/* + * Define the genl_family, multicast groups, {{{1 + * and provide register/unregister functions. 
+ * {{{2 + */ +#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +static struct genl_family ZZZ_genl_family __read_mostly = { + .id = GENL_ID_GENERATE, + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, /* this family's own top-level policy table, not a hard-coded drbd one */ +}; + +/* + * Magic: define multicast groups + * Magic: define multicast group registration helper + */ +#undef GENL_mc_group +#define GENL_mc_group(group) \ +static struct genl_multicast_group \ +CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \ + .name = #group, \ +}; \ +static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ + struct sk_buff *skb, gfp_t flags) \ +{ \ + unsigned int group_id = \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \ + if (!group_id) \ + return -EINVAL; \ + return genlmsg_multicast(skb, 0, group_id, flags); \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +{ + int err = genl_register_family_with_ops(&ZZZ_genl_family, + ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); + if (err) + return err; +#undef GENL_mc_group +#define GENL_mc_group(group) \ + err = genl_register_mc_group(&ZZZ_genl_family, \ + &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \ + if (err) \ + goto fail; \ + else \ + pr_info("%s: mcg %s: %u\n", #group, \ + __stringify(GENL_MAGIC_FAMILY), \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_mc_group +#define GENL_mc_group(group) + return 0; +fail: + genl_unregister_family(&ZZZ_genl_family); + return err; +} + +void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +{ + genl_unregister_family(&ZZZ_genl_family); +} + +/* + * Magic: provide conversion functions {{{1 + * populate skb from struct.
+ * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \ + const bool exclude_sensitive) \ +{ \ + struct nlattr *tla = nla_nest_start(skb, tag_number); \ + if (!tla) \ + goto nla_put_failure; \ + DPRINT_TLA(#s_name, "-=>", #tag_name); \ + s_fields \ + nla_nest_end(skb, tla); \ + return 0; \ + \ +nla_put_failure: \ + if (tla) \ + nla_nest_cancel(skb, tla); \ + return -EMSGSIZE; \ +} \ +static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 0); \ +} \ +static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 1); \ +} + + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_FIELD(">>", nla_type, name, s, NULL); \ + __put(skb, attr_nr, s->name); \ + } + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ + __put(skb, attr_nr, min_t(int, maxlen, \ + s->name ## _len + (nla_type == NLA_NUL_STRING)),\ + s->name); \ + } + +#include GENL_MAGIC_INCLUDE_FILE + +#endif /* __KERNEL__ */ + +/* }}}1 */ +#endif /* GENL_MAGIC_FUNC_H */ +/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h new file mode 100644 index 000000000000..745ebfd6c7e5 --- /dev/null +++ b/include/linux/genl_magic_struct.h @@ -0,0 +1,260 @@ +#ifndef GENL_MAGIC_STRUCT_H +#define GENL_MAGIC_STRUCT_H + +#ifndef GENL_MAGIC_FAMILY +# error "you need to define GENL_MAGIC_FAMILY before inclusion" +#endif + +#ifndef GENL_MAGIC_VERSION +# 
error "you need to define GENL_MAGIC_VERSION before inclusion" +#endif + +#ifndef GENL_MAGIC_INCLUDE_FILE +# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" +#endif + +#include +#include + +#define CONCAT__(a,b) a ## b +#define CONCAT_(a,b) CONCAT__(a,b) + +extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); + +/* + * Extension of genl attribute validation policies {{{2 + */ + +/** + * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement + * + * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. + * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. + * + * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. + * Without this, unknown attributes (> maxtype) are _silently_ ignored + * by validate_nla(). + * + * To be used for API extensions, so older kernel can reject requests for not + * yet implemented features, if newer userland tries to use them even though + * the genl_family version clearly indicates they are not available. + * + * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. + * To be used for API extensions for things that have sane defaults, + * so newer userland can still talk to older kernel, knowing it will + * silently ignore these attributes if not yet known. + * + * NOTE: These flags overload + * NLA_F_NESTED (1 << 15) + * NLA_F_NET_BYTEORDER (1 << 14) + * from linux/netlink.h, which are not useful for validate_nla(): + * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting + * .type = NLA_NESTED in the appropriate policy. + * + * See also: nla_type() + */ +enum { + GENLA_F_MAY_IGNORE = 0, + GENLA_F_MANDATORY = 1 << 14, + GENLA_F_REQUIRED = 1 << 15, + + /* This will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb, to exclude "sensitive" + * information from broadcasts, or on unpriviledged get requests. 
+ * This is useful because genetlink multicast groups can be listened in + * on by anyone. */ + GENLA_F_SENSITIVE = 1 << 16, +}; + +#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) + +/* }}}1 + * MAGIC + * multi-include macro expansion magic starts here + */ + +/* MAGIC helpers {{{2 */ + +/* possible field types */ +#define __flg_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ + nla_get_flag, __nla_put_flag) +#define __u8_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ + nla_get_u8, NLA_PUT_U8) +#define __u16_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ + nla_get_u16, NLA_PUT_U16) +#define __u32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ + nla_get_u32, NLA_PUT_U32) +#define __u64_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ + nla_get_u64, NLA_PUT_U64) +#define __str_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ + nla_strlcpy, NLA_PUT) +#define __bin_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ + nla_memcpy, NLA_PUT) + +#define __nla_put_flag(skb, attrtype, value) \ + do { \ + if (value) \ + NLA_PUT_FLAG(skb, attrtype); \ + } while (0) + +#define GENL_op_init(args...) 
args +#define GENL_doit(handler) \ + .doit = handler, \ + .flags = GENL_ADMIN_PERM, +#define GENL_dumpit(handler) \ + .dumpit = handler, \ + .flags = GENL_ADMIN_PERM, + +/* }}}1 + * Magic: define the enum symbols for genl_ops + * Magic: define the enum symbols for top level attributes + * Magic: define the enum symbols for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + op_name = op_num, + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + op_name = op_num, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + tag_name = tag_number, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: compile time assert unique numbers for operations + * Magic: -"- unique numbers for top level attributes + * Magic: -"- unique numbers for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) \ + case op_name: + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + case op_name: + +static 
inline void ct_assert_unique_operations(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + case tag_number: + +static inline void ct_assert_unique_top_level_attributes(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ +{ \ + switch (0) { \ + s_fields \ + ; \ + } \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + case attr_nr: + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + case attr_nr: + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: declare structs + * struct { + * fields + * }; + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +struct s_name { s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + type name; + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + type name[maxlen]; \ + __u32 name ## _len; + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 */ +#endif /* GENL_MAGIC_STRUCT_H */ +/* vim: set foldmethod=marker nofoldenable : */ -- cgit v1.2.3 From 3b98c0c2093d1f92e5b7394ae0b13d142e7ef880 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 12:49:34 +0100 Subject: drbd: switch configuration interface from connector to genetlink Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 3 +- drivers/block/drbd/drbd_int.h | 36 +- drivers/block/drbd/drbd_main.c | 27 +- drivers/block/drbd/drbd_nl.c | 1536 
+++++++++++++++++++------------------- drivers/block/drbd/drbd_state.c | 7 +- include/linux/drbd.h | 35 +- include/linux/genl_magic_func.h | 2 +- 7 files changed, 806 insertions(+), 840 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 7cd78617669b..c1a90616776b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -702,6 +702,7 @@ static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); struct drbd_conf *mdev = w->mdev; + struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; if (!get_ldev(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) @@ -725,7 +726,7 @@ static int w_update_odbm(struct drbd_work *w, int unused) break; } } - drbd_bcast_sync_progress(mdev); + drbd_bcast_event(mdev, &sib); return 1; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e68758344647..429fd8da6b71 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "drbd_state.h" @@ -65,7 +66,6 @@ extern unsigned int minor_count; extern int disable_sendpage; extern int allow_oos; -extern unsigned int cn_idx; #ifdef CONFIG_DRBD_FAULT_INJECTION extern int enable_faults; @@ -865,14 +865,6 @@ struct drbd_md { */ }; -/* for sync_conf and other types... 
*/ -#define NL_PACKET(name, number, fields) struct name { fields }; -#define NL_INTEGER(pn,pr,member) int member; -#define NL_INT64(pn,pr,member) __u64 member; -#define NL_BIT(pn,pr,member) unsigned member:1; -#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; -#include "linux/drbd_nl.h" - struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; @@ -1502,7 +1494,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, extern void drbd_free_mdev(struct drbd_conf *mdev); extern void drbd_delete_device(unsigned int minor); -struct drbd_tconn *drbd_new_tconn(char *name); +struct drbd_tconn *drbd_new_tconn(const char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); struct drbd_tconn *conn_by_name(const char *name); @@ -1679,16 +1671,22 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); - /* drbd_nl.c */ - -void drbd_nl_cleanup(void); -int __init drbd_nl_init(void); -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); -void drbd_bcast_sync_progress(struct drbd_conf *mdev); -void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *, - const char *, const struct drbd_peer_request *); - +/* state info broadcast */ +struct sib_info { + enum drbd_state_info_bcast_reason sib_reason; + union { + struct { + char *helper_name; + unsigned helper_exit_code; + }; + struct { + union drbd_state os; + union drbd_state ns; + }; + }; +}; +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib); /* * inline helper functions diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9f6db5947c65..9697ab872098 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -86,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!"); module_param(minor_count, 
uint, 0444); module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); -module_param(cn_idx, uint, 0444); module_param(proc_details, int, 0644); #ifdef CONFIG_DRBD_FAULT_INJECTION @@ -108,7 +107,6 @@ module_param(fault_devs, int, 0644); unsigned int minor_count = DRBD_MINOR_COUNT_DEF; int disable_sendpage; int allow_oos; -unsigned int cn_idx = CN_IDX_DRBD; int proc_details; /* Detail level in proc drbd*/ /* Module parameter for setting the user mode helper program @@ -2175,7 +2173,7 @@ static void drbd_cleanup(void) if (drbd_proc) remove_proc_entry("drbd", NULL); - drbd_nl_cleanup(); + drbd_genl_unregister(); idr_for_each_entry(&minors, mdev, i) drbd_delete_device(i); @@ -2237,6 +2235,9 @@ struct drbd_tconn *conn_by_name(const char *name) { struct drbd_tconn *tconn; + if (!name || !name[0]) + return NULL; + write_lock_irq(&global_state_lock); list_for_each_entry(tconn, &drbd_tconns, all_tconn) { if (!strcmp(tconn->name, name)) @@ -2248,7 +2249,7 @@ found: return tconn; } -struct drbd_tconn *drbd_new_tconn(char *name) +struct drbd_tconn *drbd_new_tconn(const char *name) { struct drbd_tconn *tconn; @@ -2333,6 +2334,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, mdev->tconn = tconn; mdev->minor = minor; + mdev->vnr = vnr; drbd_init_set_defaults(mdev); @@ -2461,10 +2463,6 @@ int __init drbd_init(void) #endif } - err = drbd_nl_init(); - if (err) - return err; - err = register_blkdev(DRBD_MAJOR, "drbd"); if (err) { printk(KERN_ERR @@ -2473,6 +2471,13 @@ int __init drbd_init(void) return err; } + err = drbd_genl_register(); + if (err) { + printk(KERN_ERR "drbd: unable to register generic netlink family\n"); + goto fail; + } + + register_reboot_notifier(&drbd_notifier); /* @@ -2487,12 +2492,12 @@ int __init drbd_init(void) err = drbd_create_mempools(); if (err) - goto Enomem; + goto fail; drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: 
unable to register proc file\n"); - goto Enomem; + goto fail; } rwlock_init(&global_state_lock); @@ -2507,7 +2512,7 @@ int __init drbd_init(void) return 0; /* Success! */ -Enomem: +fail: drbd_cleanup(); if (err == -ENOMEM) /* currently always the case */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f2739fd188a0..f9be14248e33 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -29,110 +29,225 @@ #include #include #include -#include #include #include #include "drbd_int.h" #include "drbd_req.h" #include "drbd_wrappers.h" #include -#include #include -#include #include -static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); -static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); -static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *); - -/* see get_sb_bdev and bd_claim */ +#include + +/* .doit */ +// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info); +// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_invalidate(struct 
sk_buff *skb, struct genl_info *info); +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); +/* .dumpit */ +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); + +#include +#include + +/* used blkdev_get_by_path, to claim our meta data device(s) */ static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; -/* Generate the tag_list to struct functions */ -#define NL_PACKET(name, number, fields) \ -static int name ## _from_tags( \ - unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ -static int name ## _from_tags( \ - unsigned short *tags, struct name *arg) \ -{ \ - int tag; \ - int dlen; \ - \ - while ((tag = get_unaligned(tags++)) != TT_END) { \ - dlen = get_unaligned(tags++); \ - switch (tag_number(tag)) { \ - fields \ - default: \ - if (tag & T_MANDATORY) { \ - printk(KERN_ERR "drbd: Unknown tag: %d\n", tag_number(tag)); \ - return 0; \ - } \ - } \ - tags = (unsigned short *)((char *)tags + dlen); \ - } \ - return 1; \ +/* Configuration is strictly serialized, because generic netlink message + * processing is strictly serialized by the genl_lock(). + * Which means we can use one static global drbd_config_context struct. 
+ */ +static struct drbd_config_context { + /* assigned from drbd_genlmsghdr */ + unsigned int minor; + /* assigned from request attributes, if present */ + unsigned int volume; +#define VOLUME_UNSPECIFIED (-1U) + /* pointer into the request skb, + * limited lifetime! */ + char *conn_name; + + /* reply buffer */ + struct sk_buff *reply_skb; + /* pointer into reply buffer */ + struct drbd_genlmsghdr *reply_dh; + /* resolved from attributes, if possible */ + struct drbd_conf *mdev; + struct drbd_tconn *tconn; +} adm_ctx; + +static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) +{ + genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); + if (genlmsg_reply(skb, info)) + printk(KERN_ERR "drbd: error sending genl reply\n"); } -#define NL_INTEGER(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ - arg->member = get_unaligned((int *)(tags)); \ - break; -#define NL_INT64(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ - arg->member = get_unaligned((u64 *)(tags)); \ - break; -#define NL_BIT(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ - arg->member = *(char *)(tags) ? 
1 : 0; \ - break; -#define NL_STRING(pn, pr, member, len) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ - if (dlen > len) { \ - printk(KERN_ERR "drbd: arg too long: %s (%u wanted, max len: %u bytes)\n", \ - #member, dlen, (unsigned int)len); \ - return 0; \ - } \ - arg->member ## _len = dlen; \ - memcpy(arg->member, tags, min_t(size_t, dlen, len)); \ - break; -#include "linux/drbd_nl.h" - -/* Generate the struct to tag_list functions */ -#define NL_PACKET(name, number, fields) \ -static unsigned short* \ -name ## _to_tags( \ - struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ -static unsigned short* \ -name ## _to_tags( \ - struct name *arg, unsigned short *tags) \ -{ \ - fields \ - return tags; \ + +/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only + * reason it could fail was no space in skb, and there are 4k available. */ +static int drbd_msg_put_info(const char *info) +{ + struct sk_buff *skb = adm_ctx.reply_skb; + struct nlattr *nla; + int err = -EMSGSIZE; + + if (!info || !info[0]) + return 0; + + nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + if (!nla) + return err; + + err = nla_put_string(skb, T_info_text, info); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } else + nla_nest_end(skb, nla); + return 0; } -#define NL_INTEGER(pn, pr, member) \ - put_unaligned(pn | pr | TT_INTEGER, tags++); \ - put_unaligned(sizeof(int), tags++); \ - put_unaligned(arg->member, (int *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(int)); -#define NL_INT64(pn, pr, member) \ - put_unaligned(pn | pr | TT_INT64, tags++); \ - put_unaligned(sizeof(u64), tags++); \ - put_unaligned(arg->member, (u64 *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(u64)); -#define NL_BIT(pn, pr, member) \ - put_unaligned(pn | pr | TT_BIT, tags++); \ - put_unaligned(sizeof(char), tags++); \ - *(char *)tags = arg->member; \ - tags = (unsigned short *)((char *)tags+sizeof(char)); -#define NL_STRING(pn, pr, 
member, len) \ - put_unaligned(pn | pr | TT_STRING, tags++); \ - put_unaligned(arg->member ## _len, tags++); \ - memcpy(tags, arg->member, arg->member ## _len); \ - tags = (unsigned short *)((char *)tags + arg->member ## _len); -#include "linux/drbd_nl.h" - -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name); -void drbd_nl_send_reply(struct cn_msg *, int); +/* This would be a good candidate for a "pre_doit" hook, + * and per-family private info->pointers. + * But we need to stay compatible with older kernels. + * If it returns successfully, adm_ctx members are valid. + */ +#define DRBD_ADM_NEED_MINOR 1 +#define DRBD_ADM_NEED_CONN 2 +static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, + unsigned flags) +{ + struct drbd_genlmsghdr *d_in = info->userhdr; + const u8 cmd = info->genlhdr->cmd; + int err; + + memset(&adm_ctx, 0, sizeof(adm_ctx)); + + /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */ + if (cmd != DRBD_ADM_GET_STATUS + && security_netlink_recv(skb, CAP_SYS_ADMIN)) + return -EPERM; + + adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!adm_ctx.reply_skb) + goto fail; + + adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb, + info, &drbd_genl_family, 0, cmd); + /* put of a few bytes into a fresh skb of >= 4k will always succeed. + * but anyways */ + if (!adm_ctx.reply_dh) + goto fail; + + adm_ctx.reply_dh->minor = d_in->minor; + adm_ctx.reply_dh->ret_code = NO_ERROR; + + if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { + struct nlattr *nla; + /* parse and validate only */ + err = drbd_cfg_context_from_attrs(NULL, info->attrs); + if (err) + goto fail; + + /* It was present, and valid, + * copy it over to the reply skb. 
*/ + err = nla_put_nohdr(adm_ctx.reply_skb, + info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len, + info->attrs[DRBD_NLA_CFG_CONTEXT]); + if (err) + goto fail; + + /* and assign stuff to the global adm_ctx */ + nla = nested_attr_tb[__nla_type(T_ctx_volume)]; + adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED; + nla = nested_attr_tb[__nla_type(T_ctx_conn_name)]; + if (nla) + adm_ctx.conn_name = nla_data(nla); + } else + adm_ctx.volume = VOLUME_UNSPECIFIED; + + adm_ctx.minor = d_in->minor; + adm_ctx.mdev = minor_to_mdev(d_in->minor); + adm_ctx.tconn = conn_by_name(adm_ctx.conn_name); + + if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { + drbd_msg_put_info("unknown minor"); + return ERR_MINOR_INVALID; + } + if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) { + drbd_msg_put_info("unknown connection"); + return ERR_INVALID_REQUEST; + } + + /* some more paranoia, if the request was over-determined */ + if (adm_ctx.mdev && + adm_ctx.volume != VOLUME_UNSPECIFIED && + adm_ctx.volume != adm_ctx.mdev->vnr) { + pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", + adm_ctx.minor, adm_ctx.volume, + adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("over-determined configuration context mismatch"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev && adm_ctx.tconn && + adm_ctx.mdev->tconn != adm_ctx.tconn) { + pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", + adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("over-determined configuration context mismatch"); + return ERR_INVALID_REQUEST; + } + return NO_ERROR; + +fail: + nlmsg_free(adm_ctx.reply_skb); + adm_ctx.reply_skb = NULL; + return -ENOMEM; +} + +static int drbd_adm_finish(struct genl_info *info, int retcode) +{ + struct nlattr *nla; + const char *conn_name = NULL; + + if (!adm_ctx.reply_skb) + return -ENOMEM; + + adm_ctx.reply_dh->ret_code = retcode; + + nla = info->attrs[DRBD_NLA_CFG_CONTEXT]; + 
if (nla) { + nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); + if (nla) + conn_name = nla_data(nla); + } + + drbd_adm_send_reply(adm_ctx.reply_skb, info); + return 0; +} int drbd_khelper(struct drbd_conf *mdev, char *cmd) { @@ -142,9 +257,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) NULL, /* Will be set to address family */ NULL, /* Will be set to address */ NULL }; - char mb[12], af[20], ad[60], *afs; char *argv[] = {usermode_helper, cmd, mb, NULL }; + struct sib_info sib; int ret; snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); @@ -177,8 +292,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) drbd_md_sync(mdev); dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); - - drbd_bcast_ev_helper(mdev, cmd); + sib.sib_reason = SIB_HELPER_PRE; + sib.helper_name = cmd; + drbd_bcast_event(mdev, &sib); ret = call_usermodehelper(usermode_helper, argv, envp, 1); if (ret) dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", @@ -188,6 +304,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); + sib.sib_reason = SIB_HELPER_POST; + sib.helper_exit_code = ret; + drbd_bcast_event(mdev, &sib); if (ret < 0) /* Ignore any ERRNOs we got. 
*/ ret = 0; @@ -362,7 +481,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } if (rv == SS_NOTHING_TO_DO) - goto fail; + goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { nps = drbd_try_outdate_peer(mdev); @@ -388,13 +507,13 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) rv = _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_WAIT_COMPLETE); if (rv < SS_SUCCESS) - goto fail; + goto out; } break; } if (rv < SS_SUCCESS) - goto fail; + goto out; if (forced) dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); @@ -438,33 +557,46 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) drbd_md_sync(mdev); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); - fail: +out: mutex_unlock(mdev->state_mutex); return rv; } -static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static const char *from_attrs_err_to_txt(int err) { - struct primary primary_args; - - memset(&primary_args, 0, sizeof(struct primary)); - if (!primary_from_tags(nlp->tag_list, &primary_args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = - drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force); - - return 0; + return err == -ENOMSG ? "required attribute missing" : + err == -EOPNOTSUPP ? 
"unknown mandatory attribute" : + "invalid attribute value"; } -static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); + struct set_role_parms parms; + int err; + enum drbd_ret_code retcode; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { + err = set_role_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) + retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate); + else + retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0); +out: + drbd_adm_finish(info, retcode); return 0; } @@ -541,6 +673,12 @@ char *ppsize(char *buf, unsigned long long size) * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: * peer may not initiate a resize. */ +/* Note these are not to be confused with + * drbd_adm_suspend_io/drbd_adm_resume_io, + * which are (sub) state changes triggered by admin (drbdsetup), + * and can be long lived. + * This changes an mdev->flag, is triggered by drbd internals, + * and should be short-lived. 
*/ void drbd_suspend_io(struct drbd_conf *mdev) { set_bit(SUSPEND_IO, &mdev->flags); @@ -881,11 +1019,10 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } -/* does always return 0; - * interesting return code is in reply->ret_code */ -static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; + int err; enum drbd_ret_code retcode; enum determine_dev_size dd; sector_t max_possible_sectors; @@ -897,6 +1034,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp enum drbd_state_rv rv; int cp_discovered = 0; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + + mdev = adm_ctx.mdev; conn_reconfig_start(mdev->tconn); /* if you want to reconfigure, please tear down first */ @@ -910,7 +1054,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * to realize a "hot spare" feature (not that I'd recommend that) */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); - /* allocation not in the IO path, cqueue thread context */ + /* allocation not in the IO path, drbdsetup context */ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { retcode = ERR_NOMEM; @@ -922,12 +1066,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->dc.fencing = DRBD_FENCING_DEF; nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - if (!disk_conf_from_tags(nlp->tag_list, &nbc->dc)) { + err = disk_conf_from_attrs(&nbc->dc, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto 
fail; } @@ -961,7 +1107,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp */ bdev = blkdev_get_by_path(nbc->dc.meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - (nbc->dc.meta_dev_idx < 0) ? + ((int)nbc->dc.meta_dev_idx < 0) ? (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, @@ -997,7 +1143,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - if (nbc->dc.meta_dev_idx < 0) { + if ((int)nbc->dc.meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); @@ -1028,7 +1174,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - if (nbc->dc.meta_dev_idx >= 0) + if ((int)nbc->dc.meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } @@ -1242,8 +1388,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); put_ldev(mdev); - reply->ret_code = retcode; conn_reconfig_done(mdev->tconn); + drbd_adm_finish(info, retcode); return 0; force_diskless_dec: @@ -1251,6 +1397,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp force_diskless: drbd_force_state(mdev, NS(disk, D_FAILED)); drbd_md_sync(mdev); + conn_reconfig_done(mdev->tconn); fail: if (nbc) { if (nbc->backing_bdev) @@ -1263,8 +1410,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } lc_destroy(resync_lru); - reply->ret_code = retcode; - conn_reconfig_done(mdev->tconn); + drbd_adm_finish(info, retcode); return 0; } @@ -1273,42 +1419,54 @@ static int drbd_nl_disk_conf(struct drbd_conf 
*mdev, struct drbd_nl_cfg_req *nlp * Then we transition to D_DISKLESS, and wait for put_ldev() to return all * internal references as well. * Only then we have finally detached. */ -static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; enum drbd_ret_code retcode; - int ret; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); - /* D_FAILED will transition to DISKLESS. */ - ret = wait_event_interruptible(mdev->misc_wait, - mdev->state.disk != D_FAILED); + retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); + wait_event(mdev->misc_wait, + mdev->state.disk != D_DISKLESS || + !atomic_read(&mdev->local_cnt)); drbd_resume_io(mdev); - if ((int)retcode == (int)SS_IS_DISKLESS) - retcode = SS_NOTHING_TO_DO; - if (ret) - retcode = ERR_INTR; - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { - int i; - enum drbd_ret_code retcode; + char hmac_name[CRYPTO_MAX_ALG_NAME]; + struct drbd_conf *mdev; struct net_conf *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - struct drbd_conf *mdev; - char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; struct drbd_tconn *oconn; + struct drbd_tconn *tconn; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; + enum drbd_ret_code retcode; + int i; + int err; + retcode = 
drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tconn = adm_ctx.tconn; conn_reconfig_start(tconn); if (tconn->cstate > C_STANDALONE) { @@ -1343,8 +1501,10 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - if (!net_conf_from_tags(nlp->tag_list, new_conf)) { + err = net_conf_from_attrs(new_conf, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } @@ -1495,8 +1655,8 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl mdev->recv_cnt = 0; kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); } - reply->ret_code = retcode; conn_reconfig_done(tconn); + drbd_adm_finish(info, retcode); return 0; fail: @@ -1508,24 +1668,37 @@ fail: crypto_free_hash(integrity_r_tfm); kfree(new_conf); - reply->ret_code = retcode; conn_reconfig_done(tconn); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) { - int retcode; - struct disconnect dc; + struct disconnect_parms parms; + struct drbd_tconn *tconn; + enum drbd_ret_code retcode; + int err; - memset(&dc, 0, sizeof(struct disconnect)); - if (!disconnect_from_tags(nlp->tag_list, &dc)) { - retcode = ERR_MANDATORY_TAG; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) goto fail; + + tconn = adm_ctx.tconn; + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { + err = disconnect_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + 
} } - if (dc.force) { + if (parms.force_disconnect) { spin_lock_irq(&tconn->req_lock); if (tconn->cstate >= C_WF_CONNECTION) _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); @@ -1567,7 +1740,7 @@ static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req * done: retcode = NO_ERROR; fail: - reply->ret_code = retcode; + drbd_adm_finish(info, retcode); return 0; } @@ -1587,20 +1760,32 @@ void resync_after_online_grow(struct drbd_conf *mdev) _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); } -static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) { - struct resize rs; - int retcode = NO_ERROR; + struct resize_parms rs; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; enum determine_dev_size dd; enum dds_flags ddsf; + int err; - memset(&rs, 0, sizeof(struct resize)); - if (!resize_from_tags(nlp->tag_list, &rs)) { - retcode = ERR_MANDATORY_TAG; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) goto fail; + + memset(&rs, 0, sizeof(struct resize_parms)); + if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { + err = resize_parms_from_attrs(&rs, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } } + mdev = adm_ctx.mdev; if (mdev->state.conn > C_CONNECTED) { retcode = ERR_RESIZE_RESYNC; goto fail; @@ -1644,14 +1829,14 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } fail: - reply->ret_code = retcode; + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + enum drbd_ret_code 
retcode; int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ @@ -1662,12 +1847,21 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n int *rs_plan_s = NULL; int fifo_size; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + mdev = adm_ctx.mdev; + if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { retcode = ERR_NOMEM; + drbd_msg_put_info("unable to allocate cpumask"); goto fail; } - if (nlp->flags & DRBD_NL_SET_DEFAULTS) { + if (((struct drbd_genlmsghdr*)info->userhdr)->flags + & DRBD_GENL_F_SET_DEFAULTS) { memset(&sc, 0, sizeof(struct syncer_conf)); sc.rate = DRBD_RATE_DEF; sc.after = DRBD_AFTER_DEF; @@ -1681,8 +1875,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } else memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); - if (!syncer_conf_from_tags(nlp->tag_list, &sc)) { + err = syncer_conf_from_attrs(&sc, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } @@ -1832,14 +2028,23 @@ fail: free_cpumask_var(new_cpu_mask); crypto_free_hash(csums_tfm); crypto_free_hash(verify_tfm); - reply->ret_code = retcode; + + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) { - int retcode; + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. 
*/ @@ -1862,7 +2067,8 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } @@ -1875,56 +2081,58 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev) return rv; } -static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, + union drbd_state mask, union drbd_state val) { - int retcode; - - /* If there is still bitmap IO pending, probably because of a previous - * resync just being finished, wait for it before requesting a new resync. */ - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + enum drbd_ret_code retcode; - retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); - - if (retcode < SS_SUCCESS) { - if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) { - /* The peer will get a resync upon connect anyways. Just make that - into a full resync. 
*/ - retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); - if (retcode >= SS_SUCCESS) { - if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, - "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) - retcode = ERR_IO_MD_DISK; - } - } else - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); - } + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - reply->ret_code = retcode; + retcode = drbd_request_state(adm_ctx.mdev, mask, val); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S)); +} - if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) - retcode = ERR_PAUSE_IS_SET; +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - reply->ret_code = retcode; + if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) + retcode = ERR_PAUSE_IS_SET; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; union drbd_state s; + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { - s = mdev->state; + if (drbd_request_state(adm_ctx.mdev, 
NS(user_isp, 0)) == SS_NOTHING_TO_DO) { + s = adm_ctx.mdev->state; if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; @@ -1933,28 +2141,35 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(susp, 1)); - - return 0; + return drbd_adm_simple_request_state(skb, info, NS(susp, 1)); } -static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } drbd_suspend_io(mdev); - reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); - if (reply->ret_code == SS_SUCCESS) { + retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); + if (retcode == SS_SUCCESS) { if (mdev->state.conn < C_CONNECTED) tl_clear(mdev->tconn); if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) @@ -1962,138 +2177,259 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } drbd_resume_io(mdev); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int 
drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); - return 0; + return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, + const struct sib_info *sib) { - unsigned short *tl; - - tl = reply->tag_list; - - if (get_ldev(mdev)) { - tl = disk_conf_to_tags(&mdev->ldev->dc, tl); - put_ldev(mdev); + struct state_info *si = NULL; /* for sizeof(si->member); */ + struct nlattr *nla; + int got_ldev; + int got_net; + int err = 0; + int exclude_sensitive; + + /* If sib != NULL, this is drbd_bcast_event, which anyone can listen + * to. So we better exclude_sensitive information. + * + * If sib == NULL, this is drbd_adm_get_status, executed synchronously + * in the context of the requesting user process. Exclude sensitive + * information, unless current has superuser. + * + * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and + * relies on the current implementation of netlink_dump(), which + * executes the dump callback successively from netlink_recvmsg(), + * always in the context of the receiving process */ + exclude_sensitive = sib || !capable(CAP_SYS_ADMIN); + + got_ldev = get_ldev(mdev); + got_net = get_net_conf(mdev->tconn); + + /* We need to add connection name and volume number information still. + * Minor number is in drbd_genlmsghdr. 
*/ + nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); + if (!nla) + goto nla_put_failure; + NLA_PUT_U32(skb, T_ctx_volume, mdev->vnr); + NLA_PUT_STRING(skb, T_ctx_conn_name, mdev->tconn->name); + nla_nest_end(skb, nla); + + if (got_ldev) + if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) + goto nla_put_failure; + if (got_net) + if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) + goto nla_put_failure; + + if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive)) + goto nla_put_failure; + + nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); + if (!nla) + goto nla_put_failure; + NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY); + NLA_PUT_U32(skb, T_current_state, mdev->state.i); + NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid); + NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)); + + if (got_ldev) { + NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags); + NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); + NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev)); + NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev)); + if (C_SYNC_SOURCE <= mdev->state.conn && + C_PAUSED_SYNC_T >= mdev->state.conn) { + NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total); + NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed); + } } - if (get_net_conf(mdev->tconn)) { - tl = net_conf_to_tags(mdev->tconn->net_conf, tl); - put_net_conf(mdev->tconn); + if (sib) { + switch(sib->sib_reason) { + case SIB_SYNC_PROGRESS: + case SIB_GET_STATUS_REPLY: + break; + case SIB_STATE_CHANGE: + NLA_PUT_U32(skb, T_prev_state, sib->os.i); + NLA_PUT_U32(skb, T_new_state, sib->ns.i); + break; + case SIB_HELPER_POST: + NLA_PUT_U32(skb, + T_helper_exit_code, sib->helper_exit_code); + /* fall through */ + case SIB_HELPER_PRE: + NLA_PUT_STRING(skb, T_helper, sib->helper_name); + break; + } } - tl = syncer_conf_to_tags(&mdev->sync_conf, tl); - - put_unaligned(TT_END, tl++); /* Close the tag list */ + nla_nest_end(skb, nla); - 
return (int)((char *)tl - (char *)reply->tag_list); + if (0) +nla_put_failure: + err = -EMSGSIZE; + if (got_ldev) + put_ldev(mdev); + if (got_net) + put_net_conf(mdev->tconn); + return err; } -static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) { - unsigned short *tl = reply->tag_list; - union drbd_state s = mdev->state; - unsigned long rs_left; - unsigned int res; + enum drbd_ret_code retcode; + int err; - tl = get_state_to_tags((struct get_state *)&s, tl); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - /* no local ref, no bitmap, no syncer progress. */ - if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { - if (get_ldev(mdev)) { - drbd_get_syncer_progress(mdev, &rs_left, &res); - tl = tl_add_int(tl, T_sync_progress, &res); - put_ldev(mdev); - } + err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - return (int)((char *)tl - (char *)reply->tag_list); +out: + drbd_adm_finish(info, retcode); + return 0; } -static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { - unsigned short *tl; - - tl = reply->tag_list; + struct drbd_conf *mdev; + struct drbd_genlmsghdr *dh; + int minor = cb->args[0]; + + /* Open coded deferred single idr_for_each_entry iteration. + * This may miss entries inserted after this dump started, + * or entries deleted before they are reached. + * But we need to make sure the mdev won't disappear while + * we are looking at it. 
*/ + + rcu_read_lock(); + mdev = idr_get_next(&minors, &minor); + if (mdev) { + dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, &drbd_genl_family, + NLM_F_MULTI, DRBD_ADM_GET_STATUS); + if (!dh) + goto errout; + + D_ASSERT(mdev->minor == minor); + + dh->minor = minor; + dh->ret_code = NO_ERROR; + + if (nla_put_status_info(skb, mdev, NULL)) { + genlmsg_cancel(skb, dh); + goto errout; + } + genlmsg_end(skb, dh); + } - if (get_ldev(mdev)) { - tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); - tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); - put_ldev(mdev); - } - put_unaligned(TT_END, tl++); /* Close the tag list */ +errout: + rcu_read_unlock(); + /* where to start idr_get_next with the next iteration */ + cb->args[0] = minor+1; - return (int)((char *)tl - (char *)reply->tag_list); + /* No more minors found: empty skb. Which will terminate the dump. */ + return skb->len; } -/** - * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use - * @mdev: DRBD device. - * @nlp: Netlink/connector packet from drbdsetup - * @reply: Reply packet for drbdsetup - */ -static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) { - unsigned short *tl; - char rv; - - tl = reply->tag_list; + enum drbd_ret_code retcode; + struct timeout_parms tp; + int err; - rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : - test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); - put_unaligned(TT_END, tl++); /* Close the tag list */ + tp.timeout_type = + adm_ctx.mdev->state.pdsk == D_OUTDATED ? 
UT_PEER_OUTDATED : + test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED : + UT_DEFAULT; - return (int)((char *)tl - (char *)reply->tag_list); + err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; + } +out: + drbd_adm_finish(info, retcode); + return 0; } -static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) { - /* default to resume from last known position, if possible */ - struct start_ov args = - { .start_sector = mdev->ov_start_sector }; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; - if (!start_ov_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + mdev = adm_ctx.mdev; + if (info->attrs[DRBD_NLA_START_OV_PARMS]) { + /* resume from last known position, if possible */ + struct start_ov_parms parms = + { .ov_start_sector = mdev->ov_start_sector }; + int err = start_ov_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; + } /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. 
*/ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); - - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; - reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; int skip_initial_sync = 0; int err; + struct new_c_uuid_parms args; - struct new_c_uuid args; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out_nolock; - memset(&args, 0, sizeof(struct new_c_uuid)); - if (!new_c_uuid_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; + mdev = adm_ctx.mdev; + memset(&args, 0, sizeof(args)); + if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { + err = new_c_uuid_parms_from_attrs(&args, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out_nolock; + } } mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. 
*/ @@ -2139,510 +2475,164 @@ out_dec: put_ldev(mdev); out: mutex_unlock(mdev->state_mutex); - - reply->ret_code = retcode; - return 0; -} - -static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) -{ - struct new_connection args; - - if (!new_connection_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = NO_ERROR; - if (!drbd_new_tconn(args.name)) - reply->ret_code = ERR_NOMEM; - - return 0; -} - -static int drbd_nl_new_minor(struct drbd_tconn *tconn, - struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) -{ - struct new_minor args; - - args.vol_nr = 0; - args.minor = 0; - - if (!new_minor_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); - +out_nolock: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static enum drbd_ret_code +drbd_check_conn_name(const char *name) { - if (mdev->state.disk == D_DISKLESS && - mdev->state.conn == C_STANDALONE && - mdev->state.role == R_SECONDARY) { - drbd_delete_device(mdev_to_minor(mdev)); - reply->ret_code = NO_ERROR; - } else { - reply->ret_code = ERR_MINOR_CONFIGURED; + if (!name || !name[0]) { + drbd_msg_put_info("connection name missing"); + return ERR_MANDATORY_TAG; } - return 0; -} - -static int drbd_nl_del_conn(struct drbd_tconn *tconn, - struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) -{ - if (conn_lowest_minor(tconn) < 0) { - drbd_free_tconn(tconn); - reply->ret_code = NO_ERROR; - } else { - reply->ret_code = ERR_CONN_IN_USE; + /* if we want to use these in sysfs/configfs/debugfs some day, + * we must not allow slashes */ + if (strchr(name, '/')) { + drbd_msg_put_info("invalid connection name"); + return ERR_INVALID_REQUEST; } - - return 0; + return NO_ERROR; } -enum cn_handler_type { 
- CHT_MINOR, - CHT_CONN, - CHT_CTOR, - /* CHT_RES, later */ -}; -struct cn_handler_struct { - enum cn_handler_type type; - union { - int (*minor_based)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - int (*conn_based)(struct drbd_tconn *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - int (*constructor)(struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - }; - int reply_body_size; -}; - -static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, - [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, - [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, - [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, - [ P_net_conf ] = { CHT_CONN, { .conn_based = &drbd_nl_net_conf }, 0 }, - [ P_disconnect ] = { CHT_CONN, { .conn_based = &drbd_nl_disconnect }, 0 }, - [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, - [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, - [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, - [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, - [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, - [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, - [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, - [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, - [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, - [ P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, - sizeof(struct syncer_conf_tag_len_struct) + - sizeof(struct disk_conf_tag_len_struct) + - sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, - sizeof(struct get_state_tag_len_struct) + - sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, - sizeof(struct get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, - sizeof(struct 
get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, - [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, - [ P_new_connection ] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, - [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, - [ P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, - [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, -}; - -static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) +int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) { - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; - struct cn_handler_struct *cm; - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - struct drbd_conf *mdev; - struct drbd_tconn *tconn; - int retcode, rr; - int reply_size = sizeof(struct cn_msg) - + sizeof(struct drbd_nl_cfg_reply) - + sizeof(short int); - - if (!try_module_get(THIS_MODULE)) { - printk(KERN_ERR "drbd: try_module_get() failed!\n"); - return; - } - - if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) { - retcode = ERR_PERM; - goto fail; - } + enum drbd_ret_code retcode; - if (nlp->packet_type >= P_nl_after_last_packet || - nlp->packet_type == P_return_code_only) { - retcode = ERR_PACKET_NR; - goto fail; - } + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - cm = cnd_table + nlp->packet_type; + retcode = drbd_check_conn_name(adm_ctx.conn_name); + if (retcode != NO_ERROR) + goto out; - /* This may happen if packet number is 0: */ - if (cm->minor_based == NULL) { - retcode = ERR_PACKET_NR; - goto fail; + if (adm_ctx.tconn) { + retcode = ERR_INVALID_REQUEST; + drbd_msg_put_info("connection exists"); + goto out; } - reply_size += cm->reply_body_size; - - /* allocation not in the IO path, cqueue thread context */ - cn_reply = kzalloc(reply_size, GFP_KERNEL); - if (!cn_reply) { + if 
(!drbd_new_tconn(adm_ctx.conn_name)) retcode = ERR_NOMEM; - goto fail; - } - reply = (struct drbd_nl_cfg_reply *) cn_reply->data; - - reply->packet_type = - cm->reply_body_size ? nlp->packet_type : P_return_code_only; - reply->minor = nlp->drbd_minor; - reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ - /* reply->tag_list; might be modified by cm->function. */ - - retcode = ERR_MINOR_INVALID; - rr = 0; - switch (cm->type) { - case CHT_MINOR: - mdev = minor_to_mdev(nlp->drbd_minor); - if (!mdev) - goto fail; - rr = cm->minor_based(mdev, nlp, reply); - break; - case CHT_CONN: - tconn = conn_by_name(nlp->obj_name); - if (!tconn) { - retcode = ERR_CONN_NOT_KNOWN; - goto fail; - } - rr = cm->conn_based(tconn, nlp, reply); - break; - case CHT_CTOR: - rr = cm->constructor(nlp, reply); - break; - /* case CHT_RES: */ - } - - cn_reply->id = req->id; - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; - cn_reply->flags = 0; - - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); - - kfree(cn_reply); - module_put(THIS_MODULE); - return; - fail: - drbd_nl_send_reply(req, retcode); - module_put(THIS_MODULE); -} - -static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ - -static unsigned short * -__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, - unsigned short len, int nul_terminated) -{ - unsigned short l = tag_descriptions[tag_number(tag)].max_len; - len = (len < l) ? 
len : l; - put_unaligned(tag, tl++); - put_unaligned(len, tl++); - memcpy(tl, data, len); - tl = (unsigned short*)((char*)tl + len); - if (nul_terminated) - *((char*)tl - 1) = 0; - return tl; -} - -static unsigned short * -tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len) -{ - return __tl_add_blob(tl, tag, data, len, 0); -} - -static unsigned short * -tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) -{ - return __tl_add_blob(tl, tag, str, strlen(str)+1, 0); -} - -static unsigned short * -tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) -{ - put_unaligned(tag, tl++); - switch(tag_type(tag)) { - case TT_INTEGER: - put_unaligned(sizeof(int), tl++); - put_unaligned(*(int *)val, (int *)tl); - tl = (unsigned short*)((char*)tl+sizeof(int)); - break; - case TT_INT64: - put_unaligned(sizeof(u64), tl++); - put_unaligned(*(u64 *)val, (u64 *)tl); - tl = (unsigned short*)((char*)tl+sizeof(u64)); - break; - default: - /* someone did something stupid. */ - ; - } - return tl; -} - -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct get_state_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = get_state_to_tags((struct get_state *)&state, tl); - - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_get_state; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); -} - -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct call_helper_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = tl_add_str(tl, T_helper, helper_name); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_call_helper; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); +out: + drbd_adm_finish(info, retcode); + return 0; } -void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, - const char *seen_hash, const char *calc_hash, - const struct drbd_peer_request *peer_req) +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) { - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - unsigned short *tl; - struct page *page; - unsigned len; + struct drbd_genlmsghdr *dh = info->userhdr; + enum drbd_ret_code retcode; - if (!peer_req) - return; - if (!reason || !reason[0]) - return; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - /* apparently we have to memcpy twice, first to prepare the data for the - * struct cn_msg, then within cn_netlink_send from the cn_msg to the - * netlink skb. */ - /* receiver thread context, which is not in the writeout path (of this node), - * but may be in the writeout path of the _other_ node. - * GFP_NOIO to avoid potential "distributed deadlock". 
*/ - cn_reply = kzalloc( - sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct dump_ee_tag_len_struct)+ - sizeof(short int), - GFP_NOIO); - - if (!cn_reply) { - dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, " - "sector %llu, size %u\n", - (unsigned long long)peer_req->i.sector, - peer_req->i.size); - return; + /* FIXME drop minor_count parameter, limit to MINORMASK */ + if (dh->minor >= minor_count) { + drbd_msg_put_info("requested minor out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; } - - reply = (struct drbd_nl_cfg_reply*)cn_reply->data; - tl = reply->tag_list; - - tl = tl_add_str(tl, T_dump_ee_reason, reason); - tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); - tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, &peer_req->i.sector); - tl = tl_add_int(tl, T_ee_block_id, &peer_req->block_id); - - /* dump the first 32k */ - len = min_t(unsigned, peer_req->i.size, 32 << 10); - put_unaligned(T_ee_data, tl++); - put_unaligned(len, tl++); - - page = peer_req->pages; - page_chain_for_each(page) { - void *d = kmap_atomic(page, KM_USER0); - unsigned l = min_t(unsigned, len, PAGE_SIZE); - memcpy(tl, d, l); - kunmap_atomic(d, KM_USER0); - tl = (unsigned short*)((char*)tl + l); - len -= l; - if (len == 0) - break; + /* FIXME we need a define here */ + if (adm_ctx.volume >= 256) { + drbd_msg_put_info("requested volume id out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; // not used here. 
- cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char*)tl - (char*)reply->tag_list); - cn_reply->flags = 0; - reply->packet_type = P_dump_ee; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - kfree(cn_reply); + retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); +out: + drbd_adm_finish(info, retcode); + return 0; } -void drbd_bcast_sync_progress(struct drbd_conf *mdev) +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) { - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct sync_progress_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - unsigned long rs_left; - unsigned int res; - - /* no local ref, no bitmap, no syncer progress, no broadcast. */ - if (!get_ldev(mdev)) - return; - drbd_get_syncer_progress(mdev, &rs_left, &res); - put_ldev(mdev); - - tl = tl_add_int(tl, T_sync_progress, &res); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; - reply->packet_type = P_sync_progress; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); + mdev = adm_ctx.mdev; + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + retcode = NO_ERROR; + } else + retcode = ERR_MINOR_CONFIGURED; +out: + drbd_adm_finish(info, retcode); + return 0; } -int __init drbd_nl_init(void) +int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) { - static struct cb_id cn_id_drbd; - int err, try=10; - - cn_id_drbd.val = CN_VAL_DRBD; - do { - cn_id_drbd.idx = cn_idx; - err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback); - if (!err) - break; - cn_idx = (cn_idx + CN_IDX_STEP); - } while (try--); + enum drbd_ret_code retcode; - if (err) { - printk(KERN_ERR "drbd: cn_drbd failed to register\n"); - return err; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + drbd_free_tconn(adm_ctx.tconn); + retcode = NO_ERROR; + } else { + retcode = ERR_CONN_IN_USE; } +out: + drbd_adm_finish(info, retcode); return 0; } -void drbd_nl_cleanup(void) +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) { - static struct cb_id cn_id_drbd; - - cn_id_drbd.idx = cn_idx; - cn_id_drbd.val = CN_VAL_DRBD; + static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. 
*/ + struct sk_buff *msg; + struct drbd_genlmsghdr *d_out; + unsigned seq; + int err = -ENOMEM; + + seq = atomic_inc_return(&drbd_genl_seq); + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); + if (!msg) + goto failed; + + err = -EMSGSIZE; + d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT); + if (!d_out) /* cannot happen, but anyways. */ + goto nla_put_failure; + d_out->minor = mdev_to_minor(mdev); + d_out->ret_code = 0; + + if (nla_put_status_info(msg, mdev, sib)) + goto nla_put_failure; + genlmsg_end(msg, d_out); + err = drbd_genl_multicast_events(msg, 0); + /* msg has been consumed or freed in netlink_broadcast() */ + if (err && err != -ESRCH) + goto failed; - cn_del_callback(&cn_id_drbd); -} + return; -void drbd_nl_send_reply(struct cn_msg *req, int ret_code) -{ - char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - int rr; - - memset(buffer, 0, sizeof(buffer)); - cn_reply->id = req->id; - - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply); - cn_reply->flags = 0; - - reply->packet_type = P_return_code_only; - reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; - reply->ret_code = ret_code; - - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); +nla_put_failure: + nlmsg_free(msg); +failed: + dev_err(DEV, "Error %d while broadcasting event. 
" + "Event seq:%u sib_reason:%u\n", + err, seq, sib->sib_reason); } - diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ffee90d6d374..a280bc238acd 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -970,6 +970,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, enum drbd_fencing_p fp; enum drbd_req_event what = NOTHING; union drbd_state nsm = (union drbd_state){ .i = -1 }; + struct sib_info sib; + + sib.sib_reason = SIB_STATE_CHANGE; + sib.os = os; + sib.ns = ns; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { clear_bit(CRASHED_PRIMARY, &mdev->flags); @@ -984,7 +989,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } /* Inform userspace about the change... */ - drbd_bcast_state(mdev, ns); + drbd_bcast_event(mdev, &sib); if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index e192167e6145..d28fdd8fcd49 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -51,7 +51,6 @@ #endif - extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 @@ -159,6 +158,7 @@ enum drbd_ret_code { ERR_CONN_IN_USE = 159, ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, + ERR_INVALID_REQUEST = 162, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -349,37 +349,4 @@ enum drbd_timeout_flag { #define DRBD_MD_INDEX_FLEX_EXT -2 #define DRBD_MD_INDEX_FLEX_INT -3 -/* Start of the new netlink/connector stuff */ - -enum drbd_ncr_flags { - DRBD_NL_CREATE_DEVICE = 0x01, - DRBD_NL_SET_DEFAULTS = 0x02, -}; -#define DRBD_NL_OBJ_NAME_LEN 32 - - -/* For searching a vacant cn_idx value */ -#define CN_IDX_STEP 6977 - -struct drbd_nl_cfg_req { - int packet_type; - union { - struct { - unsigned int drbd_minor; - enum drbd_ncr_flags flags; - }; - struct { - char 
obj_name[DRBD_NL_OBJ_NAME_LEN]; - }; - }; - unsigned short tag_list[]; -}; - -struct drbd_nl_cfg_reply { - int packet_type; - unsigned int minor; - int ret_code; /* enum ret_code or set_st_err_t */ - unsigned short tag_list[]; /* only used with get_* calls */ -}; - #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 8a86f659d363..c8c67239f616 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -95,7 +95,7 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ #endif #endif -#if 1 +#ifdef GENL_MAGIC_DEBUG static void dprint_field(const char *dir, int nla_type, const char *name, void *valp) { -- cgit v1.2.3 From 73d901b74f1070c8a664349b564ba6f8bc8ab283 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:38:56 +0100 Subject: drbd: remove now unused connector related files Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_nl.h | 172 ----------------------------------------- include/linux/drbd_tag_magic.h | 84 -------------------- 2 files changed, 256 deletions(-) delete mode 100644 include/linux/drbd_nl.h delete mode 100644 include/linux/drbd_tag_magic.h (limited to 'include') diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h deleted file mode 100644 index 1216c7a432c5..000000000000 --- a/include/linux/drbd_nl.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - PAKET( name, - TYPE ( pn, pr, member ) - ... 
- ) - - You may never reissue one of the pn arguments -*/ - -#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64) -#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined" -#endif - -NL_PACKET(primary, 1, - NL_BIT( 1, T_MAY_IGNORE, primary_force) -) - -NL_PACKET(secondary, 2, ) - -NL_PACKET(disk_conf, 3, - NL_INT64( 2, T_MAY_IGNORE, disk_size) - NL_STRING( 3, T_MANDATORY, backing_dev, 128) - NL_STRING( 4, T_MANDATORY, meta_dev, 128) - NL_INTEGER( 5, T_MANDATORY, meta_dev_idx) - NL_INTEGER( 6, T_MAY_IGNORE, on_io_error) - NL_INTEGER( 7, T_MAY_IGNORE, fencing) - NL_BIT( 37, T_MAY_IGNORE, use_bmbv) - NL_BIT( 53, T_MAY_IGNORE, no_disk_flush) - NL_BIT( 54, T_MAY_IGNORE, no_md_flush) - /* 55 max_bio_size was available in 8.2.6rc2 */ - NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) - NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) - NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) -) - -NL_PACKET(detach, 4, ) - -NL_PACKET(net_conf, 5, - NL_STRING( 8, T_MANDATORY, my_addr, 128) - NL_STRING( 9, T_MANDATORY, peer_addr, 128) - NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX) - NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX) - NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX) - NL_INTEGER( 14, T_MAY_IGNORE, timeout) - NL_INTEGER( 15, T_MANDATORY, wire_protocol) - NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int) - NL_INTEGER( 17, T_MAY_IGNORE, ping_int) - NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size) - NL_INTEGER( 19, T_MAY_IGNORE, max_buffers) - NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark) - NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size) - NL_INTEGER( 22, T_MAY_IGNORE, ko_count) - NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p) - NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p) - NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p) - NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) - NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) - NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) - NL_INTEGER( 
81, T_MAY_IGNORE, on_congestion) - NL_INTEGER( 82, T_MAY_IGNORE, cong_fill) - NL_INTEGER( 83, T_MAY_IGNORE, cong_extents) - /* 59 addr_family was available in GIT, never released */ - NL_BIT( 60, T_MANDATORY, mind_af) - NL_BIT( 27, T_MAY_IGNORE, want_lose) - NL_BIT( 28, T_MAY_IGNORE, two_primaries) - NL_BIT( 41, T_MAY_IGNORE, always_asbp) - NL_BIT( 61, T_MAY_IGNORE, no_cork) - NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) - NL_BIT( 70, T_MANDATORY, dry_run) -) - -NL_PACKET(disconnect, 6, - NL_BIT( 84, T_MAY_IGNORE, force) -) - -NL_PACKET(resize, 7, - NL_INT64( 29, T_MAY_IGNORE, resize_size) - NL_BIT( 68, T_MAY_IGNORE, resize_force) - NL_BIT( 69, T_MANDATORY, no_resync) -) - -NL_PACKET(syncer_conf, 8, - NL_INTEGER( 30, T_MAY_IGNORE, rate) - NL_INTEGER( 31, T_MAY_IGNORE, after) - NL_INTEGER( 32, T_MAY_IGNORE, al_extents) -/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume) - * NL_INTEGER( 72, T_MAY_IGNORE, dp_interval) - * NL_INTEGER( 73, T_MAY_IGNORE, throttle_th) - * NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th) - * feature will be reimplemented differently with 8.3.9 */ - NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) - NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) - NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) - NL_BIT( 65, T_MAY_IGNORE, use_rle) - NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) - NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead) - NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) - NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) - NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) - NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate) -) - -NL_PACKET(invalidate, 9, ) -NL_PACKET(invalidate_peer, 10, ) -NL_PACKET(pause_sync, 11, ) -NL_PACKET(resume_sync, 12, ) -NL_PACKET(suspend_io, 13, ) -NL_PACKET(resume_io, 14, ) -NL_PACKET(outdate, 15, ) -NL_PACKET(get_config, 16, ) -NL_PACKET(get_state, 17, - NL_INTEGER( 33, T_MAY_IGNORE, state_i) -) - -NL_PACKET(get_uuids, 18, - NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64))) - NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags) -) 
- -NL_PACKET(get_timeout_flag, 19, - NL_BIT( 36, T_MAY_IGNORE, use_degraded) -) - -NL_PACKET(call_helper, 20, - NL_STRING( 38, T_MAY_IGNORE, helper, 32) -) - -/* Tag nr 42 already allocated in drbd-8.1 development. */ - -NL_PACKET(sync_progress, 23, - NL_INTEGER( 43, T_MAY_IGNORE, sync_progress) -) - -NL_PACKET(dump_ee, 24, - NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32) - NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX) - NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX) - NL_INT64( 48, T_MAY_IGNORE, ee_sector) - NL_INT64( 49, T_MAY_IGNORE, ee_block_id) - NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10) -) - -NL_PACKET(start_ov, 25, - NL_INT64( 66, T_MAY_IGNORE, start_sector) -) - -NL_PACKET(new_c_uuid, 26, - NL_BIT( 63, T_MANDATORY, clear_bm) -) - -#ifdef NL_RESPONSE -NL_RESPONSE(return_code_only, 27) -#endif - -NL_PACKET(new_connection, 28, /* CHT_CTOR */ - NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) -) - -NL_PACKET(new_minor, 29, /* CHT_CONN */ - NL_INTEGER( 86, T_MANDATORY, minor) - NL_INTEGER( 87, T_MANDATORY, vol_nr) -) - -NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ -NL_PACKET(del_connection, 31, ) /* CHT_CONN */ - -#undef NL_PACKET -#undef NL_INTEGER -#undef NL_INT64 -#undef NL_BIT -#undef NL_STRING -#undef NL_RESPONSE diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h deleted file mode 100644 index 069543190516..000000000000 --- a/include/linux/drbd_tag_magic.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef DRBD_TAG_MAGIC_H -#define DRBD_TAG_MAGIC_H - -#define TT_END 0 -#define TT_REMOVED 0xE000 - -/* declare packet_type enums */ -enum packet_types { -#define NL_PACKET(name, number, fields) P_ ## name = number, -#define NL_RESPONSE(name, number) P_ ## name = number, -#define NL_INTEGER(pn, pr, member) -#define NL_INT64(pn, pr, member) -#define NL_BIT(pn, pr, member) -#define NL_STRING(pn, pr, member, len) -#include "drbd_nl.h" - P_nl_after_last_packet, -}; - -/* These struct are used to deduce the size 
of the tag lists: */ -#define NL_PACKET(name, number, fields) \ - struct name ## _tag_len_struct { fields }; -#define NL_INTEGER(pn, pr, member) \ - int member; int tag_and_len ## member; -#define NL_INT64(pn, pr, member) \ - __u64 member; int tag_and_len ## member; -#define NL_BIT(pn, pr, member) \ - unsigned char member:1; int tag_and_len ## member; -#define NL_STRING(pn, pr, member, len) \ - unsigned char member[len]; int member ## _len; \ - int tag_and_len ## member; -#include "linux/drbd_nl.h" - -/* declare tag-list-sizes */ -static const int tag_list_sizes[] = { -#define NL_PACKET(name, number, fields) 2 fields , -#define NL_INTEGER(pn, pr, member) + 4 + 4 -#define NL_INT64(pn, pr, member) + 4 + 8 -#define NL_BIT(pn, pr, member) + 4 + 1 -#define NL_STRING(pn, pr, member, len) + 4 + (len) -#include "drbd_nl.h" -}; - -/* The two highest bits are used for the tag type */ -#define TT_MASK 0xC000 -#define TT_INTEGER 0x0000 -#define TT_INT64 0x4000 -#define TT_BIT 0x8000 -#define TT_STRING 0xC000 -/* The next bit indicates if processing of the tag is mandatory */ -#define T_MANDATORY 0x2000 -#define T_MAY_IGNORE 0x0000 -#define TN_MASK 0x1fff -/* The remaining 13 bits are used to enumerate the tags */ - -#define tag_type(T) ((T) & TT_MASK) -#define tag_number(T) ((T) & TN_MASK) - -/* declare tag enums */ -#define NL_PACKET(name, number, fields) fields -enum drbd_tags { -#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr , -#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr , -#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr , -#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr , -#include "drbd_nl.h" -}; - -struct tag { - const char *name; - int type_n_flags; - int max_len; -}; - -/* declare tag names */ -#define NL_PACKET(name, number, fields) fields -static const struct tag tag_descriptions[] = { -#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) }, -#define 
NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) }, -#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) }, -#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) }, -#include "drbd_nl.h" -}; - -#endif -- cgit v1.2.3 From 85f75dd7630436b0aa46a6393099c0f23121f5f0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 15 Mar 2011 16:26:37 +0100 Subject: drbd: introduce in-kernel "down" command This greatly simplifies deconfiguration of whole resources. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 - drivers/block/drbd/drbd_nl.c | 203 ++++++++++++++++++++++++++++++----------- include/linux/drbd_genl.h | 2 + 3 files changed, 154 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 113c7b465384..40b7b93def75 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2303,9 +2303,7 @@ fail: void drbd_free_tconn(struct drbd_tconn *tconn) { - mutex_lock(&drbd_cfg_mutex); list_del(&tconn->all_tconn); - mutex_unlock(&drbd_cfg_mutex); idr_destroy(&tconn->volumes); free_cpumask_var(tconn->cpu_mask); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f965dfe4b5ff..d952e877f8d5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -49,6 +49,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info); int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); @@ -1416,6 +1417,18 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } +static int 
adm_detach(struct drbd_conf *mdev) +{ + enum drbd_ret_code retcode; + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ + retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); + wait_event(mdev->misc_wait, + mdev->state.disk != D_DISKLESS || + !atomic_read(&mdev->local_cnt)); + drbd_resume_io(mdev); + return retcode; +} + /* Detaching the disk is a process in multiple stages. First we need to lock * out application IO, in-flight IO, IO stuck in drbd_al_begin_io. * Then we transition to D_DISKLESS, and wait for put_ldev() to return all @@ -1423,7 +1436,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * Only then we have finally detached. */ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -1432,13 +1444,7 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; - drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - wait_event(mdev->misc_wait, - mdev->state.disk != D_DISKLESS || - !atomic_read(&mdev->local_cnt)); - drbd_resume_io(mdev); + retcode = adm_detach(adm_ctx.mdev); out: drbd_adm_finish(info, retcode); return 0; @@ -1680,10 +1686,49 @@ out: return 0; } +static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force) +{ + enum drbd_state_rv rv; + if (force) { + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate >= C_WF_CONNECTION) + _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + spin_unlock_irq(&tconn->req_lock); + return SS_SUCCESS; + } + + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); + + switch (rv) { + case SS_NOTHING_TO_DO: + case SS_ALREADY_STANDALONE: + return SS_SUCCESS; + case SS_PRIMARY_NOP: + /* Our state checking code wants to see the peer outdated. 
*/ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED), CS_VERBOSE); + break; + case SS_CW_FAILED_BY_PEER: + /* The peer probably wants to see us outdated. */ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), 0); + if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + rv = SS_SUCCESS; + } + break; + default:; + /* no special handling necessary */ + } + + return rv; +} + int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) { struct disconnect_parms parms; struct drbd_tconn *tconn; + enum drbd_state_rv rv; enum drbd_ret_code retcode; int err; @@ -1704,35 +1749,8 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) } } - if (parms.force_disconnect) { - spin_lock_irq(&tconn->req_lock); - if (tconn->cstate >= C_WF_CONNECTION) - _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - spin_unlock_irq(&tconn->req_lock); - goto done; - } - - retcode = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); - - if (retcode == SS_NOTHING_TO_DO) - goto done; - else if (retcode == SS_ALREADY_STANDALONE) - goto done; - else if (retcode == SS_PRIMARY_NOP) { - /* Our state checking code wants to see the peer outdated. */ - retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED), CS_VERBOSE); - } else if (retcode == SS_CW_FAILED_BY_PEER) { - /* The peer probably wants to see us outdated. 
*/ - retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - disk, D_OUTDATED), 0); - if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - retcode = SS_SUCCESS; - } - } - - if (retcode < SS_SUCCESS) + rv = conn_try_disconnect(tconn, parms.force_disconnect); + if (rv < SS_SUCCESS) goto fail; if (wait_event_interruptible(tconn->ping_wait, @@ -1743,7 +1761,6 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) goto fail; } - done: retcode = NO_ERROR; fail: drbd_adm_finish(info, retcode); @@ -2644,9 +2661,21 @@ out: return 0; } +static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) +{ + if (mdev->state.disk == D_DISKLESS && + /* no need to be mdev->state.conn == C_STANDALONE && + * we may want to delete a minor from a live replication group. + */ + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + return NO_ERROR; + } else + return ERR_MINOR_CONFIGURED; +} + int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -2655,19 +2684,89 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; - if (mdev->state.disk == D_DISKLESS && - /* no need to be mdev->state.conn == C_STANDALONE && - * we may want to delete a minor from a live replication group. 
- */ - mdev->state.role == R_SECONDARY) { - drbd_delete_device(mdev_to_minor(mdev)); - retcode = NO_ERROR; - /* if this was the last volume of this connection, - * this will terminate all threads */ + mutex_lock(&drbd_cfg_mutex); + retcode = adm_delete_minor(adm_ctx.mdev); + mutex_unlock(&drbd_cfg_mutex); + /* if this was the last volume of this connection, + * this will terminate all threads */ + if (retcode == NO_ERROR) conn_reconfig_done(adm_ctx.tconn); - } else - retcode = ERR_MINOR_CONFIGURED; +out: + drbd_adm_finish(info, retcode); + return 0; +} + +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + enum drbd_state_rv rv; + struct drbd_conf *mdev; + unsigned i; + + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (!adm_ctx.tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto out; + } + + mutex_lock(&drbd_cfg_mutex); + /* demote */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = drbd_set_role(mdev, R_SECONDARY, 0); + if (retcode < SS_SUCCESS) { + drbd_msg_put_info("failed to demote"); + goto out_unlock; + } + } + + /* disconnect */ + rv = conn_try_disconnect(adm_ctx.tconn, 0); + if (rv < SS_SUCCESS) { + retcode = rv; /* enum type mismatch! */ + drbd_msg_put_info("failed to disconnect"); + goto out_unlock; + } + + /* detach */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + rv = adm_detach(mdev); + if (rv < SS_SUCCESS) { + retcode = rv; /* enum type mismatch! 
*/ + drbd_msg_put_info("failed to detach"); + goto out_unlock; + } + } + + /* delete volumes */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = adm_delete_minor(mdev); + if (retcode != NO_ERROR) { + /* "can not happen" */ + drbd_msg_put_info("failed to delete volume"); + goto out_unlock; + } + } + + /* stop all threads */ + conn_reconfig_done(adm_ctx.tconn); + + /* delete connection */ + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + drbd_free_tconn(adm_ctx.tconn); + retcode = NO_ERROR; + } else { + /* "can not happen" */ + retcode = ERR_CONN_IN_USE; + drbd_msg_put_info("failed to delete connection"); + goto out_unlock; + } +out_unlock: + mutex_unlock(&drbd_cfg_mutex); out: drbd_adm_finish(info, retcode); return 0; @@ -2683,12 +2782,14 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; + mutex_lock(&drbd_cfg_mutex); if (conn_lowest_minor(adm_ctx.tconn) < 0) { drbd_free_tconn(adm_ctx.tconn); retcode = NO_ERROR; } else { retcode = ERR_CONN_IN_USE; } + mutex_unlock(&drbd_cfg_mutex); out: drbd_adm_finish(info, retcode); diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 84e16848f7a1..a07d69279b1a 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -347,3 +347,5 @@ GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) -- cgit v1.2.3 From 047e95e259e81d7b97eca10cda0aa93082531ac1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 14:43:36 +0100 Subject: drbd: Allow volumes to become primary only on one side Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 21 
++++++++++++++++++--- drivers/block/drbd/drbd_strings.c | 1 + include/linux/drbd.h | 3 ++- 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 886b996ec7b3..11685658659e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -329,6 +329,18 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio dev_info(DEV, "%s\n", pb); } +static bool vol_has_primary_peer(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int minor; + + idr_for_each_entry(&tconn->volumes, mdev, minor) { + if (mdev->state.peer == R_PRIMARY) + return true; + } + return false; +} + /** * is_valid_state() - Returns an SS_ error code if ns is not valid * @mdev: DRBD device. @@ -349,9 +361,12 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } if (get_net_conf(mdev->tconn)) { - if (!mdev->tconn->net_conf->two_primaries && - ns.role == R_PRIMARY && ns.peer == R_PRIMARY) - rv = SS_TWO_PRIMARIES; + if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) { + if (ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; + else if (vol_has_primary_peer(mdev->tconn)) + rv = SS_O_VOL_PEER_PRI; + } put_net_conf(mdev->tconn); } diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index c44a2a602772..9a664bd27404 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = { [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", + [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", }; const char *drbd_conn_str(enum drbd_conns s) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d28fdd8fcd49..9cdb888607ae 100644 --- 
a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -300,7 +300,8 @@ enum drbd_state_rv { SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */ SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ - SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */ + SS_O_VOL_PEER_PRI = -20, + SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ }; /* from drbd_strings.c */ -- cgit v1.2.3 From f399002e68e626e7bc443e6fcab1772704cc197f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Mar 2011 14:31:09 +0100 Subject: drbd: distribute former syncer_conf settings to disk, connection, and resource level This commit breaks the API again. Move per-volume former syncer options into disk_conf. Move per-connection former syncer options into net_conf. Renamed the remaining sync_conf to res_opts. Syncer settings have been changeable at runtime, so we need to prepare for these settings to be runtime-changeable in their new home as well. Introduce new configuration operations, and share the netlink attribute between "attach" (create new disk) and "disk-opts" (change options). Same for "connect" and "net-opts". Some fields cannot be changed at runtime, however. Introduce a new flag GENLA_F_INVARIANT to be able to trigger on that in the generated validation and assignment functions.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +- drivers/block/drbd/drbd_main.c | 72 +++-- drivers/block/drbd/drbd_nl.c | 550 ++++++++++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 51 ++-- drivers/block/drbd/drbd_state.c | 4 +- drivers/block/drbd/drbd_worker.c | 50 ++-- include/linux/drbd_genl.h | 133 +++++---- include/linux/drbd_limits.h | 2 + include/linux/genl_magic_func.h | 49 ++-- include/linux/genl_magic_struct.h | 18 +- 10 files changed, 572 insertions(+), 367 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d6e7e657e7a4..bc265f3733c6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -860,7 +860,7 @@ struct drbd_md { s32 bm_offset; /* signed relative sector offset to bitmap */ /* u32 al_nr_extents; important for restoring the AL - * is stored into sync_conf.al_extents, which in turn + * is stored into ldev->dc.al_extents, which in turn * gets applied to act_log->nr_elements */ }; @@ -929,6 +929,7 @@ struct drbd_tconn { /* is a resource from the config file */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ + struct res_opts res_opts; struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ @@ -945,6 +946,8 @@ struct drbd_tconn { /* is a resource from the config file */ struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; void *int_dig_out; void *int_dig_in; void *int_dig_vv; @@ -963,7 +966,6 @@ struct drbd_conf { unsigned long flags; /* configured by drbdsetup */ - struct syncer_conf sync_conf; struct 
drbd_backing_dev *ldev __protected_by(local); sector_t p_size; /* partner's disk size */ @@ -1037,8 +1039,6 @@ struct drbd_conf { /* size of out-of-sync range in sectors. */ sector_t ov_last_oos_size; unsigned long ov_left; /* in bits */ - struct crypto_hash *csums_tfm; - struct crypto_hash *verify_tfm; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -1188,7 +1188,7 @@ extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 -extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); +extern int drbd_send_sync_param(struct drbd_conf *mdev); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 79a0e042252f..bdb12723585e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -784,7 +784,7 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, return ok; } -int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) +int drbd_send_sync_param(struct drbd_conf *mdev) { struct p_rs_param_95 *p; struct socket *sock; @@ -793,7 +793,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) - + strlen(mdev->sync_conf.verify_alg) + 1 + + strlen(mdev->tconn->net_conf->verify_alg) + 1 : apv <= 94 ? 
sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); @@ -812,16 +812,25 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - p->rate = cpu_to_be32(sc->rate); - p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead); - p->c_delay_target = cpu_to_be32(sc->c_delay_target); - p->c_fill_target = cpu_to_be32(sc->c_fill_target); - p->c_max_rate = cpu_to_be32(sc->c_max_rate); + if (get_ldev(mdev)) { + p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate); + p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead); + p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target); + p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target); + p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate); + put_ldev(mdev); + } else { + p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); + p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); + p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); + p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); + } if (apv >= 88) - strcpy(p->verify_alg, mdev->sync_conf.verify_alg); + strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, mdev->sync_conf.csums_alg); + strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); } else @@ -1043,7 +1052,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, int bits; /* may we use this feature? 
*/ - if ((mdev->sync_conf.use_rle == 0) || + if ((mdev->tconn->net_conf->use_rle == 0) || (mdev->tconn->agreed_pro_version < 90)) return 0; @@ -1790,26 +1799,8 @@ static int drbd_release(struct gendisk *gd, fmode_t mode) static void drbd_set_defaults(struct drbd_conf *mdev) { - /* This way we get a compile error when sync_conf grows, - and we forgot to initialize it here */ - mdev->sync_conf = (struct syncer_conf) { - /* .rate = */ DRBD_RATE_DEF, - /* .after = */ DRBD_AFTER_DEF, - /* .al_extents = */ DRBD_AL_EXTENTS_DEF, - /* .verify_alg = */ {}, 0, - /* .cpu_mask = */ {}, 0, - /* .csums_alg = */ {}, 0, - /* .use_rle = */ 0, - /* .on_no_data = */ DRBD_ON_NO_DATA_DEF, - /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF, - /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF, - /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF, - /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF, - /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF - }; - - /* Have to use that way, because the layout differs between - big endian and little endian */ + /* Beware! 
The actual layout differs + * between big endian and little endian */ mdev->state = (union drbd_state) { { .role = R_SECONDARY, .peer = R_UNKNOWN, @@ -2286,6 +2277,11 @@ struct drbd_tconn *drbd_new_tconn(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + tconn->res_opts = (struct res_opts) { + {}, 0, /* cpu_mask */ + DRBD_ON_NO_DATA_DEF, /* on_no_data */ + }; + mutex_lock(&drbd_cfg_mutex); list_add_tail(&tconn->all_tconn, &drbd_tconns); mutex_unlock(&drbd_cfg_mutex); @@ -2559,10 +2555,10 @@ void drbd_free_sock(struct drbd_tconn *tconn) void drbd_free_resources(struct drbd_conf *mdev) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = NULL; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = NULL; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = NULL; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = NULL; crypto_free_hash(mdev->tconn->cram_hmac_tfm); mdev->tconn->cram_hmac_tfm = NULL; crypto_free_hash(mdev->tconn->integrity_w_tfm); @@ -2589,7 +2585,7 @@ struct meta_data_on_disk { u32 md_size_sect; u32 al_offset; /* offset to this block */ u32 al_nr_extents; /* important for restoring the AL */ - /* `-- act_log->nr_elements <-- sync_conf.al_extents */ + /* `-- act_log->nr_elements <-- ldev->dc.al_extents */ u32 bm_offset; /* offset to the bitmap, from here */ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ u32 la_peer_max_bio_size; /* last peer max_bio_size */ @@ -2715,7 +2711,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); - mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); + bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); spin_lock_irq(&mdev->tconn->req_lock); @@ -2727,8 
+2723,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - if (mdev->sync_conf.al_extents < 7) - mdev->sync_conf.al_extents = 127; + if (bdev->dc.al_extents < 7) + bdev->dc.al_extents = 127; err: mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ac0a175e778c..18cd2ed4e8ca 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -53,8 +53,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); @@ -66,7 +68,7 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); /* .dumpit */ @@ -170,7 +172,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { struct nlattr *nla; /* parse and validate only */ - err = drbd_cfg_context_from_attrs(NULL, 
info->attrs); + err = drbd_cfg_context_from_attrs(NULL, info); if (err) goto fail; @@ -616,6 +618,7 @@ static const char *from_attrs_err_to_txt(int err) { return err == -ENOMSG ? "required attribute missing" : err == -EOPNOTSUPP ? "unknown mandatory attribute" : + err == -EEXIST ? "can not change invariant setting" : "invalid attribute value"; } @@ -633,7 +636,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { - err = set_role_parms_from_attrs(&parms, info->attrs); + err = set_role_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -898,24 +901,24 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass * failed, and 0 on success. You should call drbd_md_sync() after you called * this function. */ -static int drbd_check_al_size(struct drbd_conf *mdev) +static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) { struct lru_cache *n, *t; struct lc_element *e; unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) - mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN)) + dc->al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && - mdev->act_log->nr_elements == mdev->sync_conf.al_extents) + mdev->act_log->nr_elements == dc->al_extents) return 0; in_use = 0; t = mdev->act_log; n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, - mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); + dc->al_extents, sizeof(struct lc_element), 0); if (n == NULL) { dev_err(DEV, "Cannot allocate act_log lru!\n"); @@ -1069,6 +1072,114 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_conf *mdev; + struct 
disk_conf *ndc; /* new disk conf */ + int err, fifo_size; + int *rs_plan_s = NULL; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + + /* we also need a disk + * to change the options on */ + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto out; + } + +/* FIXME freeze IO, cluster wide. + * + * We should make sure no-one uses + * some half-updated struct when we + * assign it later. */ + + ndc = kmalloc(sizeof(*ndc), GFP_KERNEL); + if (!ndc) { + retcode = ERR_NOMEM; + goto fail; + } + + memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc)); + err = disk_conf_from_attrs_for_change(ndc, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + } + + if (!expect(ndc->resync_rate >= 1)) + ndc->resync_rate = 1; + + /* clip to allowed range */ + if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN)) + ndc->al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX)) + ndc->al_extents = DRBD_AL_EXTENTS_MAX; + + /* most sanity checks done, try to assign the new sync-after + * dependency. need to hold the global lock in there, + * to avoid a race in the dependency loop check. 
*/ + retcode = drbd_alter_sa(mdev, ndc->resync_after); + if (retcode != NO_ERROR) + goto fail; + + fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; + if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { + rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + if (!rs_plan_s) { + dev_err(DEV, "kmalloc of fifo_buffer failed"); + retcode = ERR_NOMEM; + goto fail; + } + } + + if (fifo_size != mdev->rs_plan_s.size) { + kfree(mdev->rs_plan_s.values); + mdev->rs_plan_s.values = rs_plan_s; + mdev->rs_plan_s.size = fifo_size; + mdev->rs_planed = 0; + rs_plan_s = NULL; + } + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + drbd_al_shrink(mdev); + err = drbd_check_al_size(mdev, ndc); + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + if (err) { + retcode = ERR_NOMEM; + goto fail; + } + + /* FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. + */ + mdev->ldev->dc = *ndc; + + drbd_md_sync(mdev); + + + if (mdev->state.conn >= C_CONNECTED) + drbd_send_sync_param(mdev); + + fail: + put_ldev(mdev); + kfree(ndc); + kfree(rs_plan_s); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { struct drbd_conf *mdev; @@ -1111,12 +1222,29 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; - nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; - nbc->dc.fencing = DRBD_FENCING_DEF; - nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - - err = disk_conf_from_attrs(&nbc->dc, info->attrs); + nbc->dc = (struct disk_conf) { + {}, 0, /* backing_dev */ + {}, 0, /* meta_dev */ + 0, /* meta_dev_idx */ + DRBD_DISK_SIZE_SECT_DEF, /* disk_size */ + DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */ + DRBD_ON_IO_ERROR_DEF, /* on_io_error */ + DRBD_FENCING_DEF, /* fencing */ + DRBD_RATE_DEF, /* resync_rate */ + DRBD_AFTER_DEF, /* resync_after */ + DRBD_AL_EXTENTS_DEF, 
/* al_extents */ + DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */ + DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */ + DRBD_C_FILL_TARGET_DEF, /* c_fill_target */ + DRBD_C_MAX_RATE_DEF, /* c_max_rate */ + DRBD_C_MIN_RATE_DEF, /* c_min_rate */ + 0, /* no_disk_barrier */ + 0, /* no_disk_flush */ + 0, /* no_disk_drain */ + 0, /* no_md_flush */ + }; + + err = disk_conf_from_attrs(&nbc->dc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1267,7 +1395,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } /* Since we are diskless, fix the activity log first... */ - if (drbd_check_al_size(mdev)) { + if (drbd_check_al_size(mdev, &nbc->dc)) { retcode = ERR_NOMEM; goto force_diskless_dec; } @@ -1498,6 +1626,158 @@ out: return 0; } +static bool conn_resync_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_PAUSED_SYNC_S || + mdev->state.conn == C_PAUSED_SYNC_T) + return true; + } + return false; +} + +static bool conn_ov_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_VERIFY_S || + mdev->state.conn == C_VERIFY_T) + return true; + } + return false; +} + +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_tconn *tconn; + struct net_conf *new_conf = NULL; + int err; + int ovr; /* online verify running */ + int rsr; /* re-sync running */ + struct crypto_hash *verify_tfm = NULL; + struct crypto_hash *csums_tfm = NULL; + + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tconn = adm_ctx.tconn; + + new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + retcode 
= ERR_NOMEM; + goto out; + } + + /* we also need a net config + * to change the options on */ + if (!get_net_conf(tconn)) { + drbd_msg_put_info("net conf missing, try connect"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + conn_reconfig_start(tconn); + + memcpy(new_conf, tconn->net_conf, sizeof(*new_conf)); + err = net_conf_from_attrs_for_change(new_conf, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } + + /* re-sync running */ + rsr = conn_resync_running(tconn); + if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) { + retcode = ERR_CSUMS_RESYNC_RUNNING; + goto fail; + } + + if (!rsr && new_conf->csums_alg[0]) { + csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(csums_tfm)) { + csums_tfm = NULL; + retcode = ERR_CSUMS_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { + retcode = ERR_CSUMS_ALG_ND; + goto fail; + } + } + + /* online verify running */ + ovr = conn_ov_running(tconn); + if (ovr) { + if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) { + retcode = ERR_VERIFY_RUNNING; + goto fail; + } + } + + if (!ovr && new_conf->verify_alg[0]) { + verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(verify_tfm)) { + verify_tfm = NULL; + retcode = ERR_VERIFY_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { + retcode = ERR_VERIFY_ALG_ND; + goto fail; + } + } + + + /* For now, use struct assignment, not pointer assignment. + * We don't have any means to determine who might still + * keep a local alias into the struct, + * so we cannot just free it and hope for the best :( + * FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. 
+ */ + *tconn->net_conf = *new_conf; + + if (!rsr) { + crypto_free_hash(tconn->csums_tfm); + tconn->csums_tfm = csums_tfm; + csums_tfm = NULL; + } + if (!ovr) { + crypto_free_hash(tconn->verify_tfm); + tconn->verify_tfm = verify_tfm; + verify_tfm = NULL; + } + + if (tconn->cstate >= C_WF_REPORT_PARAMS) + drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); + + fail: + crypto_free_hash(csums_tfm); + crypto_free_hash(verify_tfm); + kfree(new_conf); + put_net_conf(tconn); + conn_reconfig_done(tconn); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { char hmac_name[CRYPTO_MAX_ALG_NAME]; @@ -1531,33 +1811,47 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } /* allocation not in the IO path, cqueue thread context */ - new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; goto fail; } - new_conf->timeout = DRBD_TIMEOUT_DEF; - new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; - new_conf->ping_int = DRBD_PING_INT_DEF; - new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; - new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - new_conf->ko_count = DRBD_KO_COUNT_DEF; - new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - new_conf->want_lose = 0; - new_conf->two_primaries = 0; - new_conf->wire_protocol = DRBD_PROT_C; - new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; - new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; - new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; - new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - - err = net_conf_from_attrs(new_conf, info->attrs); + *new_conf = (struct net_conf) { + {}, 0, /* my_addr */ + {}, 0, /* peer_addr 
*/ + {}, 0, /* shared_secret */ + {}, 0, /* cram_hmac_alg */ + {}, 0, /* integrity_alg */ + {}, 0, /* verify_alg */ + {}, 0, /* csums_alg */ + DRBD_PROTOCOL_DEF, /* wire_protocol */ + DRBD_CONNECT_INT_DEF, /* try_connect_int */ + DRBD_TIMEOUT_DEF, /* timeout */ + DRBD_PING_INT_DEF, /* ping_int */ + DRBD_PING_TIMEO_DEF, /* ping_timeo */ + DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */ + DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */ + DRBD_KO_COUNT_DEF, /* ko_count */ + DRBD_MAX_BUFFERS_DEF, /* max_buffers */ + DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */ + DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */ + DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */ + DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */ + DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */ + DRBD_RR_CONFLICT_DEF, /* rr_conflict */ + DRBD_ON_CONGESTION_DEF, /* on_congestion */ + DRBD_CONG_FILL_DEF, /* cong_fill */ + DRBD_CONG_EXTENTS_DEF, /* cong_extents */ + 0, /* two_primaries */ + 0, /* want_lose */ + 0, /* no_cork */ + 0, /* always_asbp */ + 0, /* dry_run */ + 0, /* use_rle */ + }; + + err = net_conf_from_attrs(new_conf, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1789,7 +2083,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) tconn = adm_ctx.tconn; memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { - err = disconnect_parms_from_attrs(&parms, info->attrs); + err = disconnect_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1848,7 +2142,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) memset(&rs, 0, sizeof(struct resize_parms)); if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { - err = resize_parms_from_attrs(&rs, info->attrs); + err = resize_parms_from_attrs(&rs, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1904,26 +2198,21 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) 
return 0; } -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; - int err; - int ovr; /* online verify running */ - int rsr; /* re-sync running */ - struct crypto_hash *verify_tfm = NULL; - struct crypto_hash *csums_tfm = NULL; - struct syncer_conf sc; cpumask_var_t new_cpu_mask; + struct drbd_tconn *tconn; int *rs_plan_s = NULL; - int fifo_size; + struct res_opts sc; + int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto fail; - mdev = adm_ctx.mdev; + tconn = adm_ctx.tconn; if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { retcode = ERR_NOMEM; @@ -1933,172 +2222,43 @@ int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) if (((struct drbd_genlmsghdr*)info->userhdr)->flags & DRBD_GENL_F_SET_DEFAULTS) { - memset(&sc, 0, sizeof(struct syncer_conf)); - sc.rate = DRBD_RATE_DEF; - sc.after = DRBD_AFTER_DEF; - sc.al_extents = DRBD_AL_EXTENTS_DEF; + memset(&sc, 0, sizeof(struct res_opts)); sc.on_no_data = DRBD_ON_NO_DATA_DEF; - sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; - sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF; - sc.c_fill_target = DRBD_C_FILL_TARGET_DEF; - sc.c_max_rate = DRBD_C_MAX_RATE_DEF; - sc.c_min_rate = DRBD_C_MIN_RATE_DEF; } else - memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); + sc = tconn->res_opts; - err = syncer_conf_from_attrs(&sc, info->attrs); + err = res_opts_from_attrs(&sc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - /* re-sync running */ - rsr = ( mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_PAUSED_SYNC_S || - mdev->state.conn == C_PAUSED_SYNC_T ); - - if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) { - 
retcode = ERR_CSUMS_RESYNC_RUNNING; - goto fail; - } - - if (!rsr && sc.csums_alg[0]) { - csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(csums_tfm)) { - csums_tfm = NULL; - retcode = ERR_CSUMS_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { - retcode = ERR_CSUMS_ALG_ND; - goto fail; - } - } - - /* online verify running */ - ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T); - - if (ovr) { - if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) { - retcode = ERR_VERIFY_RUNNING; - goto fail; - } - } - - if (!ovr && sc.verify_alg[0]) { - verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(verify_tfm)) { - verify_tfm = NULL; - retcode = ERR_VERIFY_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { - retcode = ERR_VERIFY_ALG_ND; - goto fail; - } - } - /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { err = __bitmap_parse(sc.cpu_mask, 32, 0, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); + conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); retcode = ERR_CPU_MASK_PARSE; goto fail; } } - if (!expect(sc.rate >= 1)) - sc.rate = 1; - - /* clip to allowed range */ - if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) - sc.al_extents = DRBD_AL_EXTENTS_MIN; - if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) - sc.al_extents = DRBD_AL_EXTENTS_MAX; - - /* most sanity checks done, try to assign the new sync-after - * dependency. need to hold the global lock in there, - * to avoid a race in the dependency loop check. 
*/ - retcode = drbd_alter_sa(mdev, sc.after); - if (retcode != NO_ERROR) - goto fail; - - fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { - rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); - if (!rs_plan_s) { - dev_err(DEV, "kmalloc of fifo_buffer failed"); - retcode = ERR_NOMEM; - goto fail; - } - } - - /* ok, assign the rest of it as well. - * lock against receive_SyncParam() */ - spin_lock(&mdev->peer_seq_lock); - mdev->sync_conf = sc; - - if (!rsr) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; - csums_tfm = NULL; - } - - if (!ovr) { - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; - verify_tfm = NULL; - } - - if (fifo_size != mdev->rs_plan_s.size) { - kfree(mdev->rs_plan_s.values); - mdev->rs_plan_s.values = rs_plan_s; - mdev->rs_plan_s.size = fifo_size; - mdev->rs_planed = 0; - rs_plan_s = NULL; - } - - spin_unlock(&mdev->peer_seq_lock); - if (get_ldev(mdev)) { - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - drbd_al_shrink(mdev); - err = drbd_check_al_size(mdev); - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); + tconn->res_opts = sc; - put_ldev(mdev); - drbd_md_sync(mdev); - - if (err) { - retcode = ERR_NOMEM; - goto fail; - } - } - - if (mdev->state.conn >= C_CONNECTED) - drbd_send_sync_param(mdev, &sc); - - if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) { - cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(mdev->tconn); - mdev->tconn->receiver.reset_cpu_mask = 1; - mdev->tconn->asender.reset_cpu_mask = 1; - mdev->tconn->worker.reset_cpu_mask = 1; + if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(tconn); + tconn->receiver.reset_cpu_mask = 1; + tconn->asender.reset_cpu_mask = 1; + tconn->worker.reset_cpu_mask = 1; } - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: kfree(rs_plan_s); 
free_cpumask_var(new_cpu_mask); - crypto_free_hash(csums_tfm); - crypto_free_hash(verify_tfm); drbd_adm_finish(info, retcode); return 0; @@ -2307,6 +2467,9 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) goto nla_put_failure; + if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) + goto nla_put_failure; + if (got_ldev) if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) goto nla_put_failure; @@ -2314,9 +2477,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) goto nla_put_failure; - if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive)) - goto nla_put_failure; - nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; @@ -2532,7 +2692,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) /* resume from last known position, if possible */ struct start_ov_parms parms = { .ov_start_sector = mdev->ov_start_sector }; - int err = start_ov_parms_from_attrs(&parms, info->attrs); + int err = start_ov_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -2568,7 +2728,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; memset(&args, 0, sizeof(args)); if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { - err = new_c_uuid_parms_from_attrs(&args, info->attrs); + err = new_c_uuid_parms_from_attrs(&args, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 50c52712715e..c8c826b2444f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -763,7 +763,7 @@ int drbd_connected(int vnr, void *p, void *data) &mdev->tconn->cstate_mutex : &mdev->own_state_mutex; - ok 
&= drbd_send_sync_param(mdev, &mdev->sync_conf); + ok &= drbd_send_sync_param(mdev); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); ok &= drbd_send_state(mdev); @@ -2085,7 +2085,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) int throttle = 0; /* feature disabled? */ - if (mdev->sync_conf.c_min_rate == 0) + if (mdev->ldev->dc.c_min_rate == 0) return 0; spin_lock_irq(&mdev->al_lock); @@ -2125,7 +2125,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) db = mdev->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); - if (dbdt > mdev->sync_conf.c_min_rate) + if (dbdt > mdev->ldev->dc.c_min_rate) throttle = 1; } return throttle; @@ -3001,7 +3001,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size) return false; - mdev->sync_conf.rate = be32_to_cpu(p->rate); + if (get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + put_ldev(mdev); + } if (apv >= 88) { if (apv == 88) { @@ -3029,10 +3032,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { + if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg, p->verify_alg); goto disconnect; } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3043,10 +3046,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { + if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg, p->csums_alg); goto disconnect; } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3057,37 +3060,39 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv > 94) { - mdev->sync_conf.rate = be32_to_cpu(p->rate); - mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); - mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); - mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); - mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); + if (apv > 94 && get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); + mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target); + mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target); + mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate); - fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; + fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); + put_ldev(mdev); goto disconnect; } } + put_ldev(mdev); } spin_lock(&mdev->peer_seq_lock); /* lock against drbd_nl_syncer_conf() */ if (verify_tfm) { - strcpy(mdev->sync_conf.verify_alg, p->verify_alg); - mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; + strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = verify_tfm; dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(mdev->sync_conf.csums_alg, p->csums_alg); - mdev->sync_conf.csums_alg_len = 
strlen(p->csums_alg) + 1; - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; + strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = csums_tfm; dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); } if (fifo_size != mdev->rs_plan_s.size) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 11685658659e..77fad527fb1d 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -402,7 +402,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_CONNECTED_OUTDATES; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) + (mdev->tconn->net_conf->verify_alg[0] == 0)) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && @@ -668,7 +668,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && + if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a730520e468e..005876b32f74 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -310,12 +310,12 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { sector_t sector 
= peer_req->i.sector; unsigned int size = peer_req->i.size; - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); /* Free peer_req and pages before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on @@ -451,13 +451,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev) spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ - steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ - want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps; + want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps; } else { /* normal path */ - want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target : - sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10); + want = mdev->ldev->dc.c_fill_target ? 
mdev->ldev->dc.c_fill_target : + sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10); } correction = want - mdev->rs_in_flight - mdev->rs_planed; @@ -476,7 +476,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev) if (req_sect < 0) req_sect = 0; - max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ; + max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ; if (req_sect > max_sect) req_sect = max_sect; @@ -492,11 +492,11 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ + if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - mdev->c_sync_rate = mdev->sync_conf.rate; + mdev->c_sync_rate = mdev->ldev->dc.resync_rate; number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } @@ -619,7 +619,7 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) { + if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) { switch (read_for_csum(mdev, sector, size)) { case -EIO: /* Disk failure */ put_ldev(mdev); @@ -810,7 +810,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (mdev->csums_tfm && mdev->rs_total) { + if (mdev->tconn->csums_tfm && mdev->rs_total) { const unsigned long s = mdev->rs_same_csum; const unsigned long t = mdev->rs_total; const int ratio = @@ -1019,13 +1019,13 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) /* quick hack to try to avoid a race against reconfiguration. 
* a real fix would be much more involved, * introducing more locking mechanisms */ - if (mdev->csums_tfm) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + if (mdev->tconn->csums_tfm) { + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); D_ASSERT(digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } @@ -1069,7 +1069,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { ok = 0; /* terminate the connection in case the allocation failed */ @@ -1077,7 +1077,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) } if (likely(!(peer_req->flags & EE_WAS_ERROR))) - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); else memset(digest, 0, digest_size); @@ -1141,10 +1141,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) di = peer_req->digest; if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); D_ASSERT(digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); @@ -1319,9 +1319,9 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) struct drbd_conf *odev = mdev; while (1) { - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return 1; - odev = minor_to_mdev(odev->sync_conf.after); + odev = 
minor_to_mdev(odev->ldev->dc.resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1408,11 +1408,11 @@ static int sync_after_error(struct drbd_conf *mdev, int o_minor) return ERR_SYNC_AFTER_CYCLE; /* dependency chain ends here, no cycles. */ - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(odev->sync_conf.after); + odev = minor_to_mdev(odev->ldev->dc.resync_after); } } @@ -1424,7 +1424,7 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) write_lock_irq(&global_state_lock); retcode = sync_after_error(mdev, na); if (retcode == NO_ERROR) { - mdev->sync_conf.after = na; + mdev->ldev->dc.resync_after = na; do { changes = _drbd_pause_after(mdev); changes |= _drbd_resume_next(mdev); @@ -1637,7 +1637,7 @@ int drbd_worker(struct drbd_thread *thi) struct drbd_work *w = NULL; struct drbd_conf *mdev; LIST_HEAD(work_list); - int minor, intr = 0; + int vnr, intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); @@ -1722,7 +1722,7 @@ int drbd_worker(struct drbd_thread *thi) spin_unlock_irq(&tconn->data.work.q_lock); drbd_thread_stop(&tconn->receiver); - idr_for_each_entry(&tconn->volumes, mdev, minor) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. * wait here for the exiting receiver. 
*/ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index a07d69279b1a..938e8560a833 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -102,66 +102,73 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, - __u64_field(1, GENLA_F_MANDATORY, disk_size) - __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) - __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) - __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) - __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) + __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) + __u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + + /* use the resize command to try and change the disk_size */ + __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) + /* we could change the max_bio_bvecs, + * but it won't propagate through the stack */ + __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) __u32_field(7, GENLA_F_MANDATORY, fencing) - __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) - __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) - __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) - __flg_field(11, GENLA_F_MANDATORY, no_md_flush) - __flg_field(12, GENLA_F_MANDATORY, use_bmbv) + + __u32_field(8, GENLA_F_MANDATORY, resync_rate) + __u32_field(9, GENLA_F_MANDATORY, resync_after) + __u32_field(10, GENLA_F_MANDATORY, al_extents) + __u32_field(11, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(12, GENLA_F_MANDATORY, c_delay_target) + __u32_field(13, GENLA_F_MANDATORY, c_fill_target) + __u32_field(14, GENLA_F_MANDATORY, c_max_rate) + __u32_field(15, GENLA_F_MANDATORY, c_min_rate) + + __flg_field(16, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(17, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(18, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(19, GENLA_F_MANDATORY, 
no_md_flush) + ) -GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, - __u32_field(1, GENLA_F_MANDATORY, rate) - __u32_field(2, GENLA_F_MANDATORY, after) - __u32_field(3, GENLA_F_MANDATORY, al_extents) - __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) - __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __flg_field(7, GENLA_F_MANDATORY, use_rle) - __u32_field(8, GENLA_F_MANDATORY, on_no_data) - __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) - __u32_field(10, GENLA_F_MANDATORY, c_delay_target) - __u32_field(11, GENLA_F_MANDATORY, c_fill_target) - __u32_field(12, GENLA_F_MANDATORY, c_max_rate) - __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, + __str_field(1, GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field(2, GENLA_F_MANDATORY, on_no_data) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) + __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) + __str_field(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field(4, GENLA_F_REQUIRED, my_addr, 128) - __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) - __u32_field(6, GENLA_F_REQUIRED, wire_protocol) - __u32_field(7, GENLA_F_MANDATORY, try_connect_int) - __u32_field(8, GENLA_F_MANDATORY, timeout) - __u32_field(9, GENLA_F_MANDATORY, ping_int) - __u32_field(10, GENLA_F_MANDATORY, ping_timeo) - __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) - __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) - __u32_field(13, GENLA_F_MANDATORY, ko_count) - __u32_field(14, GENLA_F_MANDATORY, max_buffers) - __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) - __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) - 
__u32_field(17, GENLA_F_MANDATORY, after_sb_0p) - __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) - __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) - __u32_field(20, GENLA_F_MANDATORY, rr_conflict) - __u32_field(21, GENLA_F_MANDATORY, on_congestion) - __u32_field(22, GENLA_F_MANDATORY, cong_fill) - __u32_field(23, GENLA_F_MANDATORY, cong_extents) - __flg_field(24, GENLA_F_MANDATORY, two_primaries) - __flg_field(25, GENLA_F_MANDATORY, want_lose) - __flg_field(26, GENLA_F_MANDATORY, no_cork) - __flg_field(27, GENLA_F_MANDATORY, always_asbp) - __flg_field(28, GENLA_F_MANDATORY, dry_run) + __str_field(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field(8, GENLA_F_MANDATORY, wire_protocol) + __u32_field(9, GENLA_F_MANDATORY, try_connect_int) + __u32_field(10, GENLA_F_MANDATORY, timeout) + __u32_field(11, GENLA_F_MANDATORY, ping_int) + __u32_field(12, GENLA_F_MANDATORY, ping_timeo) + __u32_field(13, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(14, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(15, GENLA_F_MANDATORY, ko_count) + __u32_field(16, GENLA_F_MANDATORY, max_buffers) + __u32_field(17, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(18, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(19, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(20, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(21, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(22, GENLA_F_MANDATORY, rr_conflict) + __u32_field(23, GENLA_F_MANDATORY, on_congestion) + __u32_field(24, GENLA_F_MANDATORY, cong_fill) + __u32_field(25, GENLA_F_MANDATORY, cong_extents) + __flg_field(26, GENLA_F_MANDATORY, two_primaries) + __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) + __flg_field(28, GENLA_F_MANDATORY, no_cork) + __flg_field(29, GENLA_F_MANDATORY, always_asbp) + __flg_field(30, 
GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) + __flg_field(31, GENLA_F_MANDATORY, use_rle) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, @@ -270,11 +277,10 @@ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on replication links */ -GENL_op(DRBD_ADM_SYNCER, 9, - GENL_doit(drbd_adm_syncer), +GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, + GENL_doit(drbd_adm_resource_opts), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY) ) GENL_op( @@ -284,16 +290,28 @@ GENL_op( GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) ) +GENL_op( + DRBD_ADM_CHG_NET_OPTS, 29, + GENL_doit(drbd_adm_net_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on minors */ GENL_op(DRBD_ADM_ATTACH, 12, GENL_doit(drbd_adm_attach), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) ) +GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, + GENL_doit(drbd_adm_disk_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED) +) + GENL_op( DRBD_ADM_RESIZE, 13, GENL_doit(drbd_adm_resize), @@ -301,7 +319,6 @@ GENL_op( GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) ) - /* operates on all volumes within a resource */ GENL_op( DRBD_ADM_PRIMARY, 14, GENL_doit(drbd_adm_set_role), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 22920a8af4e2..659a8eb38830 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -166,5 +166,7 @@ #define 
DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_PROTOCOL_DEF DRBD_PROT_C + #undef RANGE #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index c8c67239f616..e458282a3728 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -190,11 +190,12 @@ static struct nlattr *nested_attr_tb[128]; #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ - /* static, potentially unused */ \ -int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +/* *_from_attrs functions are static, but potentially unused */ \ +static int __ ## s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info, bool exclude_invariants) \ { \ const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ - struct nlattr *tla = tb[tag_number]; \ + struct nlattr *tla = info->attrs[tag_number]; \ struct nlattr **ntb = nested_attr_tb; \ struct nlattr *nla; \ int err; \ @@ -211,33 +212,49 @@ int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ \ s_fields \ return 0; \ -} +} __attribute__((unused)) \ +static int s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, false); \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs_for_change(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, true); \ +} __attribute__((unused)) \ -#undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) 
\ nla = ntb[__nla_type(attr_nr)]; \ if (nla) { \ - if (s) \ - s->name = __get(nla); \ - DPRINT_FIELD("<<", nla_type, name, s, nla); \ + if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + pr_info("<< must not change invariant attr: %s\n", #name); \ + return -EEXIST; \ + } \ + assignment; \ + } else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + /* attribute missing from payload, */ \ + /* which was expected */ \ } else if ((attr_flag) & GENLA_F_REQUIRED) { \ pr_info("<< missing attr: %s\n", #name); \ return -ENOMSG; \ } +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla)) + /* validate_nla() already checked nla_len <= maxlen appropriately. */ #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ - nla = ntb[__nla_type(attr_nr)]; \ - if (nla) { \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name ## _len = \ __get(s->name, nla, maxlen); \ - DPRINT_ARRAY("<<", nla_type, name, s, nla); \ - } else if ((attr_flag) & GENLA_F_REQUIRED) { \ - pr_info("<< missing attr: %s\n", #name); \ - return -ENOMSG; \ - } \ + DPRINT_ARRAY("<<", nla_type, name, s, nla)) #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 745ebfd6c7e5..9a605b9ee834 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -59,12 +59,20 @@ enum { GENLA_F_MANDATORY = 1 << 14, GENLA_F_REQUIRED = 1 << 15, - /* This will not be present in the __u16 .nla_type, but can be - * triggered on in _to_skb, to exclude "sensitive" - * information from broadcasts, or on unpriviledged get requests. - * This is useful because genetlink multicast groups can be listened in - * on by anyone. 
*/ + /* Below will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb resp. _from_attrs */ + + /* To exclude "sensitive" information from broadcasts, or on + * unprivileged get requests. This is useful because genetlink + * multicast groups can be listened in on by anyone. */ GENLA_F_SENSITIVE = 1 << 16, + + /* INVARIANT options cannot be changed at runtime. + * Useful to share an attribute policy and struct definition, + * between some "create" and "change" commands, + * but disallow certain fields to be changed online. + */ + GENLA_F_INVARIANT = 1 << 17, }; #define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) -- cgit v1.2.3 From cb703454a283d8dd5599e928eeea30367ca18874 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 24 Mar 2011 11:03:07 +0100 Subject: drbd: Converted drbd_try_outdate_peer() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_nl.c | 144 ++++++++++++++++++------------------- drivers/block/drbd/drbd_receiver.c | 7 +- drivers/block/drbd/drbd_state.c | 85 ++++++++++++++++++---- drivers/block/drbd/drbd_state.h | 5 ++ include/linux/drbd.h | 3 +- 6 files changed, 152 insertions(+), 96 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c1eb4462096e..74637cc1461c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1472,8 +1472,8 @@ extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force); -extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); -extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); +extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); +extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char
*cmd); /* drbd_worker.c */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f1ec727f7df5..85290a9beb6d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -366,116 +366,122 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd) return ret; } -enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) +static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) { + enum drbd_fencing_p fp = FP_NOT_AVAIL; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (get_ldev_if_state(mdev, D_CONSISTENT)) { + fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing); + put_ldev(mdev); + } + } + + return fp; +} + +bool conn_try_outdate_peer(struct drbd_tconn *tconn) +{ + union drbd_state mask = { }; + union drbd_state val = { }; + enum drbd_fencing_p fp; char *ex_to_string; int r; - enum drbd_disk_state nps; - enum drbd_fencing_p fp; - D_ASSERT(mdev->state.pdsk == D_UNKNOWN); + if (tconn->cstate >= C_WF_REPORT_PARAMS) { + conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n"); + return false; + } - if (get_ldev_if_state(mdev, D_CONSISTENT)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } else { - dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); - nps = mdev->state.pdsk; + fp = highest_fencing_policy(tconn); + switch (fp) { + case FP_NOT_AVAIL: + conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n"); goto out; + case FP_DONT_CARE: + return true; + default: ; } - r = drbd_khelper(mdev, "fence-peer"); + r = conn_khelper(tconn, "fence-peer"); switch ((r>>8) & 0xff) { case 3: /* peer is inconsistent */ ex_to_string = "peer is inconsistent or worse"; - nps = D_INCONSISTENT; + mask.pdsk = D_MASK; + val.pdsk = D_INCONSISTENT; break; case 4: /* peer got outdated, or was already outdated */ ex_to_string = "peer was fenced"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; case 5: /* peer 
was down */ - if (mdev->state.disk == D_UP_TO_DATE) { + if (conn_highest_disk(tconn) == D_UP_TO_DATE) { /* we will(have) create(d) a new UUID anyways... */ ex_to_string = "peer is unreachable, assumed to be dead"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; } else { ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; - nps = mdev->state.pdsk; } break; case 6: /* Peer is primary, voluntarily outdate myself. * This is useful when an unconnected R_SECONDARY is asked to * become R_PRIMARY, but finds the other peer being active. */ ex_to_string = "peer is active"; - dev_warn(DEV, "Peer is primary, outdating myself.\n"); - nps = D_UNKNOWN; - _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE); + conn_warn(tconn, "Peer is primary, outdating myself.\n"); + mask.disk = D_MASK; + val.disk = D_OUTDATED; break; case 7: if (fp != FP_STONITH) - dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n"); + conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; default: /* The script is broken ... */ - nps = D_UNKNOWN; - dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); - return nps; + conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); + return false; /* Eventually leave IO frozen */ } - dev_info(DEV, "fence-peer helper returned %d (%s)\n", - (r>>8) & 0xff, ex_to_string); + conn_info(tconn, "fence-peer helper returned %d (%s)\n", + (r>>8) & 0xff, ex_to_string); -out: - if (mdev->state.susp_fen && nps >= D_UNKNOWN) { - /* The handler was not successful... unfreeze here, the - state engine can not unfreeze... */ - _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE); - } + out: - return nps; + /* Not using + conn_request_state(tconn, mask, val, CS_VERBOSE); + here, because we might were able to re-establish the connection in the + meantime. 
*/ + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate < C_WF_REPORT_PARAMS) + _conn_request_state(tconn, mask, val, CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); + + return conn_highest_pdsk(tconn) <= D_OUTDATED; } static int _try_outdate_peer_async(void *data) { - struct drbd_conf *mdev = (struct drbd_conf *)data; - enum drbd_disk_state nps; - union drbd_state ns; + struct drbd_tconn *tconn = (struct drbd_tconn *)data; - nps = drbd_try_outdate_peer(mdev); - - /* Not using - drbd_request_state(mdev, NS(pdsk, nps)); - here, because we might were able to re-establish the connection - in the meantime. This can only partially be solved in the state's - engine is_valid_state() and is_valid_state_transition() - functions. - - nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN. - pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, - therefore we have to have the pre state change check here. - */ - spin_lock_irq(&mdev->tconn->req_lock); - ns = mdev->state; - if (ns.conn < C_WF_REPORT_PARAMS) { - ns.pdsk = nps; - _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - } - spin_unlock_irq(&mdev->tconn->req_lock); + conn_try_outdate_peer(tconn); return 0; } -void drbd_try_outdate_peer_async(struct drbd_conf *mdev) +void conn_try_outdate_peer_async(struct drbd_tconn *tconn) { struct task_struct *opa; - opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev)); + opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h"); if (IS_ERR(opa)) - dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); + conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n"); } enum drbd_state_rv @@ -486,7 +492,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) int try = 0; int forced = 0; union drbd_state mask, val; - enum drbd_disk_state nps; if (new_role == R_PRIMARY) request_ping(mdev->tconn); /* Detect a dead peer ASAP */ @@ -519,32 +524,23 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role 
new_role, int force) if (rv == SS_NO_UP_TO_DATE_DISK && mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { D_ASSERT(mdev->state.pdsk == D_UNKNOWN); - nps = drbd_try_outdate_peer(mdev); - if (nps == D_OUTDATED || nps == D_INCONSISTENT) { + if (conn_try_outdate_peer(mdev->tconn)) { val.disk = D_UP_TO_DATE; mask.disk = D_MASK; } - - val.pdsk = nps; - mask.pdsk = D_MASK; - continue; } if (rv == SS_NOTHING_TO_DO) goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { - nps = drbd_try_outdate_peer(mdev); - - if (force && nps > D_OUTDATED) { + if (!conn_try_outdate_peer(mdev->tconn) && force) { dev_warn(DEV, "Forced into split brain situation!\n"); - nps = D_OUTDATED; - } - - mask.pdsk = D_MASK; - val.pdsk = nps; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; + } continue; } if (rv == SS_TWO_PRIMARIES) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1fd871bc889e..91aa49f478e8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4030,9 +4030,11 @@ static void drbd_disconnect(struct drbd_tconn *tconn) drbd_free_sock(tconn); idr_for_each(&tconn->volumes, drbd_disconnected, tconn); - conn_info(tconn, "Connection closed\n"); + if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) + conn_try_outdate_peer_async(tconn); + spin_lock_irq(&tconn->req_lock); oc = tconn->cstate; if (oc >= C_UNCONNECTED) @@ -4109,9 +4111,6 @@ static int drbd_disconnected(int vnr, void *p, void *data) put_ldev(mdev); } - if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) - drbd_try_outdate_peer_async(mdev); - /* serialize with bitmap writeout triggered by the state change, * if any. 
*/ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 52ff1c7379e9..b4f668db3296 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -61,6 +61,73 @@ bool conn_all_vols_unconf(struct drbd_tconn *tconn) return true; } +/* Unfortunately the states were not correctly ordered when + they were defined, therefore we cannot use max_t() here. */ +static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_PRIMARY || role2 == R_PRIMARY) + return R_PRIMARY; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_UNKNOWN; +} +static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_UNKNOWN || role2 == R_UNKNOWN) + return R_UNKNOWN; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_PRIMARY; +} + +enum drbd_role conn_highest_role(struct drbd_tconn *tconn) +{ + enum drbd_role role = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + role = max_role(role, mdev->state.role); + + return role; +} + +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn) +{ + enum drbd_role peer = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + peer = max_role(peer, mdev->state.peer); + + return peer; +} + +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.disk); + + return ds; +} + +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk); + +
return ds; +} + /** * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. @@ -329,18 +396,6 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio dev_info(DEV, "%s\n", pb); } -static bool vol_has_primary_peer(struct drbd_tconn *tconn) -{ - struct drbd_conf *mdev; - int vnr; - - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (mdev->state.peer == R_PRIMARY) - return true; - } - return false; -} - /** * is_valid_state() - Returns an SS_ error code if ns is not valid * @mdev: DRBD device. @@ -364,7 +419,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) { if (ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; - else if (vol_has_primary_peer(mdev->tconn)) + else if (conn_highest_peer(mdev->tconn) == R_PRIMARY) rv = SS_O_VOL_PEER_PRI; } put_net_conf(mdev->tconn); @@ -1390,8 +1445,8 @@ static int _set_state_itr_fn(int vnr, void *p, void *data) rv = __drbd_set_state(mdev, ns, flags, NULL); - ms.role = max_t(enum drbd_role, mdev->state.role, ms.role); - ms.peer = max_t(enum drbd_role, mdev->state.peer, ms.peer); + ms.role = max_role(ns.role, ms.role); + ms.peer = max_role(ns.peer, ms.peer); ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk); ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk); params->ms = ms; diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 55df0728bc88..394a1998acd9 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -110,4 +110,9 @@ static inline int drbd_request_state(struct drbd_conf *mdev, return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); } +enum drbd_role conn_highest_role(struct drbd_tconn *tconn); +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); + #endif 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9cdb888607ae..60d308819096 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -65,7 +65,8 @@ enum drbd_io_error_p { }; enum drbd_fencing_p { - FP_DONT_CARE, + FP_NOT_AVAIL = -1, /* Not a policy */ + FP_DONT_CARE = 0, FP_RESOURCE, FP_STONITH }; -- cgit v1.2.3 From 0c8e36d9b843be56e4e43d4ef3c3eb6a97205599 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Mar 2011 16:00:17 +0200 Subject: drbd: Introduce protocol version 100 headers The 8 byte header finally becomes too small. With the protocol 100 header we have 16 bit for the volume number, proper 32 bit for the data length, and 32 bit for further extensions in the future. Previous versions of drbd are using version 80 headers for all packets short enough for protocol 80. They support both header versions in worker context, but only version 80 headers in asynchronous context. For backwards compatibility, continue to use version 80 headers for short packets before protocol version 100. From protocol version 100 on, use the same header version for all packets. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++++++ drivers/block/drbd/drbd_main.c | 32 ++++++++++++++++++++++++++------ drivers/block/drbd/drbd_nl.c | 3 +-- drivers/block/drbd/drbd_receiver.c | 14 ++++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 2 -- include/linux/drbd_limits.h | 2 ++ 7 files changed, 50 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6d55bb75a081..bf1aad683387 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -307,6 +307,14 @@ struct p_header95 { u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. 
*/ } __packed; +struct p_header100 { + u32 magic; + u16 volume; + u16 command; + u32 length; + u32 pad; +} __packed; + extern unsigned int drbd_header_size(struct drbd_tconn *tconn); /* these defines must not be changed without changing the protocol version */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b9dcc50135c4..5d9112cefcd7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -698,9 +698,15 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) */ unsigned int drbd_header_size(struct drbd_tconn *tconn) { - BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); - BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); - return sizeof(struct p_header80); + if (tconn->agreed_pro_version >= 100) { + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8)); + return sizeof(struct p_header100); + } else { + BUILD_BUG_ON(sizeof(struct p_header80) != + sizeof(struct p_header95)); + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); + return sizeof(struct p_header80); + } } static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) @@ -719,10 +725,24 @@ static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, return sizeof(struct p_header95); } -static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, void *buffer, - enum drbd_packet cmd, int size) +static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd, + int size, int vnr) +{ + h->magic = cpu_to_be32(DRBD_MAGIC_100); + h->volume = cpu_to_be16(vnr); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); + h->pad = 0; + return sizeof(struct p_header100); +} + +static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, + void *buffer, enum drbd_packet cmd, int size) { - if (tconn->agreed_pro_version >= 95) + if (tconn->agreed_pro_version >= 100) + return prepare_header100(buffer, cmd, size, vnr); + else if 
(tconn->agreed_pro_version >= 95 && + size > DRBD_MAX_SIZE_H80_PACKET) return prepare_header95(buffer, cmd, size); else return prepare_header80(buffer, cmd, size); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d9bb1a5c756a..0f52b88719c8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2833,8 +2833,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) retcode = ERR_INVALID_REQUEST; goto out; } - /* FIXME we need a define here */ - if (adm_ctx.volume >= 256) { + if (adm_ctx.volume > DRBD_VOLUME_MAX) { drbd_msg_put_info("requested volume id out of range"); retcode = ERR_INVALID_REQUEST; goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7e0ab2246fb6..311b95453cb7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -983,8 +983,18 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i { unsigned int header_size = drbd_header_size(tconn); - if (header_size == sizeof(struct p_header95) && - *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { + if (header_size == sizeof(struct p_header100) && + *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { + struct p_header100 *h = header; + if (h->pad != 0) { + conn_err(tconn, "Header padding is not zero\n"); + return -EINVAL; + } + pi->vnr = be16_to_cpu(h->volume); + pi->cmd = be16_to_cpu(h->command); + pi->size = be32_to_cpu(h->length); + } else if (header_size == sizeof(struct p_header95) && + *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { struct p_header95 *h = header; pi->cmd = be16_to_cpu(h->command); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 60d308819096..fe8d6ba31bcb 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -341,6 +341,7 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +#define DRBD_MAGIC_100 0x8620ec20 /* how I came up with this magic? 
* base64 decode "actlog==" ;) */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 938e8560a833..10144d546a66 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -95,8 +95,6 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource. */ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - /* currently only 256 volumes per group, - * but maybe we still change that */ __u32_field(1, GENLA_F_MANDATORY, ctx_volume) __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) ) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 659a8eb38830..7f5149bef70e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -19,6 +19,8 @@ #define DRBD_MINOR_COUNT_MAX 256 #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_VOLUME_MAX 65535 + #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 -- cgit v1.2.3 From b032b6fa3528d6eed972db32257cb316a66e0dac Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Apr 2011 18:16:10 -0700 Subject: drbd: Allow online change of replication protocol only with agreed_pv >= 100 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++++ include/linux/drbd.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 40de384aade6..d4b29fd603f4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1671,6 +1671,11 @@ check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) struct drbd_conf *mdev; int i; + if (tconn->net_conf && tconn->agreed_pro_version < 100 && + tconn->cstate == C_WF_REPORT_PARAMS && + new_conf->wire_protocol != tconn->net_conf->wire_protocol) + return ERR_NEED_APV_100; + if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) return ERR_NOT_PROTO_C; diff --git 
a/include/linux/drbd.h b/include/linux/drbd.h index fe8d6ba31bcb..6c7c85d8fc41 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -160,6 +160,7 @@ enum drbd_ret_code { ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, + ERR_NEED_APV_100 = 163, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3 From d8cd289dbe69ce9b8115d6f200ceff657e5dafa0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 May 2011 12:27:11 +0200 Subject: drbd: Remove left-over unused define Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7f5149bef70e..bcebb016fda3 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,5 +170,4 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C -#undef RANGE #endif -- cgit v1.2.3 From b966b5dd8e17e6c105ca55533fd412de5d5b429e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 May 2011 14:56:09 +0200 Subject: drbd: Generate the drbd_set_*_defaults() functions from drbd_genl.h Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 86 +++++------------------------------- include/linux/drbd_genl.h | 91 +++++++++++++++++++-------------------- include/linux/genl_magic_func.h | 26 +++++++++++ include/linux/genl_magic_struct.h | 8 ++++ 6 files changed, 91 insertions(+), 124 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 44f77265d2b0..8655fcb82028 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1390,7 +1390,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ -extern void drbd_set_res_opts_default(struct res_opts *r); +extern void 
drbd_set_res_opts_defaults(struct res_opts *r); extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 427e959e4869..4ae3e7a99d7c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2488,7 +2488,7 @@ struct drbd_tconn *conn_create(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); - drbd_set_res_opts_default(&tconn->res_opts); + drbd_set_res_opts_defaults(&tconn->res_opts); down_write(&drbd_cfg_rwsem); kref_init(&tconn->kref); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7320ac00f0fb..f5732cf46c2f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1090,77 +1090,6 @@ static bool should_set_defaults(struct genl_info *info) return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); } -/* Maybe we should we generate these functions - * from the drbd_genl.h magic as well? - * That way we would not "accidentally forget" to add defaults here. 
*/ - -#define RESET_ARRAY_FIELD(field) do { \ - memset(field, 0, sizeof(field)); \ - field ## _len = 0; \ -} while (0) -void drbd_set_res_opts_default(struct res_opts *r) -{ - RESET_ARRAY_FIELD(r->cpu_mask); - r->on_no_data = DRBD_ON_NO_DATA_DEF; -} - -static void drbd_set_net_conf_defaults(struct net_conf *nc) -{ - /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT - * in drbd_genl.h, they can only be change with disconnect/reconnect */ - RESET_ARRAY_FIELD(nc->shared_secret); - - RESET_ARRAY_FIELD(nc->cram_hmac_alg); - RESET_ARRAY_FIELD(nc->integrity_alg); - RESET_ARRAY_FIELD(nc->verify_alg); - RESET_ARRAY_FIELD(nc->csums_alg); -#undef RESET_ARRAY_FIELD - - nc->wire_protocol = DRBD_PROTOCOL_DEF; - nc->try_connect_int = DRBD_CONNECT_INT_DEF; - nc->timeout = DRBD_TIMEOUT_DEF; - nc->ping_int = DRBD_PING_INT_DEF; - nc->ping_timeo = DRBD_PING_TIMEO_DEF; - nc->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - nc->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - nc->ko_count = DRBD_KO_COUNT_DEF; - nc->max_buffers = DRBD_MAX_BUFFERS_DEF; - nc->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - nc->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - nc->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - nc->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - nc->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - nc->rr_conflict = DRBD_RR_CONFLICT_DEF; - nc->on_congestion = DRBD_ON_CONGESTION_DEF; - nc->cong_fill = DRBD_CONG_FILL_DEF; - nc->cong_extents = DRBD_CONG_EXTENTS_DEF; - nc->two_primaries = 0; - nc->no_cork = 0; - nc->always_asbp = 0; - nc->use_rle = 0; -} - -static void drbd_set_disk_conf_defaults(struct disk_conf *dc) -{ - /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT - * in drbd_genl.h, they can only be change with detach/reattach */ - dc->on_io_error = DRBD_ON_IO_ERROR_DEF; - dc->fencing = DRBD_FENCING_DEF; - dc->resync_rate = DRBD_RATE_DEF; - dc->resync_after = DRBD_AFTER_DEF; - dc->al_extents = DRBD_AL_EXTENTS_DEF; - dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; - dc->c_delay_target = DRBD_C_DELAY_TARGET_DEF; - 
dc->c_fill_target = DRBD_C_FILL_TARGET_DEF; - dc->c_max_rate = DRBD_C_MAX_RATE_DEF; - dc->c_min_rate = DRBD_C_MIN_RATE_DEF; - dc->no_disk_barrier = 0; - dc->no_disk_flush = 0; - dc->no_disk_drain = 0; - dc->no_md_flush = 0; -} - - int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -1198,7 +1127,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf)); if (should_set_defaults(info)) - drbd_set_disk_conf_defaults(new_disk_conf); + set_disk_conf_defaults(new_disk_conf); err = disk_conf_from_attrs_for_change(new_disk_conf, info); if (err) { @@ -1315,7 +1244,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - drbd_set_disk_conf_defaults(&nbc->dc); + set_disk_conf_defaults(&nbc->dc); err = disk_conf_from_attrs(&nbc->dc, info); if (err) { @@ -1911,7 +1840,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) *new_conf = *old_conf; if (should_set_defaults(info)) - drbd_set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_conf); err = net_conf_from_attrs_for_change(new_conf, info); if (err) { @@ -2029,7 +1958,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } - drbd_set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_conf); err = net_conf_from_attrs(new_conf, info); if (err) { @@ -2301,6 +2230,11 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) return 0; } +void drbd_set_res_opts_defaults(struct res_opts *r) +{ + return set_res_opts_defaults(r); +} + int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -2325,7 +2259,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) res_opts = tconn->res_opts; if (should_set_defaults(info)) - drbd_set_res_opts_default(&res_opts); + set_res_opts_defaults(&res_opts); err = res_opts_from_attrs(&res_opts, info); if (err) { 
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 10144d546a66..549800668cb9 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -110,63 +110,62 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, * but it won't propagate through the stack */ __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) - __u32_field(6, GENLA_F_MANDATORY, on_io_error) - __u32_field(7, GENLA_F_MANDATORY, fencing) - - __u32_field(8, GENLA_F_MANDATORY, resync_rate) - __u32_field(9, GENLA_F_MANDATORY, resync_after) - __u32_field(10, GENLA_F_MANDATORY, al_extents) - __u32_field(11, GENLA_F_MANDATORY, c_plan_ahead) - __u32_field(12, GENLA_F_MANDATORY, c_delay_target) - __u32_field(13, GENLA_F_MANDATORY, c_fill_target) - __u32_field(14, GENLA_F_MANDATORY, c_max_rate) - __u32_field(15, GENLA_F_MANDATORY, c_min_rate) - - __flg_field(16, GENLA_F_MANDATORY, no_disk_barrier) - __flg_field(17, GENLA_F_MANDATORY, no_disk_flush) - __flg_field(18, GENLA_F_MANDATORY, no_disk_drain) - __flg_field(19, GENLA_F_MANDATORY, no_md_flush) - + __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) + + __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RATE_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) + __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + + __flg_field_def(16, GENLA_F_MANDATORY, no_disk_barrier, 0) + __flg_field_def(17, GENLA_F_MANDATORY, no_disk_flush, 0) + __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) + 
__flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, - __str_field(1, GENLA_F_MANDATORY, cpu_mask, 32) - __u32_field(2, GENLA_F_MANDATORY, on_no_data) + __str_field_def(1, GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) - __str_field(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __str_field_def(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field(8, GENLA_F_MANDATORY, wire_protocol) - __u32_field(9, GENLA_F_MANDATORY, try_connect_int) - __u32_field(10, GENLA_F_MANDATORY, timeout) - __u32_field(11, GENLA_F_MANDATORY, ping_int) - __u32_field(12, GENLA_F_MANDATORY, ping_timeo) - __u32_field(13, GENLA_F_MANDATORY, sndbuf_size) - __u32_field(14, GENLA_F_MANDATORY, rcvbuf_size) - __u32_field(15, GENLA_F_MANDATORY, ko_count) - __u32_field(16, GENLA_F_MANDATORY, max_buffers) - __u32_field(17, GENLA_F_MANDATORY, max_epoch_size) - __u32_field(18, GENLA_F_MANDATORY, unplug_watermark) - __u32_field(19, GENLA_F_MANDATORY, after_sb_0p) - __u32_field(20, GENLA_F_MANDATORY, after_sb_1p) - __u32_field(21, GENLA_F_MANDATORY, after_sb_2p) - __u32_field(22, GENLA_F_MANDATORY, rr_conflict) - __u32_field(23, GENLA_F_MANDATORY, on_congestion) - __u32_field(24, GENLA_F_MANDATORY, cong_fill) - __u32_field(25, GENLA_F_MANDATORY, cong_extents) - __flg_field(26, GENLA_F_MANDATORY, two_primaries) + __str_field_def(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(5, 
GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, try_connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(13, GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(14, GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(15, GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(16, GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(17, GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(18, GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(19, GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(20, GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(21, GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(22, GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field(28, GENLA_F_MANDATORY, no_cork) - __flg_field(29, GENLA_F_MANDATORY, always_asbp) + __flg_field_def(28, GENLA_F_MANDATORY, no_cork, 0) + __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field(31, GENLA_F_MANDATORY, use_rle) 
+ __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index e458282a3728..e908f1c50355 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -427,6 +427,32 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #include GENL_MAGIC_INCLUDE_FILE + +/* Functions for initializing structs to default values. */ + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) +#undef __u32_field_def +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __flg_field_def +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __str_field_def +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + memset(x->name, 0, sizeof(x->name)); \ + x->name ## _len = 0; +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \ +static void set_ ## s_name ## _defaults(struct s_name *x) { \ +s_fields \ +} + +#include GENL_MAGIC_INCLUDE_FILE + #endif /* __KERNEL__ */ /* }}}1 */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 9a605b9ee834..f2c7cc7831df 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -107,6 +107,14 @@ enum { __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ nla_memcpy, NLA_PUT) +/* fields with default values */ +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + __flg_field(attr_nr, attr_flag, name) +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + __u32_field(attr_nr, attr_flag, name) +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + __str_field(attr_nr, 
attr_flag, name, maxlen) + #define __nla_put_flag(skb, attrtype, value) \ do { \ if (value) \ -- cgit v1.2.3 From 563e4cf25ec804eb02cd30a41baa2fcc6c06679b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 4 May 2011 10:33:52 +0200 Subject: drbd: Introduce __s32_field in the genetlink macro magic ...and drop explicit typecasts (int)meta_dev_idx < 0. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 ++++---- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_struct.h | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a1854e3aa15e..b8ea4807c981 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1253,7 +1253,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -1289,7 +1289,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) */ bdev = blkdev_get_by_path(nbc->dc.meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - ((int)nbc->dc.meta_dev_idx < 0) ? + (nbc->dc.meta_dev_idx < 0) ? 
(void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, @@ -1325,7 +1325,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - if ((int)nbc->dc.meta_dev_idx < 0) { + if (nbc->dc.meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); @@ -1356,7 +1356,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - if ((int)nbc->dc.meta_dev_idx >= 0) + if (nbc->dc.meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 549800668cb9..f143e3c0f33b 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -102,7 +102,7 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) - __u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + __s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) /* use the resize command to try and change the disk_size */ __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f2c7cc7831df..ddbdd0a24476 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -97,6 +97,9 @@ enum { #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ nla_get_u32, NLA_PUT_U32) +#define __s32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ + nla_get_u32, 
NLA_PUT_U32) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ nla_get_u64, NLA_PUT_U64) -- cgit v1.2.3 From a5d8e1fb9d22851de89bbf52db6b11c56b895dd4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 16:06:51 +0200 Subject: drbd: Convert boolean flags on netlink from NLA_FLAG to NLA_U8 Flags of type NLA_FLAG are either present or absent, but do not have a value by themselves. Use type NLA_U8 for our boolean flags instead, and use the value to determine if the flag should be on or off. On the drbdsetup command line, all those flags have an optional yes/no argument which defaults to yes. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_struct.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index ddbdd0a24476..b1ddbb5bd725 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -86,8 +86,8 @@ enum { /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ - __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ - nla_get_flag, __nla_put_flag) + __field(attr_nr, attr_flag, name, NLA_U8, char, \ + nla_get_u8, NLA_PUT_U8) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ nla_get_u8, NLA_PUT_U8) @@ -118,12 +118,6 @@ enum { #define __str_field_def(attr_nr, attr_flag, name, maxlen) \ __str_field(attr_nr, attr_flag, name, maxlen) -#define __nla_put_flag(skb, attrtype, value) \ - do { \ - if (value) \ - NLA_PUT_FLAG(skb, attrtype); \ - } while (0) - #define GENL_op_init(args...) 
args #define GENL_doit(handler) \ .doit = handler, \ -- cgit v1.2.3 From 66b2f6b9c59c5e7003e13281dfe72e174f93988c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: drbd: Turn no-disk-flushes into disk-flushes={yes|no} Change the --no-disk-flushes drbdsetup command line option as well as the no_disk_flush netlink packet. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 83d39859a9fe..e7a6eeae94e2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1179,7 +1179,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) rcu_read_lock(); dc = rcu_dereference(mdev->ldev->disk_conf); - if (wo == WO_bdev_flush && dc->no_disk_flush) + if (wo == WO_bdev_flush && !dc->disk_flushes) wo = WO_drain_io; if (wo == WO_drain_io && dc->no_disk_drain) wo = WO_none; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index f143e3c0f33b..945c4dd3470c 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -122,8 +122,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, no_disk_barrier, 0) - __flg_field_def(17, GENLA_F_MANDATORY, no_disk_flush, 0) + __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) + __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) -- cgit v1.2.3 From d0c980e236243cd03aa2291243587ac1ba3c2b04 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 
+0200 Subject: drbd: Turn no-disk-drain into disk-drain={yes|no} Change the --no-disk-drain drbdsetup command line option as well as the no_disk_drain netlink packet. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e7a6eeae94e2..5d1bdda8ec9f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1181,7 +1181,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) if (wo == WO_bdev_flush && !dc->disk_flushes) wo = WO_drain_io; - if (wo == WO_drain_io && dc->no_disk_drain) + if (wo == WO_drain_io && !dc->disk_drain) wo = WO_none; rcu_read_unlock(); mdev->write_ordering = wo; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 945c4dd3470c..30ad6600b444 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -124,7 +124,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) - __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) + __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) -- cgit v1.2.3 From e544046ab842ab93c275a6fc4e043c1cb637076d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: drbd: Turn no-md-flushes into md-flushes={yes|no} Change the --no-md-flushes drbdsetup command line option as well as the no_md_flush netlink packet. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 6 +++--- include/linux/drbd_genl.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9af097416e26..4a946a877bde 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1449,10 +1449,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ - if (new_disk_conf->no_md_flush) - set_bit(MD_NO_FUA, &mdev->flags); - else + if (new_disk_conf->md_flushes) clear_bit(MD_NO_FUA, &mdev->flags); + else + set_bit(MD_NO_FUA, &mdev->flags); /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 30ad6600b444..53518fc23154 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -125,7 +125,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) - __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) + __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, 1) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, -- cgit v1.2.3 From bb77d34ecc6fe6cdc3f4f0841a516695c2eacc04 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: drbd: Turn no-tcp-cork into tcp-cork={yes|no} Change the --no-tcp-cork drbdsetup command line option as well as the no_cork netlink packet. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 2 +- include/linux/drbd_genl.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5d1bdda8ec9f..b4858bb78940 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5040,7 +5040,7 @@ int drbd_asender(struct drbd_thread *thi) int expect = header_size; bool ping_timeout_active = false; struct net_conf *nc; - int ping_timeo, no_cork, ping_int; + int ping_timeo, tcp_cork, ping_int; current->policy = SCHED_RR; /* Make this a realtime task! */ current->rt_priority = 2; /* more important than all other tasks */ @@ -5051,7 +5051,7 @@ int drbd_asender(struct drbd_thread *thi) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); ping_timeo = nc->ping_timeo; - no_cork = nc->no_cork; + tcp_cork = nc->tcp_cork; ping_int = nc->ping_int; rcu_read_unlock(); @@ -5066,14 +5066,14 @@ int drbd_asender(struct drbd_thread *thi) /* TODO: conditionally cork; it may hurt latency if we cork without much to send */ - if (!no_cork) + if (tcp_cork) drbd_tcp_cork(tconn->meta.socket); if (tconn_finish_peer_reqs(tconn)) { conn_err(tconn, "tconn_finish_peer_reqs() failed\n"); goto reconnect; } /* but unconditionally uncork unless disabled */ - if (!no_cork) + if (tcp_cork) drbd_tcp_uncork(tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e37c42d5dd6e..78c3de49eff6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1694,7 +1694,7 @@ int drbd_worker(struct drbd_thread *thi) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - cork = nc ? !nc->no_cork : 0; + cork = nc ? 
nc->tcp_cork : 0; rcu_read_unlock(); if (tconn->data.socket && cork) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 53518fc23154..6632d10f1ee1 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -162,7 +162,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field_def(28, GENLA_F_MANDATORY, no_cork, 0) + __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, 1) __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) -- cgit v1.2.3 From 7bac3e6f7e74993475a94487effe05dc1f68bdc7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 17:50:57 +0200 Subject: drbd: Also define the default values of boolean flags in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 16 ++++++++-------- include/linux/drbd_limits.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6632d10f1ee1..02647dc8c67c 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -122,10 +122,10 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) - __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) - __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) - __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, 1) + __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 
DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -160,12 +160,12 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) + __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, 1) - __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) + __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) + __flg_field_def(31, GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index bcebb016fda3..3d3e2d5125cb 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,4 +170,14 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C +#define DRBD_DISK_BARRIER_DEF 0 +#define DRBD_DISK_FLUSHES_DEF 1 +#define DRBD_DISK_DRAIN_DEF 1 +#define DRBD_MD_FLUSHES_DEF 1 +#define DRBD_TCP_CORK_DEF 1 + +#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 +#define DRBD_ALWAYS_ASBP_DEF 0 +#define DRBD_USE_RLE_DEF 0 + #endif -- cgit v1.2.3 From 6139f60dc192e2c5478c1126d1aff7905dc0a98a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 20:00:02 +0200 Subject: drbd: Rename the want_lose field/flag to discard_my_data This is what it is called in config files and on the 
command line as well. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 14 +++++++------- include/linux/drbd_genl.h | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 56b190c65546..fa36757ffc4a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -413,7 +413,7 @@ struct p_rs_param_95 { } __packed; enum drbd_conn_flags { - CF_WANT_LOSE = 1, + CF_DISCARD_MY_DATA = 1, CF_DRY_RUN = 2, }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 22c2b4c881da..86c8bc5ac603 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -943,8 +943,8 @@ int __drbd_send_protocol(struct drbd_tconn *tconn) p->after_sb_2p = cpu_to_be32(nc->after_sb_2p); p->two_primaries = cpu_to_be32(nc->two_primaries); cf = 0; - if (nc->want_lose) - cf |= CF_WANT_LOSE; + if (nc->discard_my_data) + cf |= CF_DISCARD_MY_DATA; if (nc->dry_run) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); @@ -988,7 +988,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) mdev->comm_bm_set = drbd_bm_total_weight(mdev); p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); rcu_read_lock(); - uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->want_lose ? 1 : 0; + uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0; rcu_read_unlock(); uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 
4 : 0; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 195428ee6052..9a82306adf92 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -606,7 +606,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) mutex_lock(&mdev->tconn->conf_update); nc = mdev->tconn->net_conf; if (nc) - nc->want_lose = 0; /* without copy; single bit op is atomic */ + nc->discard_my_data = 0; /* without copy; single bit op is atomic */ mutex_unlock(&mdev->tconn->conf_update); set_disk_ro(mdev->vdisk, false); @@ -1738,7 +1738,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) return ERR_STONITH_AND_PROT_A; } - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) + if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) return ERR_DISCARD; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index aa42967398e3..e4e8f8a408d1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2908,9 +2908,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (nc->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + if (nc->discard_my_data && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!nc->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + if (!nc->discard_my_data && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -3009,7 +3009,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_protocol *p = pi->data; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_want_lose, p_two_primaries, cf; + int p_discard_my_data, p_two_primaries, cf; struct net_conf *nc; p_proto = be32_to_cpu(p->protocol); @@ -3018,7 +3018,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) p_after_sb_2p = be32_to_cpu(p->after_sb_2p); p_two_primaries 
= be32_to_cpu(p->two_primaries); cf = be32_to_cpu(p->conn_flags); - p_want_lose = cf & CF_WANT_LOSE; + p_discard_my_data = cf & CF_DISCARD_MY_DATA; if (tconn->agreed_pro_version >= 87) { char integrity_alg[SHARED_SECRET_MAX]; @@ -3075,8 +3075,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect_rcu_unlock; } - if (p_want_lose && nc->want_lose) { - conn_err(tconn, "both sides have the 'want_lose' flag set\n"); + if (p_discard_my_data && nc->discard_my_data) { + conn_err(tconn, "both sides have the 'discard_my_data' flag set\n"); goto disconnect_rcu_unlock; } @@ -3806,7 +3806,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } mutex_lock(&mdev->tconn->conf_update); - mdev->tconn->net_conf->want_lose = 0; /* without copy; single bit op is atomic */ + mdev->tconn->net_conf->discard_my_data = 0; /* without copy; single bit op is atomic */ mutex_unlock(&mdev->tconn->conf_update); drbd_md_sync(mdev); /* update connected indicator, la_size, ... 
*/ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 02647dc8c67c..6aece551d87e 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -161,7 +161,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) + __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, discard_my_data) __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) -- cgit v1.2.3 From 6394b9358e6187414b7a6de7ba2c681ee4a790ac Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:29:52 +0200 Subject: drbd: Refer to resync-rate consistently throughout the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fa36757ffc4a..8026adacd3d2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -389,21 +389,21 @@ struct p_barrier_ack { } __packed; struct p_rs_param { - u32 rate; + u32 resync_rate; /* Since protocol version 88 and higher. 
*/ char verify_alg[0]; } __packed; struct p_rs_param_89 { - u32 rate; + u32 resync_rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; } __packed; struct p_rs_param_95 { - u32 rate; + u32 resync_rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; u32 c_plan_ahead; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 86c8bc5ac603..26d7763d5255 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -888,14 +888,14 @@ int drbd_send_sync_param(struct drbd_conf *mdev) if (get_ldev(mdev)) { dc = rcu_dereference(mdev->ldev->disk_conf); - p->rate = cpu_to_be32(dc->resync_rate); + p->resync_rate = cpu_to_be32(dc->resync_rate); p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); p->c_delay_target = cpu_to_be32(dc->c_delay_target); p->c_fill_target = cpu_to_be32(dc->c_fill_target); p->c_max_rate = cpu_to_be32(dc->c_max_rate); put_ldev(mdev); } else { - p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF); p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e4e8f8a408d1..684f79542727 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3221,7 +3221,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) old_disk_conf = mdev->ldev->disk_conf; *new_disk_conf = *old_disk_conf; - new_disk_conf->resync_rate = be32_to_cpu(p->rate); + new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); } if (apv >= 88) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6aece551d87e..778708d92939 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -113,7 +113,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, 
__u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RATE_DEF) + __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3d3e2d5125cb..48339ae69d50 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -98,10 +98,11 @@ /* syncer { */ /* FIXME allow rate to be zero? */ -#define DRBD_RATE_MIN 1 +#define DRBD_RESYNC_RATE_MIN 1 /* channel bonding 10 GbE, or other hardware */ -#define DRBD_RATE_MAX (4 << 20) -#define DRBD_RATE_DEF 250 /* kb/second */ +#define DRBD_RESYNC_RATE_MAX (4 << 20) +#define DRBD_RESYNC_RATE_DEF 250 +#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ /* less than 7 would hit performance unnecessarily. 
* 919 slots context information per transaction, -- cgit v1.2.3 From 69ef82dea4c34e4a0541fc3f415b0fef70fe12b0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:34:35 +0200 Subject: drbd: Refer to connect-int consistently throughout the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 12 ++++++------ include/linux/drbd_genl.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 684f79542727..7deade196a33 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -617,7 +617,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) struct sockaddr_in6 peer_in6; struct net_conf *nc; int err, peer_addr_len, my_addr_len; - int sndbuf_size, rcvbuf_size, try_connect_int; + int sndbuf_size, rcvbuf_size, connect_int; int disconnect_on_error = 1; rcu_read_lock(); @@ -629,7 +629,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; - try_connect_int = nc->try_connect_int; + connect_int = nc->connect_int; my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6)); memcpy(&src_in6, nc->my_addr, my_addr_len); @@ -653,7 +653,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = try_connect_int * HZ; + sock->sk->sk_sndtimeo = connect_int * HZ; drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); /* explicitly bind to the configured IP as source IP @@ -702,7 +702,7 @@ out: static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) { int timeo, err, my_addr_len; - int sndbuf_size, rcvbuf_size, try_connect_int; + int sndbuf_size, rcvbuf_size, connect_int; struct socket *s_estab = NULL, *s_listen; struct sockaddr_in6 my_addr; struct net_conf *nc; @@ -717,7 +717,7 @@ static struct socket 
*drbd_wait_for_connect(struct drbd_tconn *tconn) sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; - try_connect_int = nc->try_connect_int; + connect_int = nc->connect_int; my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6)); memcpy(&my_addr, nc->my_addr, my_addr_len); @@ -731,7 +731,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) goto out; } - timeo = try_connect_int * HZ; + timeo = connect_int * HZ; timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 778708d92939..67c816c0fc28 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -143,7 +143,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, try_connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) -- cgit v1.2.3 From 95f8efd08bcce65df994049a292b94e56c7ada67 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 May 2011 11:15:34 +0200 Subject: drbd: Fix the upper limit of resync-after The 32-bit resync_after netlink field takes a device minor number as parameter, which is no longer limited to 255. We cannot statically verify which device numbers are valid, so set the upper limit to the highest possible signed 32-bit integer.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 26 +++++++++++++------------- include/linux/drbd.h | 4 ++-- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 6 files changed, 24 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8026adacd3d2..e16722840767 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1408,8 +1408,8 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor); -void drbd_sync_after_changed(struct drbd_conf *mdev); +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); +void drbd_resync_after_changed(struct drbd_conf *mdev); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9a82306adf92..74c27f1507f3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1183,10 +1183,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } write_lock_irq(&global_state_lock); - retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after); + retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); if (retcode == NO_ERROR) { rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - drbd_sync_after_changed(mdev); + drbd_resync_after_changed(mdev); } write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ec8f4245ef9a..6410c55831e0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ 
b/drivers/block/drbd/drbd_worker.c @@ -57,7 +57,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel); /* About the global_state_lock Each state transition on an device holds a read lock. In case we have - to evaluate the sync after dependencies, we grab a write lock, because + to evaluate the resync after dependencies, we grab a write lock, because we need stable states on all devices for that. */ rwlock_t global_state_lock; @@ -1340,17 +1340,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) static int _drbd_may_sync_now(struct drbd_conf *mdev) { struct drbd_conf *odev = mdev; - int ra; + int resync_after; while (1) { if (!odev->ldev) return 1; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); - if (ra == -1) + if (resync_after == -1) return 1; - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1426,36 +1426,36 @@ void suspend_other_sg(struct drbd_conf *mdev) } /* caller must hold global_state_lock */ -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor) +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) { struct drbd_conf *odev; - int ra; + int resync_after; if (o_minor == -1) return NO_ERROR; if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) - return ERR_SYNC_AFTER; + return ERR_RESYNC_AFTER; /* check for loops */ odev = minor_to_mdev(o_minor); while (1) { if (odev == mdev) - return ERR_SYNC_AFTER_CYCLE; + return ERR_RESYNC_AFTER_CYCLE; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); /* dependency chain ends here, no cycles. 
*/ - if (ra == -1) + if (resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); } } /* caller must hold global_state_lock */ -void drbd_sync_after_changed(struct drbd_conf *mdev) +void drbd_resync_after_changed(struct drbd_conf *mdev) { int changes; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 6c7c85d8fc41..05063e6db81f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -130,8 +130,8 @@ enum drbd_ret_code { ERR_INTR = 129, /* EINTR */ ERR_RESIZE_RESYNC = 130, ERR_NO_PRIMARY = 131, - ERR_SYNC_AFTER = 132, - ERR_SYNC_AFTER_CYCLE = 133, + ERR_RESYNC_AFTER = 132, + ERR_RESYNC_AFTER_CYCLE = 133, ERR_PAUSE_IS_SET = 134, ERR_PAUSE_IS_CLEAR = 135, ERR_PACKET_NR = 137, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 67c816c0fc28..a59466f7f661 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 48339ae69d50..c4a8f0fef7b2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,9 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_AFTER_MIN -1 -#define DRBD_AFTER_MAX 255 -#define DRBD_AFTER_DEF -1 +#define DRBD_RESYNC_AFTER_MIN -1 +#define DRBD_RESYNC_AFTER_MAX (1<<30) +#define DRBD_RESYNC_AFTER_DEF -1 +#define 
DRBD_RESYNC_AFTER_SCALE '1' /* } */ -- cgit v1.2.3 From 3a45abd577727d2268e190d372600f8652883453 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 May 2011 12:02:54 +0200 Subject: drbd: Convert resync-after into a signed netlink field Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 3 +++ include/linux/genl_magic_struct.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index a59466f7f661..7b174a093a8d 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) + __s32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index e908f1c50355..94e839aafae3 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -437,6 +437,9 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __u32_field_def #define __u32_field_def(attr_nr, attr_flag, name, default) \ x->name = default; +#undef __s32_field_def +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; #undef __flg_field_def #define __flg_field_def(attr_nr, attr_flag, name, default) \ x->name = default; diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index b1ddbb5bd725..0fca21fd1af5 100644 --- 
a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -115,6 +115,8 @@ enum { __flg_field(attr_nr, attr_flag, name) #define __u32_field_def(attr_nr, attr_flag, name, default) \ __u32_field(attr_nr, attr_flag, name) +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + __s32_field(attr_nr, attr_flag, name) #define __str_field_def(attr_nr, attr_flag, name, maxlen) \ __str_field(attr_nr, attr_flag, name, maxlen) -- cgit v1.2.3 From c5482bbd9607bf38cbc952eacaa429e6ba3160a0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:44:55 +0200 Subject: drbd: Rename DISK_SIZE_SECT -> DISK_SIZE We don't have the units in constant names in other places, either. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index c4a8f0fef7b2..8f8bbea545e0 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,9 +125,10 @@ * the upper limit with 64bit kernel, enough ram and flexible meta data * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ -#define DRBD_DISK_SIZE_SECT_MIN 0 -#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) -#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_MIN 0 +#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_SCALE 's' /* sectors */ #define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE -- cgit v1.2.3 From dcb20d1a8e7d9602e52a9b673ae4d7f746d2cbb2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 16 May 2011 14:30:24 +0200 Subject: drbd: Refuse to change network options online when... 
* the peer does not speak protocol_version 100 and the user wants to change one of: - wire_protocol - two_primaries - integrity_alg * the user wants to remove the allow_two_primaries flag when there are two primaries Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 22 ++++++++++++++++++---- include/linux/drbd.h | 1 + 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 74c27f1507f3..133a6724657d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1722,10 +1722,24 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n struct drbd_conf *mdev; int i; - if (old_conf && tconn->agreed_pro_version < 100 && - tconn->cstate == C_WF_REPORT_PARAMS && - new_conf->wire_protocol != old_conf->wire_protocol) - return ERR_NEED_APV_100; + if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) { + if (new_conf->wire_protocol != old_conf->wire_protocol) + return ERR_NEED_APV_100; + + if (new_conf->two_primaries != old_conf->two_primaries) + return ERR_NEED_APV_100; + + if (!new_conf->integrity_alg != !old_conf->integrity_alg) + return ERR_NEED_APV_100; + + if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg)) + return ERR_NEED_APV_100; + } + + if (!new_conf->two_primaries && + conn_highest_role(tconn) == R_PRIMARY && + conn_highest_peer(tconn) == R_PRIMARY) + return ERR_NEED_ALLOW_TWO_PRI; if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 05063e6db81f..679e81123229 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -161,6 +161,7 @@ enum drbd_ret_code { ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, ERR_NEED_APV_100 = 163, + ERR_NEED_ALLOW_TWO_PRI = 164, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3 From 
309f0b70ab789bf85c5f5f32dbc466d42f024747 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 May 2011 01:24:14 +0200 Subject: drbd: Use more generic constant names These constants are useful for the same purpose in more than one place. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 8f8bbea545e0..3627f760966e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,10 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_RESYNC_AFTER_MIN -1 -#define DRBD_RESYNC_AFTER_MAX (1<<30) -#define DRBD_RESYNC_AFTER_DEF -1 -#define DRBD_RESYNC_AFTER_SCALE '1' +#define DRBD_MINOR_NUMBER_MIN -1 +#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_DEF -1 +#define DRBD_MINOR_NUMBER_SCALE '1' /* } */ -- cgit v1.2.3 From 509100e6012db92f4af3796436b450447c6c8268 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 17 May 2011 13:29:46 +0200 Subject: drbd: Output signed / unsigned netlink fields correctly Note: All input values are still treated as signed; unsigned long long values are still broken. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_func.h | 23 +++++++++++------ include/linux/genl_magic_struct.h | 52 ++++++++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 94e839aafae3..2ae16126c6a4 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -78,12 +78,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ { s_fields }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ + __put, __is_signed) \ [__nla_type(attr_nr)] = { .type = nla_type }, #undef __array #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ - __get, __put) \ + __get, __put, __is_signed) \ [__nla_type(attr_nr)] = { .type = nla_type, \ .len = maxlen - (nla_type == NLA_NUL_STRING) }, @@ -241,7 +242,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ } #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name = __get(nla); \ @@ -249,7 +251,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ /* validate_nla() already checked nla_len <= maxlen appropriately. 
*/ #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name ## _len = \ @@ -410,14 +413,16 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ __put(skb, attr_nr, s->name); \ } #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ __put(skb, attr_nr, min_t(int, maxlen, \ @@ -431,9 +436,11 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ /* Functions for initializing structs to default values. 
*/ #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) #undef __u32_field_def #define __u32_field_def(attr_nr, attr_flag, name, default) \ x->name = default; diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 0fca21fd1af5..ba911da84d9f 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -87,28 +87,28 @@ enum { /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, char, \ - nla_get_u8, NLA_PUT_U8) + nla_get_u8, NLA_PUT_U8, false) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ - nla_get_u8, NLA_PUT_U8) + nla_get_u8, NLA_PUT_U8, false) #define __u16_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ - nla_get_u16, NLA_PUT_U16) + nla_get_u16, NLA_PUT_U16, false) #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ - nla_get_u32, NLA_PUT_U32) + nla_get_u32, NLA_PUT_U32, false) #define __s32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ - nla_get_u32, NLA_PUT_U32) + nla_get_u32, NLA_PUT_U32, true) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ - nla_get_u64, NLA_PUT_U64) + nla_get_u64, NLA_PUT_U64, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, NLA_PUT) + nla_strlcpy, NLA_PUT, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, 
maxlen, \ - nla_memcpy, NLA_PUT) + nla_memcpy, NLA_PUT, false) /* fields with default values */ #define __flg_field_def(attr_nr, attr_flag, name, default) \ @@ -174,11 +174,13 @@ enum { \ }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, \ + __get, __put, __is_signed) \ T_ ## name = (__u16)(attr_nr | attr_flag), #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, \ + maxlen, __get, __put, __is_signed) \ T_ ## name = (__u16)(attr_nr | attr_flag), #include GENL_MAGIC_INCLUDE_FILE @@ -238,11 +240,13 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ } #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ case attr_nr: #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ case attr_nr: #include GENL_MAGIC_INCLUDE_FILE @@ -260,16 +264,36 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ struct s_name { s_fields }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ type name; #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ type name[maxlen]; \ __u32 name ## _len; #include GENL_MAGIC_INCLUDE_FILE +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, 
__get, __put, \ + is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#include GENL_MAGIC_INCLUDE_FILE + /* }}}1 */ #endif /* GENL_MAGIC_STRUCT_H */ /* vim: set foldmethod=marker nofoldenable : */ -- cgit v1.2.3 From bbbef2d5ad8203f67274196dac90754ae106a463 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 18 May 2011 16:48:16 +0200 Subject: drbd: Remove unused GENLA_F_MAY_IGNORE flag Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_struct.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index ba911da84d9f..f3c3425ac30f 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -40,11 +40,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * yet implemented features, if newer userland tries to use them even though * the genl_family version clearly indicates they are not available. * - * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. - * To be used for API extensions for things that have sane defaults, - * so newer userland can still talk to older kernel, knowing it will - * silently ignore these attributes if not yet known. 
- * NOTE: These flags overload * NLA_F_NESTED (1 << 15) * NLA_F_NET_BYTEORDER (1 << 14) @@ -55,7 +50,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * See also: nla_type() */ enum { - GENLA_F_MAY_IGNORE = 0, GENLA_F_MANDATORY = 1 << 14, GENLA_F_REQUIRED = 1 << 15, -- cgit v1.2.3 From 5f9359201b5cf1d94fe0e0c47fcba38cfc921863 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 19 May 2011 17:39:28 +0200 Subject: drbd: Make drbd's use of netlink attribute flags less confusing Make it more clear in the flag names which flags are internal to drbd, and which are not. The check for mandatory attributes is the only extension visible at the netlink layer. Attributes with this flag set would look like unknown attributes to some kernel versions. The netlink layer would ignore them and also skip consistency checks on the attribute type and length. To avoid this, we check for mandatory attributes first, remove the mandatory flag, and then process the attributes normally. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 244 +++++++++++++++++++------------------- include/linux/genl_magic_func.h | 107 ++++++++--------- include/linux/genl_magic_struct.h | 64 ++++------ 3 files changed, 193 insertions(+), 222 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 7b174a093a8d..4ceecb9307d9 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -86,7 +86,7 @@ */ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, /* "arbitrary" size strings, nla_policy.len = 0 */ - __str_field(1, GENLA_F_MANDATORY, info_text, 0) + __str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0) ) /* Configuration requests typically need a context to operate on. @@ -95,133 +95,133 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource.
*/ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - __u32_field(1, GENLA_F_MANDATORY, ctx_volume) - __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) + __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_conn_name, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, - __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) - __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) - __s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + __str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128) + __str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128) + __s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx) /* use the resize command to try and change the disk_size */ - __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) + __u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size) /* we could change the max_bio_bvecs, * but it won't propagate through the stack */ - __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) - - __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) - __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - - __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __s32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) - __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) - __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) - __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) - __u32_field_def(13, GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) - __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) - __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - - __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) - __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 
DRBD_DISK_FLUSHES_DEF) - __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) - __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs) + + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) + + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) + __s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + + __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, - __str_field_def(1, GENLA_F_MANDATORY, cpu_mask, 32) - __u32_field_def(2, GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) + __str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) - __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) - __str_field_def(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __bin_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, 
my_addr, 128) + __bin_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_addr, 128) + __str_field_def(3, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field_def(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field_def(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) - __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) - __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) - __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) - __u32_field_def(13, GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) - __u32_field_def(14, GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) - __u32_field_def(15, GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) - __u32_field_def(16, GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) - __u32_field_def(17, GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) - __u32_field_def(18, GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) - __u32_field_def(19, GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) - __u32_field_def(20, GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) - __u32_field_def(21, GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) - __u32_field_def(22, GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) - __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) - __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) - __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, discard_my_data) - 
__flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) - __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field_def(31, GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(6, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(7, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(24, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(25, 
DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(28, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field_def(31, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, - __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) + __flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate) ) GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, - __u64_field(1, GENLA_F_MANDATORY, resize_size) - __flg_field(2, GENLA_F_MANDATORY, resize_force) - __flg_field(3, GENLA_F_MANDATORY, no_resync) + __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) + __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) + __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) ) GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, /* the reason of the broadcast, * if this is an event triggered broadcast. */ - __u32_field(1, GENLA_F_MANDATORY, sib_reason) - __u32_field(2, GENLA_F_REQUIRED, current_state) - __u64_field(3, GENLA_F_MANDATORY, capacity) - __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + __u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason) + __u32_field(2, DRBD_F_REQUIRED, current_state) + __u64_field(3, DRBD_GENLA_F_MANDATORY, capacity) + __u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid) /* These are for broadcast from after state change work. * prev_state and new_state are from the moment the state change took * place, new_state is not neccessarily the same as current_state, * there may have been more state changes since. Which will be * broadcasted soon, in their respective after state change work. 
*/ - __u32_field(5, GENLA_F_MANDATORY, prev_state) - __u32_field(6, GENLA_F_MANDATORY, new_state) + __u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state) + __u32_field(6, DRBD_GENLA_F_MANDATORY, new_state) /* if we have a local disk: */ - __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) - __u32_field(8, GENLA_F_MANDATORY, disk_flags) - __u64_field(9, GENLA_F_MANDATORY, bits_total) - __u64_field(10, GENLA_F_MANDATORY, bits_oos) + __bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags) + __u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total) + __u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos) /* and in case resync or online verify is active */ - __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) - __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + __u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed) /* for pre and post notifications of helper execution */ - __str_field(13, GENLA_F_MANDATORY, helper, 32) - __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) + __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) + __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) ) GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, - __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) + __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) ) GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, - __flg_field(1, GENLA_F_MANDATORY, clear_bm) + __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) ) GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, - __u32_field(1, GENLA_F_REQUIRED, timeout_type) + __u32_field(1, DRBD_F_REQUIRED, timeout_type) ) GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, - __flg_field(1, GENLA_F_MANDATORY, force_disconnect) + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) /* @@ -232,11 +232,11 @@ GENL_mc_group(events) /* kernel -> userspace announcement of changes */ GENL_notification( 
DRBD_EVENT, 1, events, - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY) ) /* query kernel for specific or all info */ @@ -250,116 +250,116 @@ GENL_op( ), /* To select the object .doit. * Or a subset of objects in .dumpit. */ - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) ) #if 0 /* TO BE DONE */ /* create or destroy resources, aka replication groups */ GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) #endif /* add DRBD minor devices as volumes to resources */ GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) /* add or delete replication links to resources */ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + 
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, GENL_doit(drbd_adm_resource_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_CONNECT, 10, GENL_doit(drbd_adm_connect), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_CHG_NET_OPTS, 29, GENL_doit(drbd_adm_net_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) ) GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_ATTACH, 12, GENL_doit(drbd_adm_attach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED) ) GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, GENL_doit(drbd_adm_disk_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_RESIZE, 13, 
GENL_doit(drbd_adm_resize), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_PRIMARY, 14, GENL_doit(drbd_adm_set_role), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_SECONDARY, 15, GENL_doit(drbd_adm_set_role), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_NEW_C_UUID, 16, GENL_doit(drbd_adm_new_c_uuid), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_START_OV, 17, GENL_doit(drbd_adm_start_ov), - GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_PAUSE_SYNC, 
21, GENL_doit(drbd_adm_pause_sync), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 2ae16126c6a4..58edd403a3ff 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -3,53 +3,6 @@ #include -/* - * Extension of genl attribute validation policies {{{1 - * {{{2 - */ - -/** - * nla_is_required - return true if this attribute is required - * @nla: netlink attribute - */ -static inline int nla_is_required(const struct nlattr *nla) -{ - return nla->nla_type & GENLA_F_REQUIRED; -} - -/** - * nla_is_mandatory - return true if understanding this attribute is mandatory - * @nla: netlink attribute - * Note: REQUIRED attributes are implicitly MANDATORY as well - */ -static inline int 
nla_is_mandatory(const struct nlattr *nla) -{ - return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); -} - -/* Functionality to be integrated into nla_parse(), and validate_nla(), - * respectively. - * - * Enforcing the "mandatory" bit is done here, - * by rejecting unknown mandatory attributes. - * - * Part of enforcing the "required" flag would mean to embed it into - * nla_policy.type, and extending validate_nla(), which currently does - * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, - * so we cannot do that. Thats why enforcing "required" is done in the - * generated assignment functions below. */ -static int nla_check_unknown(int maxtype, struct nlattr *head, int len) -{ - struct nlattr *nla; - int rem; - nla_for_each_attr(nla, head, len, rem) { - __u16 type = nla_type(nla); - if (type > maxtype && nla_is_mandatory(nla)) - return -EOPNOTSUPP; - } - return 0; -} - /* * Magic: declare tla policy {{{1 * Magic: declare nested policies @@ -80,13 +33,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ __put, __is_signed) \ - [__nla_type(attr_nr)] = { .type = nla_type }, + [attr_nr] = { .type = nla_type }, #undef __array #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ __get, __put, __is_signed) \ - [__nla_type(attr_nr)] = { .type = nla_type, \ - .len = maxlen - (nla_type == NLA_NUL_STRING) }, + [attr_nr] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, #include GENL_MAGIC_INCLUDE_FILE @@ -189,6 +142,43 @@ static struct nlattr *nested_attr_tb[128]; #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) #endif +static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not 
understand the DRBD_GENLA_F_MANDATORY flag. + * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. + */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + } + } + return 0; +} + +static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ /* *_from_attrs functions are static, but potentially unused */ \ @@ -204,12 +194,9 @@ static int __ ## s_name ## _from_attrs(struct s_name *s, \ if (!tla) \ return -ENOMSG; \ DPRINT_TLA(#s_name, "<=-", #tag_name); \ - err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ if (err) \ return err; \ - err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ - if (err) \ - return err; \ \ s_fields \ return 0; \ @@ -226,17 +213,17 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ } __attribute__((unused)) \ #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) 
\ - nla = ntb[__nla_type(attr_nr)]; \ + nla = ntb[attr_nr]; \ if (nla) { \ - if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ pr_info("<< must not change invariant attr: %s\n", #name); \ return -EEXIST; \ } \ assignment; \ - } else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + } else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ /* attribute missing from payload, */ \ /* which was expected */ \ - } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + } else if ((attr_flag) & DRBD_F_REQUIRED) { \ pr_info("<< missing attr: %s\n", #name); \ return -ENOMSG; \ } @@ -415,7 +402,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ __is_signed) \ - if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ __put(skb, attr_nr, s->name); \ } @@ -423,7 +410,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ __get, __put, __is_signed) \ - if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ __put(skb, attr_nr, min_t(int, maxlen, \ s->name ## _len + (nla_type == NLA_NUL_STRING)),\ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f3c3425ac30f..1d0bd79e27b3 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -26,50 +26,34 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * Extension of genl attribute validation policies {{{2 */ -/** - * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement - * - * 
@GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. - * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. +/* + * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not + * know about. This flag can be set in nlattr->nla_type to indicate that this + * attribute must not be ignored. * - * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. - * Without this, unknown attributes (> maxtype) are _silently_ ignored - * by validate_nla(). + * We check and remove this flag in drbd_nla_check_mandatory() before + * validating the attribute types and lengths via nla_parse_nested(). + */ +#define DRBD_GENLA_F_MANDATORY (1 << 14) + +/* + * Flags specific to drbd and not visible at the netlink layer, used in + * _from_attrs and _to_skb: * - * To be used for API extensions, so older kernel can reject requests for not - * yet implemented features, if newer userland tries to use them even though - * the genl_family version clearly indicates they are not available. + * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is + * invalid. * - * NOTE: These flags overload - * NLA_F_NESTED (1 << 15) - * NLA_F_NET_BYTEORDER (1 << 14) - * from linux/netlink.h, which are not useful for validate_nla(): - * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting - * .type = NLA_NESTED in the appropriate policy. + * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be + * included in unpriviledged get requests or broadcasts. * - * See also: nla_type() + * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but + * cannot subsequently be changed. */ -enum { - GENLA_F_MANDATORY = 1 << 14, - GENLA_F_REQUIRED = 1 << 15, - - /* Below will not be present in the __u16 .nla_type, but can be - * triggered on in _to_skb resp. 
_from_attrs */ - - /* To exclude "sensitive" information from broadcasts, or on - * unpriviledged get requests. This is useful because genetlink - * multicast groups can be listened in on by anyone. */ - GENLA_F_SENSITIVE = 1 << 16, - - /* INVARIAN options cannot be changed at runtime. - * Useful to share an attribute policy and struct definition, - * between some "create" and "change" commands, - * but disallow certain fields to be changed online. - */ - GENLA_F_INVARIANT = 1 << 17, -}; +#define DRBD_F_REQUIRED (1 << 0) +#define DRBD_F_SENSITIVE (1 << 1) +#define DRBD_F_INVARIANT (1 << 2) -#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) +#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY)) /* }}}1 * MAGIC @@ -170,12 +154,12 @@ enum { \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, type, \ __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | attr_flag), + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, \ maxlen, __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | attr_flag), + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), #include GENL_MAGIC_INCLUDE_FILE -- cgit v1.2.3 From 5084d71d89e1a94193378efb12ac659e4e6ada3f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 24 May 2011 14:08:58 +0200 Subject: drbd: drbd_nla_check_mandatory(): Need to remove the DRBD_GENLA_F_MANDATORY flag first We need to remove the flag before checking for valid types. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_func.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 58edd403a3ff..357f2ad403b1 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -158,9 +158,9 @@ static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) nla_for_each_attr(nla, head, len, rem) { if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; if (nla_type(nla) > maxtype) return -EOPNOTSUPP; - nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; } } return 0; -- cgit v1.2.3 From 67b58bf723b083d4776cd7c9959246ef46c0d36f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 6 Jun 2011 15:36:04 +0200 Subject: drbd: spelling fix: too small It is not "to small", but "too small". Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 ++++---- include/linux/drbd.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 59923db780b9..31d27dd92924 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1360,7 +1360,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) new_disk_conf->disk_size); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1374,7 +1374,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { - retcode = ERR_MD_DISK_TO_SMALL; + retcode = ERR_MD_DISK_TOO_SMALL; dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) 
min_md_device_sectors); @@ -1385,7 +1385,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * (we may currently be R_PRIMARY with no local disk...) */ if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1447,7 +1447,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto force_diskless_dec; } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 679e81123229..fedda00374af 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -112,8 +112,8 @@ enum drbd_ret_code { ERR_OPEN_MD_DISK = 105, ERR_DISK_NOT_BDEV = 107, ERR_MD_NOT_BDEV = 108, - ERR_DISK_TO_SMALL = 111, - ERR_MD_DISK_TO_SMALL = 112, + ERR_DISK_TOO_SMALL = 111, + ERR_MD_DISK_TOO_SMALL = 112, ERR_BDCLAIM_DISK = 114, ERR_BDCLAIM_MD_DISK = 115, ERR_MD_IDX_INVALID = 116, -- cgit v1.2.3 From 789c1b626cb490acb36cf481b45040b324f60fde Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 6 Jun 2011 16:16:44 +0200 Subject: drbd: Use the terminology suggested by the command names in the source code and messages Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 16 ++++++++-------- include/linux/drbd.h | 4 ++-- include/linux/drbd_genl.h | 17 ++++------------- 3 files changed, 14 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 31d27dd92924..5b4090f52f5a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -47,8 +47,8 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info 
*info); -int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info); int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); @@ -2972,7 +2972,7 @@ drbd_check_conn_name(const char *name) return NO_ERROR; } -int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -2989,7 +2989,7 @@ int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) if (adm_ctx.tconn) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { retcode = ERR_INVALID_REQUEST; - drbd_msg_put_info("connection exists"); + drbd_msg_put_info("resource exists"); } /* else: still NO_ERROR */ goto out; @@ -3086,7 +3086,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out; if (!adm_ctx.tconn) { - retcode = ERR_CONN_NOT_KNOWN; + retcode = ERR_RES_NOT_KNOWN; goto out; } @@ -3140,7 +3140,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) retcode = NO_ERROR; } else { /* "can not happen" */ - retcode = ERR_CONN_IN_USE; + retcode = ERR_RES_IN_USE; drbd_msg_put_info("failed to delete connection"); } goto out; @@ -3149,7 +3149,7 @@ out: return 0; } -int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -3166,7 +3166,7 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) retcode = NO_ERROR; } else { - retcode = ERR_CONN_IN_USE; + retcode = ERR_RES_IN_USE; } if (retcode == NO_ERROR) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index fedda00374af..161cd414b036 100644 --- 
a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,8 +155,8 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, - ERR_CONN_NOT_KNOWN = 158, - ERR_CONN_IN_USE = 159, + ERR_RES_NOT_KNOWN = 158, + ERR_RES_IN_USE = 159, ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 4ceecb9307d9..47ef324b69db 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -253,25 +253,16 @@ GENL_op( GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) ) -#if 0 - /* TO BE DONE */ - /* create or destroy resources, aka replication groups */ -GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -#endif - /* add DRBD minor devices as volumes to resources */ -GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), +GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) - /* add or delete replication links to resources */ -GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + /* add or delete resources */ +GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), +GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, -- cgit v1.2.3 From 7c3063cc6f0e75cdf312f5f318f9a4c02e460397 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 9 Jun 2011 
17:52:12 +0200 Subject: drbd: Also need to check for DRBD_GENLA_F_MANDATORY flags before nla_find_nested() This is done by introducing drbd_nla_find_nested() which handles the flag before calling nla_find_nested(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++ drivers/block/drbd/drbd_nl.c | 96 ++++++++++++++++++++++++++++++++--------- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 37 ---------------- 4 files changed, 83 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c3019730a24f..c58430183d5f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1407,6 +1407,12 @@ extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); +struct nla_policy; +extern int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla); +extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy); +extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype); + /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5b4090f52f5a..24187f1c93d5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -92,7 +92,7 @@ static struct drbd_config_context { #define VOLUME_UNSPECIFIED (-1U) /* pointer into the request skb, * limited lifetime! 
*/ - char *conn_name; + char *resource_name; /* reply buffer */ struct sk_buff *reply_skb; @@ -191,15 +191,15 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, /* and assign stuff to the global adm_ctx */ nla = nested_attr_tb[__nla_type(T_ctx_volume)]; adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED; - nla = nested_attr_tb[__nla_type(T_ctx_conn_name)]; + nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; if (nla) - adm_ctx.conn_name = nla_data(nla); + adm_ctx.resource_name = nla_data(nla); } else adm_ctx.volume = VOLUME_UNSPECIFIED; adm_ctx.minor = d_in->minor; adm_ctx.mdev = minor_to_mdev(d_in->minor); - adm_ctx.tconn = conn_get_by_name(adm_ctx.conn_name); + adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name); if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { drbd_msg_put_info("unknown minor"); @@ -214,7 +214,8 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, if (adm_ctx.mdev && adm_ctx.tconn && adm_ctx.mdev->tconn != adm_ctx.tconn) { pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", - adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); + adm_ctx.minor, adm_ctx.resource_name, + adm_ctx.mdev->tconn->name); drbd_msg_put_info("minor exists in different connection"); return ERR_INVALID_REQUEST; } @@ -239,7 +240,7 @@ fail: static int drbd_adm_finish(struct genl_info *info, int retcode) { struct nlattr *nla; - const char *conn_name = NULL; + const char *resource_name = NULL; if (adm_ctx.tconn) { kref_put(&adm_ctx.tconn->kref, &conn_destroy); @@ -253,9 +254,10 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) nla = info->attrs[DRBD_NLA_CFG_CONTEXT]; if (nla) { - nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); - if (nla) - conn_name = nla_data(nla); + int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; + nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); + if (nla && !IS_ERR(nla)) + resource_name 
= nla_data(nla); } drbd_adm_send_reply(adm_ctx.reply_skb, info); @@ -2526,7 +2528,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr) +int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr) { struct nlattr *nla; nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); @@ -2534,7 +2536,7 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigne goto nla_put_failure; if (vnr != VOLUME_UNSPECIFIED) NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name); + NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name); nla_nest_end(skb, nla); return 0; @@ -2778,8 +2780,9 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; struct nlattr *nla; - const char *conn_name; + const char *resource_name; struct drbd_tconn *tconn; + int maxtype; /* Is this a followup call? */ if (cb->args[0]) { @@ -2799,12 +2802,15 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) /* No explicit context given. Dump all. */ if (!nla) goto dump; - nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); + maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; + nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); + if (IS_ERR(nla)) + return PTR_ERR(nla); /* context given, but no name present? 
*/ if (!nla) return -EINVAL; - conn_name = nla_data(nla); - tconn = conn_get_by_name(conn_name); + resource_name = nla_data(nla); + tconn = conn_get_by_name(resource_name); if (!tconn) return -ENODEV; @@ -2957,16 +2963,16 @@ out_nolock: } static enum drbd_ret_code -drbd_check_conn_name(const char *name) +drbd_check_resource_name(const char *name) { if (!name || !name[0]) { - drbd_msg_put_info("connection name missing"); + drbd_msg_put_info("resource name missing"); return ERR_MANDATORY_TAG; } /* if we want to use these in sysfs/configfs/debugfs some day, * we must not allow slashes */ if (strchr(name, '/')) { - drbd_msg_put_info("invalid connection name"); + drbd_msg_put_info("invalid resource name"); return ERR_INVALID_REQUEST; } return NO_ERROR; @@ -2982,7 +2988,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = drbd_check_conn_name(adm_ctx.conn_name); + retcode = drbd_check_resource_name(adm_ctx.resource_name); if (retcode != NO_ERROR) goto out; @@ -2995,7 +3001,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!conn_create(adm_ctx.conn_name)) + if (!conn_create(adm_ctx.resource_name)) retcode = ERR_NOMEM; out: drbd_adm_finish(info, retcode); @@ -3213,3 +3219,53 @@ failed: "Event seq:%u sib_reason:%u\n", err, seq, sib->sib_reason); } + +int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. + * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. 
+ */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + } + } + return 0; +} + +int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + +struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype) +{ + int err; + /* + * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and + * we don't know about that attribute, reject all the nested + * attributes. + */ + err = drbd_nla_check_mandatory(maxtype, nla); + if (err) + return ERR_PTR(err); + return nla_find_nested(nla, attrtype); +} diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 47ef324b69db..0c2102c05384 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -96,7 +96,7 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and the volume id within the resource. 
*/ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) - __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_conn_name, 128) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 357f2ad403b1..0b8a88e2e83e 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -142,43 +142,6 @@ static struct nlattr *nested_attr_tb[128]; #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) #endif -static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) -{ - struct nlattr *head = nla_data(nla); - int len = nla_len(nla); - int rem; - - /* - * validate_nla (called from nla_parse_nested) ignores attributes - * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. - * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY - * flag set also, check and remove that flag before calling - * nla_parse_nested. 
- */ - - nla_for_each_attr(nla, head, len, rem) { - if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { - nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; - if (nla_type(nla) > maxtype) - return -EOPNOTSUPP; - } - } - return 0; -} - -static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, - struct nlattr *nla, - const struct nla_policy *policy) -{ - int err; - - err = drbd_nla_check_mandatory(maxtype, nla); - if (!err) - err = nla_parse_nested(tb, maxtype, nla, policy); - - return err; -} - #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ /* *_from_attrs functions are static, but potentially unused */ \ -- cgit v1.2.3 From 089c075d88ac9407b8d7c5c8fc4b21c0d940bd82 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 14 Jun 2011 18:28:09 +0200 Subject: drbd: Convert the generic netlink interface to accept connection endpoints Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 ++ drivers/block/drbd/drbd_main.c | 21 +++++ drivers/block/drbd/drbd_nl.c | 158 ++++++++++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 21 +++-- drivers/block/drbd/drbd_state.c | 2 + include/linux/drbd_genl.h | 62 +++++++-------- 6 files changed, 164 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c3019730a24f..6d6d1056d824 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -836,6 +836,11 @@ struct drbd_tconn { /* is a resource from the config file */ wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ struct res_opts res_opts; + struct sockaddr_storage my_addr; + int my_addr_len; + struct sockaddr_storage peer_addr; + int peer_addr_len; + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ int agreed_pro_version; /* actually used protocol version */ @@ 
-1377,6 +1382,8 @@ extern void drbd_minor_destroy(struct kref *kref); struct drbd_tconn *conn_create(const char *name); extern void conn_destroy(struct kref *kref); struct drbd_tconn *conn_get_by_name(const char *name); +extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len); extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 178c711bc4af..79f275dc43a4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2420,6 +2420,27 @@ found: return tconn; } +struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len) +{ + struct drbd_tconn *tconn; + + rcu_read_lock(); + list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { + if (tconn->my_addr_len == my_addr_len && + tconn->peer_addr_len == peer_addr_len && + !memcmp(&tconn->my_addr, my_addr, my_addr_len) && + !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) { + kref_get(&tconn->kref); + goto found; + } + } + tconn = NULL; +found: + rcu_read_unlock(); + return tconn; +} + static int drbd_alloc_socket(struct drbd_socket *socket) { socket->rbuf = (void *) __get_free_page(GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 352be132b4be..e7933e04e7b8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -94,6 +94,8 @@ static struct drbd_config_context { /* pointer into the request skb, * limited lifetime! 
*/ char *resource_name; + struct nlattr *my_addr; + struct nlattr *peer_addr; /* reply buffer */ struct sk_buff *reply_skb; @@ -142,6 +144,7 @@ int drbd_msg_put_info(const char *info) */ #define DRBD_ADM_NEED_MINOR 1 #define DRBD_ADM_NEED_RESOURCE 2 +#define DRBD_ADM_NEED_CONNECTION 4 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, unsigned flags) { @@ -174,6 +177,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, adm_ctx.reply_dh->minor = d_in->minor; adm_ctx.reply_dh->ret_code = NO_ERROR; + adm_ctx.volume = VOLUME_UNSPECIFIED; if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { struct nlattr *nla; /* parse and validate only */ @@ -191,12 +195,21 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, /* and assign stuff to the global adm_ctx */ nla = nested_attr_tb[__nla_type(T_ctx_volume)]; - adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED; + if (nla) + adm_ctx.volume = nla_get_u32(nla); nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; if (nla) adm_ctx.resource_name = nla_data(nla); - } else - adm_ctx.volume = VOLUME_UNSPECIFIED; + adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)]; + adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)]; + if ((adm_ctx.my_addr && + nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) || + (adm_ctx.peer_addr && + nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) { + err = -EINVAL; + goto fail; + } + } adm_ctx.minor = d_in->minor; adm_ctx.mdev = minor_to_mdev(d_in->minor); @@ -211,6 +224,26 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, return ERR_INVALID_REQUEST; } + if (flags & DRBD_ADM_NEED_CONNECTION) { + if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) { + drbd_msg_put_info("no resource name expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev) { + drbd_msg_put_info("no minor number expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.my_addr && 
adm_ctx.peer_addr) + adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr), + nla_len(adm_ctx.my_addr), + nla_data(adm_ctx.peer_addr), + nla_len(adm_ctx.peer_addr)); + if (!adm_ctx.tconn) { + drbd_msg_put_info("unknown connection"); + return ERR_INVALID_REQUEST; + } + } + /* some more paranoia, if the request was over-determined */ if (adm_ctx.mdev && adm_ctx.tconn && adm_ctx.mdev->tconn != adm_ctx.tconn) { @@ -268,30 +301,28 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) static void setup_khelper_env(struct drbd_tconn *tconn, char **envp) { char *afs; - struct net_conf *nc; - rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); - if (nc) { - switch (((struct sockaddr *)nc->peer_addr)->sa_family) { - case AF_INET6: - afs = "ipv6"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)nc->peer_addr)->sin6_addr); - break; - case AF_INET: - afs = "ipv4"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)nc->peer_addr)->sin_addr); - break; - default: - afs = "ssocks"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)nc->peer_addr)->sin_addr); - } - snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); + /* FIXME: A future version will not allow this case. 
*/ + if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0) + return; + + switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) { + case AF_INET6: + afs = "ipv6"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", + &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr); + break; + case AF_INET: + afs = "ipv4"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); + break; + default: + afs = "ssocks"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); } - rcu_read_unlock(); + snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); } int drbd_khelper(struct drbd_conf *mdev, char *cmd) @@ -1874,7 +1905,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int rsr; /* re-sync running */ struct crypto crypto = { }; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -1986,18 +2017,39 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) struct drbd_conf *mdev; struct net_conf *old_conf, *new_conf = NULL; struct crypto crypto = { }; - struct drbd_tconn *oconn; struct drbd_tconn *tconn; - struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; enum drbd_ret_code retcode; int i; int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto out; + if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) { + drbd_msg_put_info("connection endpoint(s) missing"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + /* No need for _rcu here. All reconfiguration is + * strictly serialized on genl_lock(). 
We are protected against + * concurrent reconfiguration/addition/deletion */ + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len && + !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) { + retcode = ERR_LOCAL_ADDR; + goto out; + } + + if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len && + !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) { + retcode = ERR_PEER_ADDR; + goto out; + } + } tconn = adm_ctx.tconn; conn_reconfig_start(tconn); @@ -2027,37 +2079,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; - retcode = NO_ERROR; - - new_my_addr = (struct sockaddr *)&new_conf->my_addr; - new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - - /* No need for _rcu here. All reconfiguration is - * strictly serialized on genl_lock(). We are protected against - * concurrent reconfiguration/addition/deletion */ - list_for_each_entry(oconn, &drbd_tconns, all_tconn) { - struct net_conf *nc; - if (oconn == tconn) - continue; - - rcu_read_lock(); - nc = rcu_dereference(oconn->net_conf); - if (nc) { - taken_addr = (struct sockaddr *)&nc->my_addr; - if (new_conf->my_addr_len == nc->my_addr_len && - !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) - retcode = ERR_LOCAL_ADDR; - - taken_addr = (struct sockaddr *)&nc->peer_addr; - if (new_conf->peer_addr_len == nc->peer_addr_len && - !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) - retcode = ERR_PEER_ADDR; - } - rcu_read_unlock(); - if (retcode != NO_ERROR) - goto fail; - } - retcode = alloc_crypto(&crypto, new_conf); if (retcode != NO_ERROR) goto fail; @@ -2083,6 +2104,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) tconn->csums_tfm = crypto.csums_tfm; tconn->verify_tfm = crypto.verify_tfm; + tconn->my_addr_len = nla_len(adm_ctx.my_addr); + memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len); + 
tconn->peer_addr_len = nla_len(adm_ctx.peer_addr); + memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len); + mutex_unlock(&tconn->conf_update); rcu_read_lock(); @@ -2170,7 +2196,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -2529,7 +2555,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr) +int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr) { struct nlattr *nla; nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); @@ -2537,7 +2563,11 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, uns goto nla_put_failure; if (vnr != VOLUME_UNSPECIFIED) NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name); + NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name); + if (tconn->my_addr_len) + NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr); + if (tconn->peer_addr_len) + NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr); nla_nest_end(skb, nla); return 0; @@ -2574,7 +2604,7 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, /* We need to add connection name and volume number information still. * Minor number is in drbd_genlmsghdr. 
*/ - if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) + if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr)) goto nla_put_failure; if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) @@ -2736,7 +2766,7 @@ next_tconn: /* this is a tconn without a single volume */ dh->minor = -1U; dh->ret_code = NO_ERROR; - if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED)) + if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED)) genlmsg_cancel(skb, dh); else genlmsg_end(skb, dh); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4ba097293278..ab1d36cb6214 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -626,23 +626,21 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) rcu_read_unlock(); return NULL; } - sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; connect_int = nc->connect_int; + rcu_read_unlock(); - my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6)); - memcpy(&src_in6, nc->my_addr, my_addr_len); + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6)); + memcpy(&src_in6, &tconn->my_addr, my_addr_len); - if (((struct sockaddr *)nc->my_addr)->sa_family == AF_INET6) + if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ - peer_addr_len = min_t(int, nc->peer_addr_len, sizeof(src_in6)); - memcpy(&peer_in6, nc->peer_addr, peer_addr_len); - - rcu_read_unlock(); + peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6)); + memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len); what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, @@ -714,15 +712,14 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) rcu_read_unlock(); return NULL; } - sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; connect_int = 
nc->connect_int; - - my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6)); - memcpy(&my_addr, nc->my_addr, my_addr_len); rcu_read_unlock(); + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6)); + memcpy(&my_addr, &tconn->my_addr, my_addr_len); + what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index cd55f46d5c55..d978e4d98a15 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1418,6 +1418,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) mutex_lock(&tconn->conf_update); old_conf = tconn->net_conf; + tconn->my_addr_len = 0; + tconn->peer_addr_len = 0; rcu_assign_pointer(tconn->net_conf, NULL); conn_free_crypto(tconn); mutex_unlock(&tconn->conf_update); diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 0c2102c05384..b93db6c83882 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -97,6 +97,8 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) + __bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128) + __bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, @@ -134,38 +136,36 @@ GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __bin_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, my_addr, 128) - __bin_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_addr, 128) - __str_field_def(3, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, + __str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field_def(4, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - 
__str_field_def(5, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field_def(6, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field_def(7, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field_def(8, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) - __u32_field_def(10, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) - __u32_field_def(11, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) - __u32_field_def(12, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) - __u32_field_def(13, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) - __u32_field_def(14, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) - __u32_field_def(15, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) - __u32_field_def(16, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) - __u32_field_def(17, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) - __u32_field_def(18, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) - __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) - __u32_field_def(20, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) - __u32_field_def(21, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) - __u32_field_def(22, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) - __u32_field_def(23, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) - __u32_field_def(24, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) - __u32_field_def(25, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) - __flg_field_def(28, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) - __flg_field_def(29, DRBD_GENLA_F_MANDATORY, always_asbp, 
DRBD_ALWAYS_ASBP_DEF) - __flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) - __flg_field_def(31, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + __str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, 
DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, -- cgit v1.2.3 From 6dff2902208364d058746ee794da4d960f6eec6f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 28 Jun 2011 14:18:12 +0200 Subject: drbd: Rename --dry-run to --tentative drbdadm already has a --dry-run option, so this option cannot directly be passed through to drbdsetup. Rename the drbdsetup option to resolve this conflict. For backward compatibility, make --dry-run an alias of --tentative. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 6 +++--- include/linux/drbd_genl.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 933d4767c110..72b1dfa4b656 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -926,7 +926,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - if (nc->dry_run && tconn->agreed_pro_version < 92) { + if (nc->tentative && tconn->agreed_pro_version < 92) { rcu_read_unlock(); mutex_unlock(&sock->mutex); conn_err(tconn, "--dry-run is not supported by peer"); @@ -945,7 +945,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) cf = 0; if (nc->discard_my_data) cf |= CF_DISCARD_MY_DATA; - if (nc->dry_run) + if (nc->tentative) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c index ab1d36cb6214..d55a3cb21c31 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2836,7 +2836,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; struct net_conf *nc; - int hg, rule_nr, rr_conflict, dry_run; + int hg, rule_nr, rr_conflict, tentative; mydisk = mdev->state.disk; if (mydisk == D_NEGOTIATING) @@ -2916,7 +2916,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol (hg < 0) ? "peer" : "this"); } rr_conflict = nc->rr_conflict; - dry_run = nc->dry_run; + tentative = nc->tentative; rcu_read_unlock(); if (hg == -100) { @@ -2949,7 +2949,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { + if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index b93db6c83882..e879a9324380 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -164,7 +164,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) -- cgit v1.2.3 From f03c254961cce65ee2b21c4beccb6975b6f9d308 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 20 Jun 2011 22:21:19 +0200 Subject: drbd: allow ping-timeout of up to 30 seconds Allow up to 300 deci-seconds (tenths of a second) to be configured
for the "ping timeout". There may be setups where heavy congestion, huge buffers, and asymmetric bandwidth limitations may need a "huge" ping-timeout as work-around for "spurious connection loss" problems. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3627f760966e..82db83410f08 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -62,7 +62,7 @@ /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 -#define DRBD_PING_TIMEO_MAX 100 +#define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 /* max number of write requests between write barriers */ -- cgit v1.2.3 From d942ae44537669418a7cbfd916531d30513dbca8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 31 May 2011 13:07:24 +0200 Subject: drbd: Fixes from the 8.3 development branch * commit 'ae57a0a': drbd: Only print sanitize state's warnings, if the state change happens drbd: we should write meta data updates with FLUSH FUA drbd: fix limit define, we support 1 PiByte now drbd: fix log message argument order drbd: Typo in user-visible message. drbd: Make "(rcv|snd)buf-size" and "ping-timeout" available for the proxy, too. drbd: Allow keywords to be used in multiple config sections. drbd: fix typos in comments. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++++---- drivers/block/drbd/drbd_state.c | 56 ++++++++++++++++++++++++++++++----------- include/linux/drbd_limits.h | 2 +- 3 files changed, 47 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index de42c7cf7caf..1d71b3a3586a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -372,11 +372,11 @@ struct p_connection_features { u32 protocol_max; /* should be more than enough for future enhancements - * for now, feature_flags and the reserverd array shall be zero. + * for now, feature_flags and the reserved array shall be zero. */ u32 _pad; - u64 reserverd[7]; + u64 reserved[7]; } __packed; struct p_barrier { @@ -914,7 +914,7 @@ struct drbd_conf { atomic_t ap_bio_cnt; /* Requests we need to complete */ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ - atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t unacked_cnt; /* Need to send replies for */ atomic_t local_cnt; /* Waiting for local completion */ /* Interval tree of pending local requests */ @@ -2153,7 +2153,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) /* disk state is stable as well. */ break; - /* no new io accepted during tansitional states */ + /* no new io accepted during transitional states */ case D_ATTACHING: case D_FAILED: case D_NEGOTIATING: @@ -2217,7 +2217,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev) /* we wait here * as long as the device is suspended * until the bitmap is no longer on the fly during connection - * handshake as long as we would exeed the max_buffer limit. + * handshake as long as we would exceed the max_buffer limit. * * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. 
*/ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8c9d0348736d..2cf69b25f1e7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,6 +37,15 @@ struct after_state_chg_work { struct completion *done; }; +enum sanitize_state_warnings { + NO_WARNING, + ABORTED_ONLINE_VERIFY, + ABORTED_RESYNC, + CONNECTION_LOST_NEGOTIATING, + IMPLICITLY_UPGRADED_DISK, + IMPLICITLY_UPGRADED_PDSK, +}; + static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); @@ -44,7 +53,7 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort); + enum sanitize_state_warnings *warn); static inline bool is_susp(union drbd_state s) { @@ -656,6 +665,21 @@ is_valid_transition(union drbd_state os, union drbd_state ns) return rv; } +static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +{ + static const char *msg_table[] = { + [NO_WARNING] = "", + [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", + [ABORTED_RESYNC] = "Resync aborted.", + [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", + [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", + [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", + }; + + if (warn != NO_WARNING) + dev_warn(DEV, "%s\n", msg_table[warn]); +} + /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition * @mdev: DRBD device. @@ -667,11 +691,14 @@ is_valid_transition(union drbd_state os, union drbd_state ns) * to D_UNKNOWN. 
This rule and many more along those lines are in this function. */ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort) + enum sanitize_state_warnings *warn) { enum drbd_fencing_p fp; enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + if (warn) + *warn = NO_WARNING; + fp = FP_DONT_CARE; if (get_ldev(mdev)) { rcu_read_lock(); @@ -695,10 +722,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* An implication of the disk states onto the connection state */ /* Abort resync if a disk fails/detaches */ if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = - ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? - "Online-verify" : "Resync"; + if (warn) + *warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? + ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; ns.conn = C_CONNECTED; } @@ -709,7 +735,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + if (warn) + *warn = CONNECTION_LOST_NEGOTIATING; ns.disk = D_DISKLESS; ns.pdsk = D_UNKNOWN; } @@ -791,16 +818,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = disk_max; if (ns.disk < disk_min) { - dev_warn(DEV, "Implicitly set disk from %s to %s\n", - drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_DISK; ns.disk = disk_min; } if (ns.pdsk > pdsk_max) ns.pdsk = pdsk_max; if (ns.pdsk < pdsk_min) { - dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", - drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_PDSK; ns.pdsk = pdsk_min; } @@ -875,12 +902,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, { union drbd_state os; enum drbd_state_rv rv = 
SS_SUCCESS; - const char *warn_sync_abort = NULL; + enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; os = drbd_read_state(mdev); - ns = sanitize_state(mdev, ns, &warn_sync_abort); + ns = sanitize_state(mdev, ns, &ssw); if (ns.i == os.i) return SS_NOTHING_TO_DO; @@ -909,8 +936,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } - if (warn_sync_abort) - dev_warn(DEV, "%s aborted.\n", warn_sync_abort); + print_sanitize_warnings(mdev, ssw); drbd_pr_state_change(mdev, os, ns, flags); diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 82db83410f08..f1046b13d9f6 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -126,7 +126,7 @@ * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ #define DRBD_DISK_SIZE_MIN 0 -#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ #define DRBD_DISK_SIZE_SCALE 's' /* sectors */ -- cgit v1.2.3 From cdfda633d235028e9b27381dedb65416409e8729 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 Jul 2011 15:38:59 +0200 Subject: drbd: detach from frozen backing device * drbd-8.3: documentation: Documented detach's --force and disk's --disk-timeout drbd: Implemented the disk-timeout option drbd: Force flag for the detach operation drbd: Allow new IOs while the local disk is in FAILED state drbd: Bitmap IO functions can not return prematurely if the disk breaks drbd: Added a kref to bm_aio_ctx drbd: Hold a reference to ldev while doing meta-data IO drbd: Keep a reference to the bio until the completion handler finished drbd: Implemented wait_until_done_or_disk_failure() drbd: Replaced md_io_mutex by an atomic: md_io_in_use drbd: moved md_io into mdev drbd: Immediately allow completion of IOs, that wait for IO completions on a failed disk drbd: Keep a reference to barrier acked requests Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg
--- drivers/block/drbd/drbd_actlog.c | 75 ++++++++++++++++++------ drivers/block/drbd/drbd_bitmap.c | 115 +++++++++++++++++++++++++++---------- drivers/block/drbd/drbd_int.h | 12 ++-- drivers/block/drbd/drbd_main.c | 77 ++++++++++++++++++++++--- drivers/block/drbd/drbd_nl.c | 28 ++++++++- drivers/block/drbd/drbd_receiver.c | 2 - drivers/block/drbd/drbd_req.c | 52 +++++++++++------ drivers/block/drbd/drbd_req.h | 19 +++--- drivers/block/drbd/drbd_state.c | 7 +++ drivers/block/drbd/drbd_worker.c | 9 ++- include/linux/drbd_genl.h | 9 ++- include/linux/drbd_limits.h | 6 ++ 12 files changed, 321 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index aeb483daea06..58b5b61628fc 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -114,18 +114,44 @@ struct drbd_atodb_wait { static int w_al_write_transaction(struct drbd_work *, int); +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ + int r; + + wait_event(mdev->misc_wait, + (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || + mdev->state.disk <= D_FAILED); + + return r ? 
NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->md_io_in_use)) + wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) +{ + enum drbd_disk_state ds = mdev->state.disk; + return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} + +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +{ + wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); +} + static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) { struct bio *bio; - struct drbd_md_io md_io; int err; - md_io.mdev = mdev; - init_completion(&md_io.event); - md_io.error = 0; + mdev->md_io.done = 0; + mdev->md_io.error = -ENODEV; if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) rw |= REQ_FUA | REQ_FLUSH; @@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, err = -EIO; if (bio_add_page(bio, page, size, 0) != size) goto out; - bio->bi_private = &md_io; + bio->bi_private = &mdev->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + err = -ENODEV; + goto out; + } + + bio_get(bio); /* one bio_put() is in the completion handler */ + atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ if (drbd_insert_fault(mdev, (rw & WRITE) ? 
DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_for_completion(&md_io.event); + wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) - err = md_io.error; + err = mdev->md_io.error; out: bio_put(bio); @@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int err; struct page *iop = mdev->md_io_page; - D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); BUG_ON(!bdev->md_bdev); @@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } - mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + if (!buffer) { + dev_err(DEV, "disk failed while waiting for md_io buffer\n"); + aw->err = -EIO; + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } memset(buffer, 0, sizeof(*buffer)); buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); @@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) mdev->al_tr_number++; } - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); @@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* lock out all other meta data io for now, * and make sure the page is mapped. */ - mutex_lock(&mdev->md_io_mutex); - b = page_address(mdev->md_io_page); + b = drbd_md_get_buffer(mdev); + if (!b) + return 0; /* Always use the full ringbuffer space for now. 
* possible optimization: read in all of it, @@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* IO error */ if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!found_valid) { if (found_initialized != mx) dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 1; } @@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!expect(rv != 0)) goto cancel; if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -643,7 +684,7 @@ cancel: mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", transactions, active_extents); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 52c48143b22a..706e5220dd4a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) struct bm_aio_ctx { struct drbd_conf *mdev; atomic_t in_flight; - struct completion done; + unsigned int done; unsigned flags; #define BM_AIO_COPY_PAGES 1 #define BM_AIO_WRITE_HINTED 2 int error; + struct kref kref; }; +static void bm_aio_ctx_destroy(struct kref *kref) +{ + struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + + put_ldev(ctx->mdev); + kfree(ctx); +} + /* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { @@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error) bio_put(bio); - if (atomic_dec_and_test(&ctx->in_flight)) - complete(&ctx->done); + if 
(atomic_dec_and_test(&ctx->in_flight)) { + ctx->done = 1; + wake_up(&mdev->misc_wait); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + } } static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - struct bio *bio = bio_alloc_drbd(GFP_KERNEL); + struct bio *bio = bio_alloc_drbd(GFP_NOIO); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; @@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must */ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { - struct bm_aio_ctx ctx = { - .mdev = mdev, - .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = flags, - }; + struct bm_aio_ctx *ctx; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; unsigned long now; @@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * For lazy writeout, we don't care for ongoing changes to the bitmap, * as we submit copies of pages anyways. 
*/ - if (!ctx.flags) + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = flags, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + err = -ENODEV; + goto out; + } + + if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); num_pages = b->bm_number_of_pages; @@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w continue; } } - atomic_inc(&ctx.in_flight); - bm_page_io_async(&ctx, i, rw); + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i, rw); ++count; cond_resched(); } /* - * We initialize ctx.in_flight to one to make sure bm_async_io_complete - * will not complete() early, and decrement / test it here. If there + * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * will not set ctx->done early, and decrement / test it here. If there * are still some bios in flight, we need to wait for them here. + * If all IO is done already (or nothing had been submitted), there is + * no need to wait. Still, we need to put the kref associated with the + * "in_flight reached zero, all done" event. */ - if (!atomic_dec_and_test(&ctx.in_flight)) - wait_for_completion(&ctx.done); + if (!atomic_dec_and_test(&ctx->in_flight)) + wait_until_done_or_disk_failure(mdev, &ctx->done); + else + kref_put(&ctx->kref, &bm_aio_ctx_destroy); /* summary for global bitmap IO */ if (flags == 0) dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + rw == WRITE ? 
"WRITE" : "READ", + count, jiffies - now); - if (ctx.error) { + if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, true); - err = -EIO; /* ctx.error ? */ + err = -EIO; /* ctx->error ? */ } + if (atomic_read(&ctx->in_flight)) + err = -EIO; /* Disk failed during IO... */ + now = jiffies; if (rw == WRITE) { drbd_md_flush(mdev); @@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); +out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - struct bm_aio_ctx ctx = { + struct bm_aio_ctx *ctx; + int err; + + if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { + dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); + return 0; + } + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { .mdev = mdev, .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), + .done = 0, .flags = BM_AIO_COPY_PAGES, + .error = 0, + .kref = { ATOMIC_INIT(2) }, }; - if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { - dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); - return 0; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + err = -ENODEV; + goto out; } - bm_page_io_async(&ctx, idx, WRITE_SYNC); - wait_for_completion(&ctx.done); + bm_page_io_async(ctx, idx, WRITE_SYNC); + wait_until_done_or_disk_failure(mdev, &ctx->done); - if (ctx.error) + if (ctx->error) drbd_chk_io_error(mdev, 1, true); /* that should force detach, so the in memory 
bitmap will be * gone in a moment as well. */ mdev->bm_writ_cnt++; - return ctx.error; + err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; + out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + return err; } /* NOTE diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6035784f0de3..4e582058a7c9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -780,8 +780,7 @@ struct drbd_backing_dev { }; struct drbd_md_io { - struct drbd_conf *mdev; - struct completion event; + unsigned int done; int error; }; @@ -852,6 +851,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct list_head barrier_acked_requests; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ @@ -978,7 +978,8 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct mutex md_io_mutex; /* protects the md_io_buffer */ + struct drbd_md_io md_io; + atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ @@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? 
*/ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) @@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) case D_OUTDATED: case D_CONSISTENT: case D_UP_TO_DATE: + case D_FAILED: /* disk state is stable as well. */ break; /* no new io accepted during transitional states */ case D_ATTACHING: - case D_FAILED: case D_NEGOTIATING: case D_UNKNOWN: case D_MASK: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 448de7bf8223..15384986e4a4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn) tconn->oldest_tle = b; tconn->newest_tle = b; INIT_LIST_HEAD(&tconn->out_of_sequence_requests); + INIT_LIST_HEAD(&tconn->barrier_acked_requests); return 1; } @@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, These have been list_move'd to the out_of_sequence_requests list in _req_mod(, BARRIER_ACKED) above. */ - list_del_init(&b->requests); + list_splice_init(&b->requests, &tconn->barrier_acked_requests); mdev = b->w.mdev; nob = b->next; @@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) b = tmp; list_splice(&carry_reads, &b->requests); } -} + /* Actions operating on the disk state, also want to work on + requests that got barrier acked. 
*/ + switch (what) { + case FAIL_FROZEN_DISK_IO: + case RESTART_FROZEN_DISK_IO: + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); + } + case CONNECTION_LOST_WHILE_PENDING: + case RESEND: + break; + default: + conn_err(tconn, "what = %d in _tl_restart()\n", what); + } +} /** * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL @@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) spin_unlock_irq(&tconn->req_lock); } +/** + * tl_apply() - Applies an event to all requests for a certain mdev in the TL + * @mdev: DRBD device. + * @what: The action/event to perform with all request objects + * + * @what might only be ABORT_DISK_IO. + */ +void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_tl_epoch *b; + struct list_head *le, *tle; + struct drbd_request *req; + + D_ASSERT(what == ABORT_DISK_IO); + + spin_lock_irq(&tconn->req_lock); + b = tconn->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + b = b->next; + } + + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + + spin_unlock_irq(&tconn->req_lock); +} + static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; @@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); atomic_set(&mdev->ap_in_flight, 0); + atomic_set(&mdev->md_io_in_use, 0); - mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->own_state_mutex); mdev->state_mutex = &mdev->own_state_mutex; @@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref)
struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); struct drbd_tconn *tconn = mdev->tconn; + del_timer_sync(&mdev->request_timer); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev) * since we updated it on metadata. */ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); +out: put_ldev(mdev); } @@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is @@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; err: - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); + out: put_ldev(mdev); return rv; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 97d1dab045d2..bf8d0b077624 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) synchronize_rcu(); kfree(old_disk_conf); kfree(old_plan); + mod_timer(&mdev->request_timer, jiffies + HZ); goto success; fail_unlock: @@ 
-1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (rv < SS_SUCCESS) goto force_diskless_dec; + mod_timer(&mdev->request_timer, jiffies + HZ); + if (mdev->state.role == R_PRIMARY) mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; else @@ -1667,10 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } -static int adm_detach(struct drbd_conf *mdev) +static int adm_detach(struct drbd_conf *mdev, int force) { enum drbd_state_rv retcode; int ret; + + if (force) { + drbd_force_state(mdev, NS(disk, D_FAILED)); + retcode = SS_SUCCESS; + goto out; + } + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); /* D_FAILED will transition to DISKLESS. */ @@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev) retcode = SS_NOTHING_TO_DO; if (ret) retcode = ERR_INTR; +out: return retcode; } @@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev) int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; + struct detach_parms parms = { }; + int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = adm_detach(adm_ctx.mdev); + if (info->attrs[DRBD_NLA_DETACH_PARMS]) { + err = detach_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + retcode = adm_detach(adm_ctx.mdev, parms.force_detach); out: drbd_adm_finish(info, retcode); return 0; @@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = adm_detach(mdev); + retcode = adm_detach(mdev, 0); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to detach"); goto out; diff --git 
a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7218750d2937..3a7e54b8f418 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - del_timer(&mdev->request_timer); - del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c4e4553f5c2c..8fa51cda3b7e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; - /* only WRITES may end up here without a master bio (on barrier ack) */ - int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. */ - if (s & RQ_LOCAL_PENDING) + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) return; if (req->i.waiting) { /* Retry all conflicting peer requests. 
*/ @@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) req->master_bio = NULL; } + if (s & RQ_LOCAL_PENDING) + return; + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, * or protocol C P_WRITE_ACK, @@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case COMPLETED_OK: - if (bio_data_dir(req->master_bio) == WRITE) + if (req->rq_state & RQ_WRITE) mdev->writ_cnt += req->i.size >> 9; else mdev->read_cnt += req->i.size >> 9; @@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; + case ABORT_DISK_IO: + req->rq_state |= RQ_LOCAL_ABORTED; + if (req->rq_state & RQ_WRITE) + _req_may_be_done_not_susp(req, m); + else + goto goto_queue_for_net_read; + break; + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); put_ldev(mdev); + goto_queue_for_net_read: + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { @@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data) struct drbd_request *req; /* oldest request */ struct list_head *le; struct net_conf *nc; - unsigned long et; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */ rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + ent = nc ? 
nc->timeout * HZ/10 * nc->ko_count : 0; + + if (get_ldev(mdev)) { + dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; + put_ldev(mdev); + } rcu_read_unlock(); - if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) + et = min_not_zero(dt, ent); + + if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) return; /* Recurring timer stopped */ spin_lock_irq(&tconn->req_lock); @@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data) le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (time_is_before_eq_jiffies(req->start_time + et)) { - if (req->rq_state & RQ_NET_PENDING) { + if (ent && req->rq_state & RQ_NET_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + ent)) { dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); - } else { - dev_warn(DEV, "Local backing block device frozen?\n"); - mod_timer(&mdev->request_timer, jiffies + et); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + } + } + if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); } - } else { - mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&tconn->req_lock); + mod_timer(&mdev->request_timer, req->start_time + et); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 5135c95fbf85..f6aff150addb 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -106,6 +106,7 @@ enum drbd_req_event { READ_COMPLETED_WITH_ERROR, READ_AHEAD_COMPLETED_WITH_ERROR, WRITE_COMPLETED_WITH_ERROR, + ABORT_DISK_IO, COMPLETED_OK, RESEND, FAIL_FROZEN_DISK_IO, @@ -119,18 +120,21 @@ enum drbd_req_event { * same time, so we should hold the request lock anyways. 
*/ enum drbd_req_state_bits { - /* 210 - * 000: no local possible - * 001: to be submitted + /* 3210 + * 0000: no local possible + * 0001: to be submitted * UNUSED, we could map: 011: submitted, completion still pending - * 110: completed ok - * 010: completed with error + * 0110: completed ok + * 0010: completed with error + * 1001: Aborted (before completion) + * 1x10: Aborted and completed -> free */ __RQ_LOCAL_PENDING, __RQ_LOCAL_COMPLETED, __RQ_LOCAL_OK, + __RQ_LOCAL_ABORTED, - /* 76543 + /* 87654 * 00000: no network possible * 00001: to be send * 00011: to be send, on worker queue @@ -209,8 +213,9 @@ enum drbd_req_state_bits { #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4c13a6f4f184..f51cefdbeff3 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -29,6 +29,9 @@ #include "drbd_int.h" #include "drbd_req.h" +/* in drbd_main.c */ +extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what); + struct after_state_chg_work { struct drbd_work w; union drbd_state os; @@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, rcu_read_unlock(); was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_apply(mdev, ABORT_DISK_IO); + /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, * and that may only happen after our put_ldev below. 
*/ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6410c55831e0..dac8d9bc4bec 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -67,11 +67,18 @@ rwlock_t global_state_lock; void drbd_md_io_complete(struct bio *bio, int error) { struct drbd_md_io *md_io; + struct drbd_conf *mdev; md_io = (struct drbd_md_io *)bio->bi_private; + mdev = container_of(md_io, struct drbd_conf, md_io); + md_io->error = error; - complete(&md_io->event); + md_io->done = 1; + wake_up(&mdev->misc_wait); + bio_put(bio); + drbd_md_put_buffer(mdev); + put_ldev(mdev); } /* reads on behalf of the partner, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index e879a9324380..2e6cefefe5e5 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) +GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) +) + /* * Notifications and commands (genlmsghdr->cmd) */ @@ -335,7 +340,9 @@ GENL_op( ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 
20, GENL_doit(drbd_adm_invalidate_peer), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index f1046b13d9f6..ddd332db2a5d 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -50,6 +50,12 @@ #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_SCALE '1' + /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 -- cgit v1.2.3 From d5d7ebd42250620a6da2a8f6943c024391433488 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 5 Jul 2011 20:59:26 +0200 Subject: drbd: on attach, enforce clean meta data Detection of unclean shutdown has moved into user space. The kernel code will, whenever it updates the meta data, mark it as "unclean", and will refuse to attach to such unclean meta data. "drbdadm up" now schedules "drbdmeta apply-al", which will apply the activity log to the bitmap, and/or reinitialize it, if necessary, as well as set a "clean" indicator flag. This moves a bit of code out of kernel space. As a side effect, it also prevents some 8.3 module from accidentally ignoring the 8.4 style activity log, if someone should downgrade, whether on purpose, or accidentally because he changed kernel versions without providing an 8.4 for the new kernel, and the new kernel comes with in-tree 8.3.
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 259 --------------------------------------- drivers/block/drbd/drbd_int.h | 6 - drivers/block/drbd/drbd_main.c | 26 ++-- drivers/block/drbd/drbd_nl.c | 19 +-- drivers/block/drbd/drbd_state.c | 1 + include/linux/drbd.h | 10 +- 6 files changed, 28 insertions(+), 293 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 58b5b61628fc..da8ffd54fc18 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -462,265 +462,6 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } -/* FIXME - * reading of the activity log, - * and potentially dirtying of the affected bitmap regions, - * should be done from userland only. - * DRBD would simply always attach with an empty activity log, - * and refuse to attach to something that looks like a crashed primary. - */ - -/** - * drbd_al_read_tr() - Read a single transaction from the on disk activity log - * @mdev: DRBD device. - * @bdev: Block device to read form. - * @b: pointer to an al_transaction. - * @index: On disk slot of the transaction to read. - * - * Returns -1 on IO error, 0 on checksum error and 1 upon success. - */ -static int drbd_al_read_tr(struct drbd_conf *mdev, - struct drbd_backing_dev *bdev, - int index) -{ - struct al_transaction_on_disk *b = page_address(mdev->md_io_page); - sector_t sector; - u32 crc; - - sector = bdev->md.md_offset - + bdev->md.al_offset - + index * (MD_BLOCK_SIZE>>9); - - /* Dont process error normally, - * as this is done before disk is attached! 
*/ - if (drbd_md_sync_page_io(mdev, bdev, sector, READ)) - return -1; - - if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) - return 0; - - if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) - return 0; - - if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) - return 0; - - if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) - return 0; - - crc = be32_to_cpu(b->crc32c); - b->crc32c = 0; - if (!expect(crc == crc32c(0, b, 4096))) - return 0; - - return 1; -} - -/** - * drbd_al_read_log() - Restores the activity log from its on disk representation. - * @mdev: DRBD device. - * @bdev: Block device to read form. - * - * Returns 1 on success, returns 0 when reading the log failed due to IO errors. - */ -int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) -{ - struct al_transaction_on_disk *b; - int i; - int rv; - int mx; - int active_extents = 0; - int transactions = 0; - int found_valid = 0; - int found_initialized = 0; - int from = 0; - int to = 0; - u32 from_tnr = 0; - u32 to_tnr = 0; - u32 cnr; - - /* Note that this is expected to be called with a newly created, - * clean and all unused activity log of the "expected size". - */ - - /* lock out all other meta data io for now, - * and make sure the page is mapped. - */ - b = drbd_md_get_buffer(mdev); - if (!b) - return 0; - - /* Always use the full ringbuffer space for now. - * possible optimization: read in all of it, - * then scan the in-memory pages. 
*/ - - mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); - - /* Find the valid transaction in the log */ - for (i = 0; i < mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, i); - /* invalid data in that block */ - if (rv == 0) - continue; - if (be16_to_cpu(b->transaction_type) == AL_TR_INITIALIZED) { - ++found_initialized; - continue; - } - - /* IO error */ - if (rv == -1) { - drbd_md_put_buffer(mdev); - return 0; - } - - cnr = be32_to_cpu(b->tr_number); - if (++found_valid == 1) { - from = i; - to = i; - from_tnr = cnr; - to_tnr = cnr; - continue; - } - - D_ASSERT(cnr != to_tnr); - D_ASSERT(cnr != from_tnr); - if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx); - from = i; - from_tnr = cnr; - } - if ((int)cnr - (int)to_tnr > 0) { - D_ASSERT(cnr - to_tnr == i - to); - to = i; - to_tnr = cnr; - } - } - - if (!found_valid) { - if (found_initialized != mx) - dev_warn(DEV, "No usable activity log found.\n"); - drbd_md_put_buffer(mdev); - return 1; - } - - /* Read the valid transactions. - * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ - i = from; - while (1) { - struct lc_element *e; - unsigned j, n, slot, extent_nr; - - rv = drbd_al_read_tr(mdev, bdev, i); - if (!expect(rv != 0)) - goto cancel; - if (rv == -1) { - drbd_md_put_buffer(mdev); - return 0; - } - - /* deal with different transaction types. - * not yet implemented */ - if (!expect(b->transaction_type == 0)) - goto cancel; - - /* on the fly re-create/resize activity log? - * will be a special transaction type flag. */ - if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) - goto cancel; - if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) - goto cancel; - - /* We are the only user of the activity log right now, - * don't actually need to take that lock. */ - spin_lock_irq(&mdev->al_lock); - - /* first, apply the context, ... 
*/ - for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); - j < AL_CONTEXT_PER_TRANSACTION && - slot < mdev->act_log->nr_elements; j++, slot++) { - extent_nr = be32_to_cpu(b->context[j]); - e = lc_element_by_index(mdev->act_log, slot); - if (e->lc_number != extent_nr) { - if (extent_nr != LC_FREE) - active_extents++; - else - active_extents--; - } - lc_set(mdev->act_log, extent_nr, slot); - } - - /* ... then apply the updates, - * which override the context information. - * drbd_al_read_tr already did the rangecheck - * on n <= AL_UPDATES_PER_TRANSACTION */ - n = be16_to_cpu(b->n_updates); - for (j = 0; j < n; j++) { - slot = be16_to_cpu(b->update_slot_nr[j]); - extent_nr = be32_to_cpu(b->update_extent_nr[j]); - if (!expect(slot < mdev->act_log->nr_elements)) - break; - e = lc_element_by_index(mdev->act_log, slot); - if (e->lc_number != extent_nr) { - if (extent_nr != LC_FREE) - active_extents++; - else - active_extents--; - } - lc_set(mdev->act_log, extent_nr, slot); - } - spin_unlock_irq(&mdev->al_lock); - - transactions++; - -cancel: - if (i == to) - break; - i++; - if (i >= mx) - i = 0; - } - - mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); - - /* ok, we are done with it */ - drbd_md_put_buffer(mdev); - - dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", - transactions, active_extents); - - return 1; -} - -/** - * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents - * @mdev: DRBD device. 
- */ -void drbd_al_apply_to_bm(struct drbd_conf *mdev) -{ - unsigned int enr; - unsigned long add = 0; - char ppb[10]; - int i, tmp; - - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - - for (i = 0; i < mdev->act_log->nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - tmp = drbd_bm_ALe_set_all(mdev, enr); - dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr); - add += tmp; - } - - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - - dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", - ppsize(ppb, Bit2KB(add))); -} - static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) { int rv; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4e582058a7c9..9d0d6d0fb820 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -164,10 +164,6 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { /* usual integer division */ #define div_floor(A, B) ((A)/(B)) -/* drbd_meta-data.c (still in drbd_main.c) */ -/* 4th incarnation of the disk layout. 
*/ -#define DRBD_MD_MAGIC (DRBD_MAGIC+4) - extern struct ratelimit_state drbd_ratelimit_state; extern struct idr minors; /* RCU, updates: genl_lock() */ extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */ @@ -1560,7 +1556,6 @@ extern void drbd_rs_cancel_all(struct drbd_conf *mdev); extern int drbd_rs_del_all(struct drbd_conf *mdev); extern void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size); -extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); @@ -1570,7 +1565,6 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) -extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); /* drbd_nl.c */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 15384986e4a4..f1d696ab6e83 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2932,7 +2932,7 @@ void drbd_md_sync(struct drbd_conf *mdev) for (i = UI_CURRENT; i < UI_SIZE; i++) buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); buffer->flags = cpu_to_be32(mdev->ldev->md.flags); - buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); + buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN); buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); @@ -2967,11 +2967,12 @@ out: * @bdev: Device from which the meta data should be read in. * * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case - * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. 
+ * something goes wrong. */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { struct meta_data_on_disk *buffer; + u32 magic, flags; int i, rv = NO_ERROR; if (!get_ldev_if_state(mdev, D_ATTACHING)) @@ -2989,8 +2990,20 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } - if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) { - dev_err(DEV, "Error while reading metadata, magic not found.\n"); + magic = be32_to_cpu(buffer->magic); + flags = be32_to_cpu(buffer->flags); + if (magic == DRBD_MD_MAGIC_84_UNCLEAN || + (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) { + /* btw: that's Activity Log clean, not "all" clean. */ + dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n"); + rv = ERR_MD_UNCLEAN; + goto err; + } + if (magic != DRBD_MD_MAGIC_08) { + if (magic == DRBD_MD_MAGIC_07) + dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); + else + dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); rv = ERR_MD_INVALID; goto err; } @@ -3035,11 +3048,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - /* This blocks wants to be get removed... 
*/ - bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents); - if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) - bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; - err: drbd_md_put_buffer(mdev); out: diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bf8d0b077624..b39f5dc0f47b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1267,7 +1267,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) union drbd_state ns, os; enum drbd_state_rv rv; struct net_conf *nc; - int cp_discovered = 0; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1477,11 +1476,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto force_diskless_dec; } - if (!drbd_al_read_log(mdev, nbc)) { - retcode = ERR_IO_MD_DISK; - goto force_diskless_dec; - } - /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ if (new_disk_conf->md_flushes) @@ -1511,10 +1505,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) clear_bit(CRASHED_PRIMARY, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) { + !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) set_bit(CRASHED_PRIMARY, &mdev->flags); - cp_discovered = 1; - } mdev->send_cnt = 0; mdev->recv_cnt = 0; @@ -1566,15 +1558,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - if (cp_discovered) { - drbd_al_apply_to_bm(mdev); - if (drbd_bitmap_io(mdev, &drbd_bm_write, - "crashed primary apply AL", BM_LOCKED_MASK)) { - retcode = ERR_IO_MD_DISK; - goto force_diskless_dec; - } - } - if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) drbd_suspend_al(mdev); /* IO is still suspended here... 
*/ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index f51cefdbeff3..c4d0d96d7906 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1017,6 +1017,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); + mdf &= ~MDF_AL_CLEAN; if (test_bit(CRASHED_PRIMARY, &mdev->flags)) mdf |= MDF_CRASHED_PRIMARY; if (mdev->state.role == R_PRIMARY || diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 161cd414b036..1e9f754b66ac 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -162,6 +162,7 @@ enum drbd_ret_code { ERR_INVALID_REQUEST = 162, ERR_NEED_APV_100 = 163, ERR_NEED_ALLOW_TWO_PRI = 164, + ERR_MD_UNCLEAN = 165, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -321,7 +322,8 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_FULL_SYNC (1 << 3) #define MDF_WAS_UP_TO_DATE (1 << 4) #define MDF_PEER_OUT_DATED (1 << 5) -#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_AL_CLEAN (1 << 7) enum drbd_uuid_index { UI_CURRENT, @@ -341,10 +343,16 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) +/* magic numbers used in meta data and network packets */ #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a #define DRBD_MAGIC_100 0x8620ec20 +#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3) +#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4) +#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5) + + /* how I came up with this magic? * base64 decode "actlog==" ;) */ #define DRBD_AL_MAGIC 0x69cb65a2 -- cgit v1.2.3 From 65d94927e036cd8e8e1406fa7fc387b4ae730159 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Jul 2011 10:24:51 +0200 Subject: drbd: Changed some defaults * Enabled the resync controller, with a fill target of 50Kib. That gives reasonable resync speeds without tuning. A much better default than the 250KiB/s fixed. 
* Enable bitmap compression. It is safe to use, and most people have more CPU power than network bandwidth. * ko-count of 7: Abort a connection if the peer fails to process a write request within 42 seconds. * al-extents of 1237: ~5 GiB seems to be a much more sane default these days. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index ddd332db2a5d..defdebfecb72 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX 256 +#define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_VOLUME_MAX 65535 @@ -99,7 +99,7 @@ * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 -#define DRBD_KO_COUNT_DEF 0 +#define DRBD_KO_COUNT_DEF 7 /* } */ /* syncer { */ @@ -117,7 +117,7 @@ * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 -#define DRBD_AL_EXTENTS_DEF 127 +#define DRBD_AL_EXTENTS_DEF 1237 #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -151,7 +151,7 @@ #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 -#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ +#define DRBD_C_PLAN_AHEAD_DEF 20 #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 @@ -159,7 +159,7 @@ #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ -#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ +#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ #define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ #define DRBD_C_MAX_RATE_MAX (4 << 20) @@ -167,7 +167,7
@@ #define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ #define DRBD_C_MIN_RATE_MAX (4 << 20) -#define DRBD_C_MIN_RATE_DEF 4096 +#define DRBD_C_MIN_RATE_DEF 250 #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ @@ -187,6 +187,6 @@ #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -#define DRBD_USE_RLE_DEF 0 +#define DRBD_USE_RLE_DEF 1 #endif -- cgit v1.2.3 From 32bdb64038ba3127245912dae2cc8a450bb1d705 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 May 2011 18:26:20 +0200 Subject: drbd: Define scale factors in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index defdebfecb72..cd3565cfed44 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -18,29 +18,35 @@ #define DRBD_MINOR_COUNT_MIN 1 #define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_MINOR_COUNT_SCALE '1' #define DRBD_VOLUME_MAX 65535 #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 +#define DRBD_DIALOG_REFRESH_SCALE '1' /* valid port number */ #define DRBD_PORT_MIN 1 #define DRBD_PORT_MAX 0xffff +#define DRBD_PORT_SCALE '1' /* startup { */ /* if you want more than 3.4 days, disable */ #define DRBD_WFC_TIMEOUT_MIN 0 #define DRBD_WFC_TIMEOUT_MAX 300000 #define DRBD_WFC_TIMEOUT_DEF 0 +#define DRBD_WFC_TIMEOUT_SCALE '1' #define DRBD_DEGR_WFC_TIMEOUT_MIN 0 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0 +#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' /* }*/ /* net { */ @@ -49,6 +55,7 @@ #define DRBD_TIMEOUT_MIN 1 #define DRBD_TIMEOUT_MAX 600 
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ +#define DRBD_TIMEOUT_SCALE '1' /* If backing disk takes longer than disk_timeout, mark the disk as failed */ #define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ @@ -60,46 +67,55 @@ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_DEF 10 /* seconds */ +#define DRBD_CONNECT_INT_SCALE '1' /* keep-alive probes when idle */ #define DRBD_PING_INT_MIN 1 #define DRBD_PING_INT_MAX 120 #define DRBD_PING_INT_DEF 10 +#define DRBD_PING_INT_SCALE '1' /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 #define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 +#define DRBD_PING_TIMEO_SCALE '1' /* max number of write requests between write barriers */ #define DRBD_MAX_EPOCH_SIZE_MIN 1 #define DRBD_MAX_EPOCH_SIZE_MAX 20000 #define DRBD_MAX_EPOCH_SIZE_DEF 2048 +#define DRBD_MAX_EPOCH_SIZE_SCALE '1' /* I don't think that a tcp send buffer of more than 10M is useful */ #define DRBD_SNDBUF_SIZE_MIN 0 #define DRBD_SNDBUF_SIZE_MAX (10<<20) #define DRBD_SNDBUF_SIZE_DEF 0 +#define DRBD_SNDBUF_SIZE_SCALE '1' #define DRBD_RCVBUF_SIZE_MIN 0 #define DRBD_RCVBUF_SIZE_MAX (10<<20) #define DRBD_RCVBUF_SIZE_DEF 0 +#define DRBD_RCVBUF_SIZE_SCALE '1' /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 #define DRBD_MAX_BUFFERS_MAX 131072 #define DRBD_MAX_BUFFERS_DEF 2048 +#define DRBD_MAX_BUFFERS_SCALE '1' /* @4k PageSize -> 4kB - 512MB */ #define DRBD_UNPLUG_WATERMARK_MIN 1 #define DRBD_UNPLUG_WATERMARK_MAX 131072 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) +#define DRBD_UNPLUG_WATERMARK_SCALE '1' /* 0 is disabled. 
* 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 #define DRBD_KO_COUNT_DEF 7 +#define DRBD_KO_COUNT_SCALE '1' /* } */ /* syncer { */ @@ -118,6 +134,7 @@ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 1237 +#define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -148,34 +165,42 @@ #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 +#define DRBD_MAX_BIO_BVECS_SCALE '1' #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 #define DRBD_C_PLAN_AHEAD_DEF 20 +#define DRBD_C_PLAN_AHEAD_SCALE '1' #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 #define DRBD_C_DELAY_TARGET_DEF 10 +#define DRBD_C_DELAY_TARGET_SCALE '1' #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ #define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ +#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */ -#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MIN 250 #define DRBD_C_MAX_RATE_MAX (4 << 20) #define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MIN 0 #define DRBD_C_MIN_RATE_MAX (4 << 20) #define DRBD_C_MIN_RATE_DEF 250 +#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */ #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ #define DRBD_CONG_FILL_DEF 0 +#define DRBD_CONG_FILL_SCALE 's' /* sectors */ #define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE #define DRBD_PROTOCOL_DEF DRBD_PROT_C -- cgit v1.2.3 From 0317d9ecbc9bac43642b4aa70e3e1106f4fd26a1 Mon Sep 17 
00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 13 Jul 2011 13:40:30 +0200 Subject: drbd: Fix the maximum accepted minor device number The maximum minor device number allowed by the kernel is ((1 << 20) - 1). Reject device numbers higher than that to catch possible errors earlier. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index cd3565cfed44..7d956e91ae7b 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -137,7 +137,7 @@ #define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 -#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1) #define DRBD_MINOR_NUMBER_DEF -1 #define DRBD_MINOR_NUMBER_SCALE '1' -- cgit v1.2.3 From b80c043327ea4faac62a329a1d35f16c47a5128e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 18 Jul 2011 11:09:17 +0200 Subject: drbd: The minor_count module parameter is only a hint nowadays * The max of minor_count is 255 * In drbdadm count the number of minors, instead of finding the highest minor number * No longer use the magic in the init script Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7d956e91ae7b..6d0a24331ed2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX (1U << 20) +#define DRBD_MINOR_COUNT_MAX 255 #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_MINOR_COUNT_SCALE '1' -- cgit v1.2.3 From 380207d08e7c4d1b19c0323777278992b4fbf9d6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Nov 2011 12:31:20 +0100 Subject: drbd: Load balancing of
read requests New config option for the disk section "read-balancing", with the values: prefer-local, prefer-remote, round-robin, when-congested-remote. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_req.c | 57 +++++++++++++++++++++++++++++++++++++- include/linux/drbd.h | 8 ++++++ include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 1 + 6 files changed, 68 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d397681fb7aa..e2cccb40f5af 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -698,6 +698,7 @@ enum { AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ B_RS_H_DONE, /* Before resync handler done (already executed) */ DISCARD_MY_DATA, /* discard_my_data flag per volume */ + READ_BALANCE_RR, }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e546dd3fab8a..733b8bd663d5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", + dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ceb04a94aace..98251e2a7fb7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + if
(!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + /* if it is still queued, we may not complete it here. * it will be canceled soon. */ if (!(req->rq_state & RQ_NET_QUEUED)) @@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; + + if (!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + _req_may_be_done_not_susp(req, m); /* else: done by HANDED_OVER_TO_NETWORK */ break; + goto_read_retry_local: + req->rq_state |= RQ_LOCAL_PENDING; + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + generic_make_request(req->private_bio); + break; + case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; @@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); + if (!IS_ERR_OR_NULL(req->private_bio)) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } _req_may_be_done_not_susp(req, m); break; }; @@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } +static bool remote_due_to_read_balancing(struct drbd_conf *mdev) +{ + enum drbd_read_balancing rbm; + struct backing_dev_info *bdi; + + if (mdev->state.pdsk < D_UP_TO_DATE) + return false; + + rcu_read_lock(); + rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; + rcu_read_unlock(); + + switch (rbm) { + case RB_CONGESTED_REMOTE: + bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + return bdi_read_congested(bdi); + case RB_LEAST_PENDING: + return atomic_read(&mdev->local_cnt) > + atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + 
case RB_ROUND_ROBIN: + return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); + case RB_PREFER_REMOTE: + return true; + case RB_PREFER_LOCAL: + default: + return false; + } +} + /* * complete_conflicting_writes - wait for any conflicting write requests * @@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); + } else if (remote_due_to_read_balancing(mdev)) { + /* Keep the private bio in case we need it + for a local retry */ + local = 0; } } remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; @@ -1017,7 +1072,7 @@ fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, &req->i); fail_and_free_req: - if (local) { + if (!IS_ERR_OR_NULL(req->private_bio)) { bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e9f754b66ac..157ba3d74dc7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -102,6 +102,14 @@ enum drbd_on_congestion { OC_DISCONNECT, }; +enum drbd_read_balancing { + RB_PREFER_LOCAL, + RB_PREFER_REMOTE, + RB_ROUND_ROBIN, + RB_LEAST_PENDING, + RB_CONGESTED_REMOTE, +}; + /* KEEP the order, do not delete or insert. Only append. 
*/ enum drbd_ret_code { ERR_CODE_BASE = 100, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 2e6cefefe5e5..826008f297fe 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 6d0a24331ed2..17ef66a5c114 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -161,6 +161,7 @@ #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_ON_CONGESTION_DEF OC_BLOCK +#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 -- cgit v1.2.3 From d60de03a6694302b691bdf858ede9cbdfb7112d6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 17 Nov 2011 10:12:31 +0100 Subject: drbd: Load balancing method: striping Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 13 +++++++++++-- include/linux/drbd.h | 6 ++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 98251e2a7fb7..5b28de0c5960 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -745,10 +745,11 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } -static bool remote_due_to_read_balancing(struct drbd_conf *mdev) +static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector) { enum 
drbd_read_balancing rbm; struct backing_dev_info *bdi; + int stripe_shift; if (mdev->state.pdsk < D_UP_TO_DATE) return false; @@ -764,6 +765,14 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev) case RB_LEAST_PENDING: return atomic_read(&mdev->local_cnt) > atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + case RB_32K_STRIPING: /* stripe_shift = 15 */ + case RB_64K_STRIPING: + case RB_128K_STRIPING: + case RB_256K_STRIPING: + case RB_512K_STRIPING: + case RB_1M_STRIPING: /* stripe_shift = 20 */ + stripe_shift = (rbm - RB_32K_STRIPING + 15); + return (sector >> (stripe_shift - 9)) & 1; case RB_ROUND_ROBIN: return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); case RB_PREFER_REMOTE: @@ -841,7 +850,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); - } else if (remote_due_to_read_balancing(mdev)) { + } else if (remote_due_to_read_balancing(mdev, sector)) { /* Keep the private bio in case we need it for a local retry */ local = 0; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 157ba3d74dc7..1e86156c10f7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -108,6 +108,12 @@ enum drbd_read_balancing { RB_ROUND_ROBIN, RB_LEAST_PENDING, RB_CONGESTED_REMOTE, + RB_32K_STRIPING, + RB_64K_STRIPING, + RB_128K_STRIPING, + RB_256K_STRIPING, + RB_512K_STRIPING, + RB_1M_STRIPING, }; /* KEEP the order, do not delete or insert. Only append. */ -- cgit v1.2.3 From 26ec92871be1e6bd48d0be9ab38ee1ebbeea49f1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 Jul 2012 20:36:03 +0200 Subject: drbd: Stop using NLA_PUT*(). These macros no longer exist in kernel version v3.5-rc1. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 54 +++++++++++++++++++++++---------------- include/linux/genl_magic_func.h | 8 +++--- include/linux/genl_magic_struct.h | 16 ++++++------ 3 files changed, 45 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cbd45de533cb..dc5bd6bbb280 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2554,13 +2554,17 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsi nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); if (!nla) goto nla_put_failure; - if (vnr != VOLUME_UNSPECIFIED) - NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name); - if (tconn->my_addr_len) - NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr); - if (tconn->peer_addr_len) - NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr); + if (vnr != VOLUME_UNSPECIFIED && + nla_put_u32(skb, T_ctx_volume, vnr)) + goto nla_put_failure; + if (nla_put_string(skb, T_ctx_resource_name, tconn->name)) + goto nla_put_failure; + if (tconn->my_addr_len && + nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr)) + goto nla_put_failure; + if (tconn->peer_addr_len && + nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr)) + goto nla_put_failure; nla_nest_end(skb, nla); return 0; @@ -2618,20 +2622,23 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; - NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY); - NLA_PUT_U32(skb, T_current_state, mdev->state.i); - NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid); - NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)); + if (nla_put_u32(skb, T_sib_reason, sib ? 
sib->sib_reason : SIB_GET_STATUS_REPLY) || + nla_put_u32(skb, T_current_state, mdev->state.i) || + nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) || + nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev))) + goto nla_put_failure; if (got_ldev) { - NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags); - NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); - NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev)); - NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev)); + if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) || + nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid) || + nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) || + nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev))) + goto nla_put_failure; if (C_SYNC_SOURCE <= mdev->state.conn && C_PAUSED_SYNC_T >= mdev->state.conn) { - NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total); - NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed); + if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) || + nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed)) + goto nla_put_failure; } } @@ -2641,15 +2648,18 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, case SIB_GET_STATUS_REPLY: break; case SIB_STATE_CHANGE: - NLA_PUT_U32(skb, T_prev_state, sib->os.i); - NLA_PUT_U32(skb, T_new_state, sib->ns.i); + if (nla_put_u32(skb, T_prev_state, sib->os.i) || + nla_put_u32(skb, T_new_state, sib->ns.i)) + goto nla_put_failure; break; case SIB_HELPER_POST: - NLA_PUT_U32(skb, - T_helper_exit_code, sib->helper_exit_code); + if (nla_put_u32(skb, T_helper_exit_code, + sib->helper_exit_code)) + goto nla_put_failure; /* fall through */ case SIB_HELPER_PRE: - NLA_PUT_STRING(skb, T_helper, sib->helper_name); + if (nla_put_string(skb, T_helper, sib->helper_name)) + goto nla_put_failure; break; } } diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 0b8a88e2e83e..023bc346b877 100644 --- a/include/linux/genl_magic_func.h +++ 
b/include/linux/genl_magic_func.h @@ -367,7 +367,8 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ __is_signed) \ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ - __put(skb, attr_nr, s->name); \ + if (__put(skb, attr_nr, s->name)) \ + goto nla_put_failure; \ } #undef __array @@ -375,9 +376,10 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ __get, __put, __is_signed) \ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ - __put(skb, attr_nr, min_t(int, maxlen, \ + if (__put(skb, attr_nr, min_t(int, maxlen, \ s->name ## _len + (nla_type == NLA_NUL_STRING)),\ - s->name); \ + s->name)) \ + goto nla_put_failure; \ } #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 1d0bd79e27b3..eecd19b37001 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -65,28 +65,28 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, char, \ - nla_get_u8, NLA_PUT_U8, false) + nla_get_u8, nla_put_u8, false) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ - nla_get_u8, NLA_PUT_U8, false) + nla_get_u8, nla_put_u8, false) #define __u16_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ - nla_get_u16, NLA_PUT_U16, false) + nla_get_u16, nla_put_u16, false) #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ - nla_get_u32, NLA_PUT_U32, false) + nla_get_u32, nla_put_u32, false) #define __s32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ - nla_get_u32, NLA_PUT_U32, true) + nla_get_u32, nla_put_u32, true) #define 
__u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ - nla_get_u64, NLA_PUT_U64, false) + nla_get_u64, nla_put_u64, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, NLA_PUT, false) + nla_strlcpy, nla_put, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ - nla_memcpy, NLA_PUT, false) + nla_memcpy, nla_put, false) /* fields with default values */ #define __flg_field_def(attr_nr, attr_flag, name, default) \ -- cgit v1.2.3 From 9a51ab1c1b3c1e21f076cdd571bbe6ca7d1b504c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 20 Feb 2012 21:53:28 +0100 Subject: drbd: New disk option al-updates By disabling al-updates one might increase performance. The price for that is that in case a crashed primary (that had al-updates disabled) is reintegrated, it will receive a full-resync instead of a bitmap based resync. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 12 ++++++++++-- drivers/block/drbd/drbd_nl.c | 17 +++++++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 3 +++ include/linux/drbd_limits.h | 1 + 5 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9eae28944312..83d48d210b69 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -276,8 +276,16 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock.
*/ if (mdev->act_log->pending_changes) { - al_write_transaction(mdev); - mdev->al_writ_cnt++; + bool write_al_updates; + + rcu_read_lock(); + write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + rcu_read_unlock(); + + if (write_al_updates) { + al_write_transaction(mdev); + mdev->al_writ_cnt++; + } spin_lock_irq(&mdev->al_lock); /* FIXME diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index dc5bd6bbb280..c5d4fac1a111 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); + if (new_disk_conf->al_updates) + mdev->ldev->md.flags &= MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); drbd_md_sync(mdev); @@ -1545,7 +1550,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } else if (dd == grew) set_bit(RESYNC_AFTER_NEG, &mdev->flags); - if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) || + (test_bit(CRASHED_PRIMARY, &mdev->flags) && + drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) { dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, @@ -1588,13 +1595,19 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (ns.disk == D_CONSISTENT && (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; - rcu_read_unlock(); /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before this point, because drbd_request_state() modifies these flags. 
*/ + if (rcu_dereference(mdev->ldev->disk_conf)->al_updates) + mdev->ldev->md.flags &= MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + + rcu_read_unlock(); + /* In case we are C_CONNECTED postpone any decision on the new disk state after the negotiation phase. */ if (mdev->state.conn == C_CONNECTED) { diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e86156c10f7..36ae7dd28d90 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -338,6 +338,7 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_PEER_OUT_DATED (1 << 5) #define MDF_CRASHED_PRIMARY (1 << 6) #define MDF_AL_CLEAN (1 << 7) +#define MDF_AL_DISABLED (1 << 8) enum drbd_uuid_index { UI_CURRENT, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 826008f297fe..92ec4b50a885 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -130,6 +130,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) + /* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */ + __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -168,6 +170,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 17ef66a5c114..1fa19c5f5e64 100644 --- 
a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -210,6 +210,7 @@ #define DRBD_DISK_DRAIN_DEF 1 #define DRBD_MD_FLUSHES_DEF 1 #define DRBD_TCP_CORK_DEF 1 +#define DRBD_AL_UPDATES_DEF 1 #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -- cgit v1.2.3 From 58ffa580a748dd16b1e5ab260bea39cdbd1e94ef Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 26 Jul 2012 14:09:49 +0200 Subject: drbd: introduce stop-sector to online verify We now can schedule only a specific range of sectors for online verify, or interrupt a running verify without interrupting the connection. Had to bump the protocol version differently, we are now 101. Added verify_can_do_stop_sector() { protocol >= 97 && protocol != 100; } Also, the return value convention for worker callbacks has changed, we returned "true/false" for "keep the connection up" in 8.3, we return 0 for success and < 0 for failure in 8.4. Affected: receive_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 +++++++ drivers/block/drbd/drbd_nl.c | 14 +++++++++----- drivers/block/drbd/drbd_proc.c | 12 +++++++++--- drivers/block/drbd/drbd_receiver.c | 10 +++++++++- drivers/block/drbd/drbd_state.c | 17 +++++++++++++---- drivers/block/drbd/drbd_worker.c | 33 +++++++++++++++++++++++++++------ include/linux/drbd.h | 2 +- include/linux/drbd_genl.h | 1 + 8 files changed, 76 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 22adfc7189de..eddc4388a1b1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -971,6 +971,7 @@ struct drbd_conf { /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; + sector_t ov_stop_sector; /* where are we now? (sector) */ sector_t ov_position; /* Start sector of out of sync range (to merge printk reporting).
*/ @@ -2264,6 +2265,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); } +static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev) +{ + return mdev->tconn->agreed_pro_version >= 97 && + mdev->tconn->agreed_pro_version != 100; +} + static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) { int changed = mdev->ed_uuid != val; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4afd626ca3dc..eefb56308aea 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2939,6 +2939,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) { struct drbd_conf *mdev; enum drbd_ret_code retcode; + struct start_ov_parms parms; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -2947,19 +2948,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) goto out; mdev = adm_ctx.mdev; + + /* resume from last known position, if possible */ + parms.ov_start_sector = mdev->ov_start_sector; + parms.ov_stop_sector = ULLONG_MAX; if (info->attrs[DRBD_NLA_START_OV_PARMS]) { - /* resume from last known position, if possible */ - struct start_ov_parms parms = - { .ov_start_sector = mdev->ov_start_sector }; int err = start_ov_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto out; } - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); + mdev->ov_stop_sector = parms.ov_stop_sector; + /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. 
*/ drbd_suspend_io(mdev); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index e0f0d2a6d538..56672a61eb94 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) * we convert to sectors in the display below. */ unsigned long bm_bits = drbd_bm_bits(mdev); unsigned long bit_pos; + unsigned long long stop_sector = 0; if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) + mdev->state.conn == C_VERIFY_T) { bit_pos = bm_bits - mdev->ov_left; - else + if (verify_can_do_stop_sector(mdev)) + stop_sector = mdev->ov_stop_sector; + } else bit_pos = mdev->bm_resync_fo; /* Total sectors may be slightly off for oddly * sized devices. So what. */ seq_printf(seq, - "\t%3d%% sector pos: %llu/%llu\n", + "\t%3d%% sector pos: %llu/%llu", (int)(bit_pos / (bm_bits/100+1)), (unsigned long long)bit_pos * BM_SECT_PER_BIT, (unsigned long long)bm_bits * BM_SECT_PER_BIT); + if (stop_sector != 0 && stop_sector != ULLONG_MAX) + seq_printf(seq, " stop sector: %llu", stop_sector); + seq_printf(seq, "\n"); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7fe6b01618d4..8fddec96dfbe 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3843,7 +3843,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) * already decided to close the connection again, * we must not "re-establish" it here. */ if (os.conn <= C_TEAR_DOWN) - return false; + return -ECONNRESET; /* If this is the "end of sync" confirmation, usually the peer disk * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits @@ -3875,6 +3875,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } } + /* explicit verify finished notification, stop sector reached. 
*/ + if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && + peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { + ov_out_of_sync_print(mdev); + drbd_resync_finished(mdev); + return 0; + } + /* peer says his disk is inconsistent, while we think it is uptodate, * and this happens while the peer still thinks we have a sync going on, * but we think we are already done with the sync. diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 444581828d70..12f2b4fbe559 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -975,13 +975,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, wake_up(&mdev->state_wait); wake_up(&mdev->tconn->ping_wait); - /* aborted verify run. log the last position */ + /* Aborted verify run, or we reached the stop sector. + * Log the last position, unless end-of-device. */ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { + ns.conn <= C_CONNECTED) { mdev->ov_start_sector = BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); + if (mdev->ov_left) + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); } if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && @@ -1422,6 +1424,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) drbd_send_state(mdev, ns); + /* Verify finished, or reached stop sector. Peer did not know about + * the stop sector, and we may even have changed the stop sector during + * verify to interrupt/stop early. Send the new state. 
*/ + if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED + && verify_can_do_stop_sector(mdev)) + drbd_send_state(mdev, ns); + /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9d7e1fb0f431..1c9c6fd332c3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -692,6 +692,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + bool stop_sector_reached = false; if (unlikely(cancel)) return 1; @@ -700,9 +701,17 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) sector = mdev->ov_position; for (i = 0; i < number; i++) { - if (sector >= capacity) { + if (sector >= capacity) return 1; - } + + /* We check for "finished" only in the reply path: + * w_e_end_ov_reply(). + * We need to send at least one request out. 
*/ + stop_sector_reached = i > 0 + && verify_can_do_stop_sector(mdev) + && sector >= mdev->ov_stop_sector; + if (stop_sector_reached) + break; size = BM_BLOCK_SIZE; @@ -726,7 +735,8 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) requeue: mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); - mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + if (i == 0 || !stop_sector_reached) + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); return 1; } @@ -792,7 +802,12 @@ int drbd_resync_finished(struct drbd_conf *mdev) dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; if (dt <= 0) dt = 1; + db = mdev->rs_total; + /* adjust for verify start and stop sectors, respective reached position */ + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + db -= mdev->ov_left; + dbdt = Bit2KB(db/dt); mdev->rs_paused /= HZ; @@ -815,7 +830,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns.conn = C_CONNECTED; dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", - verify_done ? "Online verify " : "Resync", + verify_done ? 
"Online verify" : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); n_oos = drbd_bm_total_weight(mdev); @@ -896,7 +911,9 @@ out: mdev->rs_total = 0; mdev->rs_failed = 0; mdev->rs_paused = 0; - if (verify_done) + + /* reset start sector, if we reached end of device */ + if (verify_done && mdev->ov_left == 0) mdev->ov_start_sector = 0; drbd_md_sync(mdev); @@ -1144,6 +1161,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) unsigned int size = peer_req->i.size; int digest_size; int err, eq = 0; + bool stop_sector_reached = false; if (unlikely(cancel)) { drbd_free_peer_req(mdev, peer_req); @@ -1194,7 +1212,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) if ((mdev->ov_left & 0x200) == 0x200) drbd_advance_rs_marks(mdev, mdev->ov_left); - if (mdev->ov_left == 0) { + stop_sector_reached = verify_can_do_stop_sector(mdev) && + (sector + (size>>9)) >= mdev->ov_stop_sector; + + if (mdev->ov_left == 0 || stop_sector_reached) { ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 36ae7dd28d90..5171c3530886 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -55,7 +55,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 100 +#define PRO_VERSION_MAX 101 enum drbd_io_error_p { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 92ec4b50a885..9430e9ab37a8 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -215,6 +215,7 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) + __u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector) ) GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, -- cgit v1.2.3 From 3174f8c5045ad247563434c4b4897bd89313eafc Mon Sep 17 00:00:00 2001 From: Philipp Marek Date: Sat, 3 Mar 2012 21:04:30 +0100 
Subject: drbd: pass some more information to userspace. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 11 ++++++++++- include/linux/drbd_genl.h | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index eefb56308aea..466d6b1d9309 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2666,7 +2666,16 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || nla_put_u32(skb, T_current_state, mdev->state.i) || nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) || - nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev))) + nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) || + nla_put_u64(skb, T_send_cnt, mdev->send_cnt) || + nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) || + nla_put_u64(skb, T_read_cnt, mdev->read_cnt) || + nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) || + nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) || + nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) || + nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) || + nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) || + nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt))) goto nla_put_failure; if (got_ldev) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 9430e9ab37a8..d0d8fac8a6e4 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -211,6 +211,16 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, /* for pre and post notifications of helper execution */ __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) + + __u64_field(15, 0, send_cnt) + __u64_field(16, 0, recv_cnt) + __u64_field(17, 0, read_cnt) + __u64_field(18, 0, writ_cnt) + __u64_field(19, 0, 
al_writ_cnt) + __u64_field(20, 0, bm_writ_cnt) + __u32_field(21, 0, ap_bio_cnt) + __u32_field(22, 0, ap_pending_cnt) + __u32_field(23, 0, rs_pending_cnt) ) GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, -- cgit v1.2.3 From eb12010e9af119c84e6b2214064a98681027e0e3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 1 Aug 2012 12:46:20 +0200 Subject: drbd: disambiguation, s/ERR_DISCARD/ERR_DISCARD_IMPOSSIBLE/ If for some reason (typically "split-brained" cluster manager) drbd replica data has diverged, we can choose a victim, and reconnect using "--discard-my-data", causing the victim to become sync-target, fetching all changed blocks from the peer. If we are Primary, we are potentially in use, and we refuse to "roll back" changes to the data below the page cache and other users. Rename the error symbol for this to ERR_DISCARD_IMPOSSIBLE. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- include/linux/drbd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 35bb572a2076..d1073705bf1f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1829,7 +1829,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n return ERR_STONITH_AND_PROT_A; } if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) - return ERR_DISCARD; + return ERR_DISCARD_IMPOSSIBLE; } if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 5171c3530886..0b93e5e2e064 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -136,7 +136,7 @@ enum drbd_ret_code { ERR_AUTH_ALG = 120, ERR_AUTH_ALG_ND = 121, ERR_NOMEM = 122, - ERR_DISCARD = 123, + ERR_DISCARD_IMPOSSIBLE = 123, ERR_DISK_CONFIGURED = 124, ERR_NET_CONFIGURED = 125, ERR_MANDATORY_TAG = 126, -- cgit v1.2.3 From
328e0f125bf41f4f33f684db22015f92cb44fe56 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 19 Oct 2012 14:37:47 +0200 Subject: drbd: Broadcast sync progress no more often than once per second Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_nl.c | 6 ++++++ drivers/block/drbd/drbd_worker.c | 4 ++++ include/linux/drbd.h | 4 ++-- 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 057ffed6eb7e..784f4eb2ed61 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -965,6 +965,7 @@ struct drbd_conf { unsigned long rs_mark_time[DRBD_SYNC_MARKS]; /* current index into rs_mark_{left,time} */ int rs_last_mark; + unsigned long rs_last_bcast; /* [unit jiffies] */ /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 298dd3e35e02..d339a2754a85 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3295,6 +3295,12 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) unsigned seq; int err = -ENOMEM; + if (sib->sib_reason == SIB_SYNC_PROGRESS && + time_after(jiffies, mdev->rs_last_bcast + HZ)) + mdev->rs_last_bcast = jiffies; + else + return; + seq = atomic_inc_return(&drbd_genl_seq); msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); if (!msg) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 64a7305c678a..424dc7bdf9b7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1696,6 +1696,10 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) write_unlock_irq(&global_state_lock); if (r == SS_SUCCESS) { + /* reset rs_last_bcast when a resync or verify is started, + * to deal with potential jiffies wrap. 
*/ + mdev->rs_last_bcast = jiffies - HZ; + dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", drbd_conn_str(ns.conn), (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0b93e5e2e064..0c5a18ec322c 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,8 +52,8 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.11" -#define API_VERSION 88 +#define REL_VERSION "8.4.2" +#define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit v1.2.3