diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2020-10-30 15:30:51 +0300 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2020-12-15 01:21:50 +0300 |
commit | a5cbd5fc22d5043a8a76e15d75d031fe24d1f69c (patch) | |
tree | 2379298419dcbfaddef2d7bbd1cba86ba68c8bd4 /net/ceph/mon_client.c | |
parent | 8921f25116af3081fb56871feb93f2dcaf52c722 (diff) | |
download | linux-a5cbd5fc22d5043a8a76e15d75d031fe24d1f69c.tar.xz |
libceph, ceph: get and handle cluster maps with addrvecs
In preparation for msgr2, make the cluster send us maps with addrvecs
including both LEGACY and MSGR2 addrs instead of a single LEGACY addr.
This means advertising support for SERVER_NAUTILUS and also some older
features: SERVER_MIMIC, MONENC and MONNAMES.
MONNAMES and MONENC are actually pre-argonaut, we just never updated
ceph_monmap_decode() for them. Decoding is unconditional, see commit
23c625ce3065 ("libceph: assume argonaut on the server side").
SERVER_MIMIC doesn't bear any meaning for the kernel client.
Since ceph_decode_entity_addrvec() is guarded by encoding version
checks (and in msgr2 case it is guarded implicitly by the fact that
server is speaking msgr2), we assume MSG_ADDR2 for it.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net/ceph/mon_client.c')
-rw-r--r-- | net/ceph/mon_client.c | 145 |
1 files changed, 105 insertions, 40 deletions
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index ebfecf8d0918..a9754a7fa78c 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -36,57 +36,122 @@ static const struct ceph_connection_operations mon_con_ops; static int __validate_auth(struct ceph_mon_client *monc); +static int decode_mon_info(void **p, void *end, bool msgr2, + struct ceph_entity_addr *addr) +{ + void *mon_info_end; + u32 struct_len; + u8 struct_v; + int ret; + + ret = ceph_start_decoding(p, end, 1, "mon_info_t", &struct_v, + &struct_len); + if (ret) + return ret; + + mon_info_end = *p + struct_len; + ceph_decode_skip_string(p, end, e_inval); /* skip mon name */ + ret = ceph_decode_entity_addrvec(p, end, msgr2, addr); + if (ret) + return ret; + + *p = mon_info_end; + return 0; + +e_inval: + return -EINVAL; +} + /* * Decode a monmap blob (e.g., during mount). + * + * Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC). */ -static struct ceph_monmap *ceph_monmap_decode(void *p, void *end) +static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2) { - struct ceph_monmap *m = NULL; - int i, err = -EINVAL; + struct ceph_monmap *monmap = NULL; struct ceph_fsid fsid; - u32 epoch, num_mon; - u32 len; + u32 struct_len; + int blob_len; + int num_mon; + u8 struct_v; + u32 epoch; + int ret; + int i; + + ceph_decode_32_safe(p, end, blob_len, e_inval); + ceph_decode_need(p, end, blob_len, e_inval); + + ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len); + if (ret) + goto fail; + + dout("%s struct_v %d\n", __func__, struct_v); + ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval); + ceph_decode_32_safe(p, end, epoch, e_inval); + if (struct_v >= 6) { + u32 feat_struct_len; + u8 feat_struct_v; - ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_need(&p, end, len, bad); + *p += sizeof(struct ceph_timespec); /* skip last_changed */ + *p += sizeof(struct ceph_timespec); /* skip created */ - dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p)); - p += sizeof(u16); /* skip version */ + ret = ceph_start_decoding(p, end, 1, "mon_feature_t", + &feat_struct_v, &feat_struct_len); + if (ret) + goto fail; - ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad); - ceph_decode_copy(&p, &fsid, sizeof(fsid)); - epoch = ceph_decode_32(&p); + *p += feat_struct_len; /* skip persistent_features */ - num_mon = ceph_decode_32(&p); + ret = ceph_start_decoding(p, end, 1, "mon_feature_t", + &feat_struct_v, &feat_struct_len); + if (ret) + goto fail; + *p += feat_struct_len; /* skip optional_features */ + } + ceph_decode_32_safe(p, end, num_mon, e_inval); + + dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch, + num_mon); if (num_mon > CEPH_MAX_MON) - goto bad; - m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS); - if (m == NULL) - return ERR_PTR(-ENOMEM); - m->fsid = fsid; - m->epoch = epoch; - m->num_mon = num_mon; - for (i = 0; i < num_mon; ++i) { - struct ceph_entity_inst *inst = &m->mon_inst[i]; - - /* copy name portion */ - ceph_decode_copy_safe(&p, end, &inst->name, - sizeof(inst->name), bad); - err = ceph_decode_entity_addr(&p, end, &inst->addr); - if (err) - goto bad; + goto e_inval; + + monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO); + if (!monmap) { + ret = -ENOMEM; + goto fail; } - dout("monmap_decode epoch %d, num_mon %d\n", m->epoch, - m->num_mon); - for (i = 0; i < m->num_mon; i++) - dout("monmap_decode mon%d is %s\n", i, - ceph_pr_addr(&m->mon_inst[i].addr)); - return m; -bad: - dout("monmap_decode failed with %d\n", err); - kfree(m); - return ERR_PTR(err); + monmap->fsid = fsid; + monmap->epoch = epoch; + monmap->num_mon = num_mon; + + /* legacy_mon_addr map or mon_info map */ + for (i = 0; i < num_mon; i++) { + struct ceph_entity_inst *inst = &monmap->mon_inst[i]; + + ceph_decode_skip_string(p, end, e_inval); /* skip mon name */ + inst->name.type = CEPH_ENTITY_TYPE_MON; + inst->name.num = cpu_to_le64(i); + + if (struct_v >= 6) + ret = decode_mon_info(p, end, msgr2, &inst->addr); + else + ret = ceph_decode_entity_addr(p, end, &inst->addr); + if (ret) + goto fail; + + dout("%s mon%d addr %s\n", __func__, i, + ceph_pr_addr(&inst->addr)); + } + + return monmap; + +e_inval: + ret = -EINVAL; +fail: + kfree(monmap); + return ERR_PTR(ret); } /* @@ -476,7 +541,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, p = msg->front.iov_base; end = p + msg->front.iov_len; - monmap = ceph_monmap_decode(p, end); + monmap = ceph_monmap_decode(&p, end, false); if (IS_ERR(monmap)) { pr_err("problem decoding monmap, %d\n", (int)PTR_ERR(monmap)); |