diff options
-rw-r--r-- | fs/ceph/mds_client.c | 2 | ||||
-rw-r--r-- | fs/ceph/mdsmap.c | 21 | ||||
-rw-r--r-- | include/linux/ceph/ceph_features.h | 11 | ||||
-rw-r--r-- | include/linux/ceph/decode.h | 4 | ||||
-rw-r--r-- | include/linux/ceph/mdsmap.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/osdmap.h | 4 | ||||
-rw-r--r-- | net/ceph/decode.c | 56 | ||||
-rw-r--r-- | net/ceph/mon_client.c | 145 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 4 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 45 |
10 files changed, 222 insertions, 72 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 278fe67e2617..afd22815fbda 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5014,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; } - newmap = ceph_mdsmap_decode(&p, end); + newmap = ceph_mdsmap_decode(&p, end, false); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad_unlock; diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 1096d1d3a84c..abd9af7727ad 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -114,7 +114,7 @@ bad: * Ignore any fields we don't care about (there are quite a few of * them). */ -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) +struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) { struct ceph_mdsmap *m; const void *start = *p; @@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) namelen = ceph_decode_32(p); /* skip mds name */ *p += namelen; - ceph_decode_need(p, end, - 4*sizeof(u32) + sizeof(u64) + - sizeof(addr) + sizeof(struct ceph_timespec), - bad); - mds = ceph_decode_32(p); - inc = ceph_decode_32(p); - state = ceph_decode_32(p); + ceph_decode_32_safe(p, end, mds, bad); + ceph_decode_32_safe(p, end, inc, bad); + ceph_decode_32_safe(p, end, state, bad); *p += sizeof(u64); /* state_seq */ - err = ceph_decode_entity_addr(p, end, &addr); + if (info_v >= 8) + err = ceph_decode_entity_addrvec(p, end, msgr2, &addr); + else + err = ceph_decode_entity_addr(p, end, &addr); if (err) goto corrupt; - ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); + + ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since), + bad); laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0; *p += sizeof(u32); ceph_decode_32_safe(p, end, namelen, bad); diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 999636d53cf2..3a47acd9cc14 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -8,7 +8,8 @@ * feature. Base case is 1 (first use). */ #define CEPH_FEATURE_INCARNATION_1 (0ull) -#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL +#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL +#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \ @@ -75,7 +76,7 @@ DEFINE_CEPH_FEATURE( 0, 1, UID) DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR) DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS) - +DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS) DEFINE_CEPH_FEATURE( 3, 1, FLOCK) DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2) DEFINE_CEPH_FEATURE( 5, 1, MONNAMES) @@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2) DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID) DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE) DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL) -DEFINE_CEPH_FEATURE(28, 2, SERVER_M) +DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC) DEFINE_CEPH_FEATURE(29, 1, MDSENC) DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL) DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me @@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_SERVER_NAUTILUS | \ CEPH_FEATURE_FLOCK | \ CEPH_FEATURE_SUBSCRIBE2 | \ + CEPH_FEATURE_MONNAMES | \ CEPH_FEATURE_RECONNECT_SEQ | \ CEPH_FEATURE_DIRLAYOUTHASH | \ CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ + CEPH_FEATURE_MONENC | \ CEPH_FEATURE_CRUSH_TUNABLES | \ CEPH_FEATURE_SERVER_LUMINOUS | \ CEPH_FEATURE_RESEND_ON_SPLIT | \ @@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_SERVER_MIMIC | \ CEPH_FEATURE_MDSENC | \ CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \ diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 450384fe487c..9a934e04f841 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -220,6 +220,7 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv, */ #define CEPH_ENTITY_ADDR_TYPE_NONE 0 #define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1) +#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2) static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a) { @@ -239,6 +240,9 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a) extern int ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr); +int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2, + struct ceph_entity_addr *addr); + /* * encoders */ diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 35d385296fbb..523fd0452856 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) } extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); +struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index cad9acfbc320..5553019c3f07 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) } struct ceph_osdmap *ceph_osdmap_alloc(void); -extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); -struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, +struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2); +struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2, struct ceph_osdmap *map); extern void ceph_osdmap_destroy(struct ceph_osdmap *map); diff --git a/net/ceph/decode.c b/net/ceph/decode.c index eea529595a7a..6429b6713507 100644 --- a/net/ceph/decode.c +++ b/net/ceph/decode.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/ceph/ceph_debug.h> #include <linux/ceph/decode.h> @@ -82,3 +83,58 @@ bad: } EXPORT_SYMBOL(ceph_decode_entity_addr); +/* + * Return addr of desired type (MSGR2 or LEGACY) or error. + * Make sure there is only one match. + * + * Assume encoding with MSG_ADDR2. + */ +int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2, + struct ceph_entity_addr *addr) +{ + __le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 : + CEPH_ENTITY_ADDR_TYPE_LEGACY; + struct ceph_entity_addr tmp_addr; + int addr_cnt; + bool found; + u8 marker; + int ret; + int i; + + ceph_decode_8_safe(p, end, marker, e_inval); + if (marker != 2) { + pr_err("bad addrvec marker %d\n", marker); + return -EINVAL; + } + + ceph_decode_32_safe(p, end, addr_cnt, e_inval); + + found = false; + for (i = 0; i < addr_cnt; i++) { + ret = ceph_decode_entity_addr(p, end, &tmp_addr); + if (ret) + return ret; + + if (tmp_addr.type == my_type) { + if (found) { + pr_err("another match of type %d in addrvec\n", + le32_to_cpu(my_type)); + return -EINVAL; + } + + memcpy(addr, &tmp_addr, sizeof(*addr)); + found = true; + } + } + if (!found && addr_cnt != 0) { + pr_err("no match of type %d in addrvec\n", + le32_to_cpu(my_type)); + return -ENOENT; + } + + return 0; + +e_inval: + return -EINVAL; +} +EXPORT_SYMBOL(ceph_decode_entity_addrvec); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index ebfecf8d0918..a9754a7fa78c 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -36,57 +36,122 @@ static const struct ceph_connection_operations mon_con_ops; static int __validate_auth(struct ceph_mon_client *monc); +static int decode_mon_info(void **p, void *end, bool msgr2, + struct ceph_entity_addr *addr) +{ + void *mon_info_end; + u32 struct_len; + u8 struct_v; + int ret; + + ret = ceph_start_decoding(p, end, 1, "mon_info_t", &struct_v, + &struct_len); + if (ret) + return ret; + + mon_info_end = *p + struct_len; + ceph_decode_skip_string(p, end, e_inval); /* skip mon name */ + ret = ceph_decode_entity_addrvec(p, end, msgr2, addr); + if (ret) + return ret; + + *p = mon_info_end; + return 0; + +e_inval: + return -EINVAL; +} + /* * Decode a monmap blob (e.g., during mount). + * + * Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC). */ -static struct ceph_monmap *ceph_monmap_decode(void *p, void *end) +static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2) { - struct ceph_monmap *m = NULL; - int i, err = -EINVAL; + struct ceph_monmap *monmap = NULL; struct ceph_fsid fsid; - u32 epoch, num_mon; - u32 len; + u32 struct_len; + int blob_len; + int num_mon; + u8 struct_v; + u32 epoch; + int ret; + int i; + + ceph_decode_32_safe(p, end, blob_len, e_inval); + ceph_decode_need(p, end, blob_len, e_inval); + + ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len); + if (ret) + goto fail; + + dout("%s struct_v %d\n", __func__, struct_v); + ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval); + ceph_decode_32_safe(p, end, epoch, e_inval); + if (struct_v >= 6) { + u32 feat_struct_len; + u8 feat_struct_v; - ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_need(&p, end, len, bad); + *p += sizeof(struct ceph_timespec); /* skip last_changed */ + *p += sizeof(struct ceph_timespec); /* skip created */ - dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p)); - p += sizeof(u16); /* skip version */ + ret = ceph_start_decoding(p, end, 1, "mon_feature_t", + &feat_struct_v, &feat_struct_len); + if (ret) + goto fail; - ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad); - ceph_decode_copy(&p, &fsid, sizeof(fsid)); - epoch = ceph_decode_32(&p); + *p += feat_struct_len; /* skip persistent_features */ - num_mon = ceph_decode_32(&p); + ret = ceph_start_decoding(p, end, 1, "mon_feature_t", + &feat_struct_v, &feat_struct_len); + if (ret) + goto fail; + *p += feat_struct_len; /* skip optional_features */ + } + ceph_decode_32_safe(p, end, num_mon, e_inval); + + dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch, + num_mon); if (num_mon > CEPH_MAX_MON) - goto bad; - m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS); - if (m == NULL) - return ERR_PTR(-ENOMEM); - m->fsid = fsid; - m->epoch = epoch; - m->num_mon = num_mon; - for (i = 0; i < num_mon; ++i) { - struct ceph_entity_inst *inst = &m->mon_inst[i]; - - /* copy name portion */ - ceph_decode_copy_safe(&p, end, &inst->name, - sizeof(inst->name), bad); - err = ceph_decode_entity_addr(&p, end, &inst->addr); - if (err) - goto bad; + goto e_inval; + + monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO); + if (!monmap) { + ret = -ENOMEM; + goto fail; } - dout("monmap_decode epoch %d, num_mon %d\n", m->epoch, - m->num_mon); - for (i = 0; i < m->num_mon; i++) - dout("monmap_decode mon%d is %s\n", i, - ceph_pr_addr(&m->mon_inst[i].addr)); - return m; -bad: - dout("monmap_decode failed with %d\n", err); - kfree(m); - return ERR_PTR(err); + monmap->fsid = fsid; + monmap->epoch = epoch; + monmap->num_mon = num_mon; + + /* legacy_mon_addr map or mon_info map */ + for (i = 0; i < num_mon; i++) { + struct ceph_entity_inst *inst = &monmap->mon_inst[i]; + + ceph_decode_skip_string(p, end, e_inval); /* skip mon name */ + inst->name.type = CEPH_ENTITY_TYPE_MON; + inst->name.num = cpu_to_le64(i); + + if (struct_v >= 6) + ret = decode_mon_info(p, end, msgr2, &inst->addr); + else + ret = ceph_decode_entity_addr(p, end, &inst->addr); + if (ret) + goto fail; + + dout("%s mon%d addr %s\n", __func__, i, + ceph_pr_addr(&inst->addr)); + } + + return monmap; + +e_inval: + ret = -EINVAL; +fail: + kfree(monmap); + return ERR_PTR(ret); } /* @@ -476,7 +541,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, p = msg->front.iov_base; end = p + msg->front.iov_len; - monmap = ceph_monmap_decode(p, end); + monmap = ceph_monmap_decode(&p, end, false); if (IS_ERR(monmap)) { pr_err("problem decoding monmap, %d\n", (int)PTR_ERR(monmap)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 8966eae543d3..51be5a7482fc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3918,9 +3918,9 @@ static int handle_one_map(struct ceph_osd_client *osdc, set_pool_was_full(osdc); if (incremental) - newmap = osdmap_apply_incremental(&p, end, osdc->osdmap); + newmap = osdmap_apply_incremental(&p, end, false, osdc->osdmap); else - newmap = ceph_osdmap_decode(&p, end); + newmap = ceph_osdmap_decode(&p, end, false); if (IS_ERR(newmap)) return PTR_ERR(newmap); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index fa08c15be0c0..2b1dd252f231 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1647,7 +1647,8 @@ static int decode_old_pg_upmap_items(void **p, void *end, /* * decode a full map. */ -static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) +static int osdmap_decode(void **p, void *end, bool msgr2, + struct ceph_osdmap *map) { u8 struct_v; u32 epoch = 0; @@ -1718,9 +1719,16 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) goto e_inval; for (i = 0; i < map->max_osd; i++) { - err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]); + struct ceph_entity_addr *addr = &map->osd_addr[i]; + + if (struct_v >= 8) + err = ceph_decode_entity_addrvec(p, end, msgr2, addr); + else + err = ceph_decode_entity_addr(p, end, addr); if (err) goto bad; + + dout("%s osd%d addr %s\n", __func__, i, ceph_pr_addr(addr)); } /* pg_temp */ @@ -1790,7 +1798,7 @@ bad: /* * Allocate and decode a full map. */ -struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) +struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2) { struct ceph_osdmap *map; int ret; @@ -1799,7 +1807,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) if (!map) return ERR_PTR(-ENOMEM); - ret = osdmap_decode(p, end, map); + ret = osdmap_decode(p, end, msgr2, map); if (ret) { ceph_osdmap_destroy(map); return ERR_PTR(ret); @@ -1817,12 +1825,13 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) * new_state: { osd=6, xorstate=EXISTS } # clear osd_state */ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, - struct ceph_osdmap *map) + bool msgr2, struct ceph_osdmap *map) { void *new_up_client; void *new_state; void *new_weight_end; u32 len; + int ret; int i; new_up_client = *p; @@ -1831,8 +1840,12 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, struct ceph_entity_addr addr; ceph_decode_skip_32(p, end, e_inval); - if (ceph_decode_entity_addr(p, end, &addr)) - goto e_inval; + if (struct_v >= 7) + ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr); + else + ret = ceph_decode_entity_addr(p, end, &addr); + if (ret) + return ret; } new_state = *p; @@ -1874,7 +1887,6 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, while (len--) { s32 osd; u32 xorstate; - int ret; osd = ceph_decode_32(p); if (struct_v >= 5) @@ -1910,8 +1922,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, osd = ceph_decode_32(p); BUG_ON(osd >= map->max_osd); - if (ceph_decode_entity_addr(p, end, &addr)) - goto e_inval; + if (struct_v >= 7) + ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr); + else + ret = ceph_decode_entity_addr(p, end, &addr); + if (ret) + return ret; + + dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr)); + pr_info("osd%d up\n", osd); map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; map->osd_addr[osd] = addr; @@ -1927,7 +1946,7 @@ e_inval: /* * decode and apply an incremental map update. */ -struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, +struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2, struct ceph_osdmap *map) { struct ceph_fsid fsid; @@ -1962,7 +1981,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (len > 0) { dout("apply_incremental full map len %d, %p to %p\n", len, *p, end); - return ceph_osdmap_decode(p, min(*p+len, end)); + return ceph_osdmap_decode(p, min(*p+len, end), msgr2); } /* new crush? */ @@ -2014,7 +2033,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } /* new_up_client, new_state, new_weight */ - err = decode_new_up_state_weight(p, end, struct_v, map); + err = decode_new_up_state_weight(p, end, struct_v, msgr2, map); if (err) goto bad; |