From 3469ac1aa3a2f1e2586a412923c414779a0af854 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 7 May 2012 15:33:36 -0700
Subject: ceph: drop support for preferred_osd pgs

This was an ill-conceived feature that has been removed from Ceph.  Do
this gracefully:

 - reject attempts to specify a preferred_osd via the ioctl
 - stop exposing this information via virtual xattrs
 - always fill in -1 for requests, in case we talk to an older server
 - don't calculate preferred_osd placements/pgids

Reviewed-by: Alex Elder
Signed-off-by: Sage Weil
---
 net/ceph/osdmap.c | 47 ++++++++++-------------------------------------
 1 file changed, 10 insertions(+), 37 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 29ad46ec9dcf..7d39f3cb4947 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1000,7 +1000,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 {
 	unsigned num, num_mask;
 	struct ceph_pg pgid;
-	s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
 	int poolid = le32_to_cpu(fl->fl_pg_pool);
 	struct ceph_pg_pool_info *pool;
 	unsigned ps;
@@ -1011,23 +1010,13 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 	if (!pool)
 		return -EIO;
 	ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
-	if (preferred >= 0) {
-		ps += preferred;
-		num = le32_to_cpu(pool->v.lpg_num);
-		num_mask = pool->lpg_num_mask;
-	} else {
-		num = le32_to_cpu(pool->v.pg_num);
-		num_mask = pool->pg_num_mask;
-	}
+	num = le32_to_cpu(pool->v.pg_num);
+	num_mask = pool->pg_num_mask;

 	pgid.ps = cpu_to_le16(ps);
-	pgid.preferred = cpu_to_le16(preferred);
+	pgid.preferred = cpu_to_le16(-1);
 	pgid.pool = fl->fl_pg_pool;
-	if (preferred >= 0)
-		dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps,
-		     (int)preferred);
-	else
-		dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
+	dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);

 	ol->ol_pgid = pgid;
 	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
@@ -1046,23 +1035,17 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	struct ceph_pg_pool_info *pool;
 	int ruleno;
 	unsigned poolid, ps, pps, t;
-	int preferred;

 	poolid = le32_to_cpu(pgid.pool);
 	ps = le16_to_cpu(pgid.ps);
-	preferred = (s16)le16_to_cpu(pgid.preferred);

 	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
 	if (!pool)
 		return NULL;

 	/* pg_temp? */
-	if (preferred >= 0)
-		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
-				    pool->lpgp_num_mask);
-	else
-		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
-				    pool->pgp_num_mask);
+	t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
+			    pool->pgp_num_mask);
 	pgid.ps = cpu_to_le16(t);
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
@@ -1080,23 +1063,13 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 		return NULL;
 	}

-	/* don't forcefeed bad device ids to crush */
-	if (preferred >= osdmap->max_osd ||
-	    preferred >= osdmap->crush->max_devices)
-		preferred = -1;
-
-	if (preferred >= 0)
-		pps = ceph_stable_mod(ps,
-				      le32_to_cpu(pool->v.lpgp_num),
-				      pool->lpgp_num_mask);
-	else
-		pps = ceph_stable_mod(ps,
-				      le32_to_cpu(pool->v.pgp_num),
-				      pool->pgp_num_mask);
+	pps = ceph_stable_mod(ps,
+			      le32_to_cpu(pool->v.pgp_num),
+			      pool->pgp_num_mask);
 	pps += poolid;
 	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
 			     min_t(int, pool->v.size, *num),
-			     preferred, osdmap->osd_weight);
+			     -1, osdmap->osd_weight);

 	return osds;
 }
-- 
cgit v1.2.3

From 41ebcc0907c58f75d0b25afcaf8b9c35c6b1ad14 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 7 May 2012 15:39:29 -0700
Subject: crush: remove forcefeed functionality

Remove forcefeed functionality from CRUSH.  This is an ugly misfeature
that is mostly useless and unused.  Remove it.

Reflects ceph.git commit ed974b5000f2851207d860a651809af4a1867942.

Reviewed-by: Alex Elder
Signed-off-by: Sage Weil

Conflicts:
	net/ceph/crush/mapper.c
---
 include/linux/crush/mapper.h |  1 -
 net/ceph/crush/mapper.c      | 48 +-----------------------------------------------
 net/ceph/osdmap.c            |  2 +-
 3 files changed, 2 insertions(+), 49 deletions(-)

diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index 9322ab8bccd8..71d79f44a7d0 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -14,7 +14,6 @@ extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, i
 extern int crush_do_rule(const struct crush_map *map,
			 int ruleno,
			 int x, int *result, int result_max,
-			 int forcefeed,    /* -1 for none */
			 const __u32 *weights);

 #endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index fba9460fe572..11cf352201ba 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -463,15 +463,12 @@ reject:
  * @x: hash input
  * @result: pointer to result vector
  * @result_max: maximum result size
- * @force: force initial replica choice; -1 for none
  */
 int crush_do_rule(const struct crush_map *map,
		  int ruleno, int x, int *result, int result_max,
-		  int force, const __u32 *weight)
+		  const __u32 *weight)
 {
 	int result_len;
-	int force_context[CRUSH_MAX_DEPTH];
-	int force_pos = -1;
 	int a[CRUSH_MAX_SET];
 	int b[CRUSH_MAX_SET];
 	int c[CRUSH_MAX_SET];
@@ -497,27 +494,6 @@ int crush_do_rule(const struct crush_map *map,
 	w = a;
 	o = b;

-	/*
-	 * determine hierarchical context of force, if any.  note
-	 * that this may or may not correspond to the specific types
-	 * referenced by the crush rule.  it will also only affect
-	 * the first descent (TAKE).
-	 */
-	if (force >= 0 &&
-	    force < map->max_devices &&
-	    map->device_parents[force] != 0 &&
-	    !is_out(map, weight, force, x)) {
-		while (1) {
-			force_context[++force_pos] = force;
-			if (force >= 0)
-				force = map->device_parents[force];
-			else
-				force = map->bucket_parents[-1-force];
-			if (force == 0)
-				break;
-		}
-	}
-
 	for (step = 0; step < rule->len; step++) {
 		struct crush_rule_step *curstep = &rule->steps[step];

@@ -525,14 +501,6 @@ int crush_do_rule(const struct crush_map *map,
 		switch (curstep->op) {
 		case CRUSH_RULE_TAKE:
 			w[0] = curstep->arg1;
-
-			/* find position in force_context/hierarchy */
-			while (force_pos >= 0 && force_context[force_pos] != w[0])
-				force_pos--;
-			/* and move past it */
-			if (force_pos >= 0)
-				force_pos--;
-
 			wsize = 1;
 			break;

@@ -567,20 +535,6 @@ int crush_do_rule(const struct crush_map *map,
					continue;
				}
				j = 0;
-				if (osize == 0 && force_pos >= 0) {
-					/* skip any intermediate types */
-					while (force_pos &&
-					       force_context[force_pos] < 0 &&
-					       curstep->arg2 !=
-					       map->buckets[-1 -
-					       force_context[force_pos]]->type)
-						force_pos--;
-					o[osize] = force_context[force_pos];
-					if (recurse_to_leaf)
-						c[osize] = force_context[0];
-					j++;
-					force_pos--;
-				}
				osize += crush_choose(map,
						      map->buckets[-1-w[i]],
						      weight,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 7d39f3cb4947..9dda36f7aa9d 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1069,7 +1069,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	pps += poolid;
 	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
 			     min_t(int, pool->v.size, *num),
-			     -1, osdmap->osd_weight);
+			     osdmap->osd_weight);

 	return osds;
 }
-- 
cgit v1.2.3

From fc7c3ae5ab9246ad96aab4d0d57f67e9255cfb56 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 7 May 2012 15:36:35 -0700
Subject: crush: remove parent maps

These were used for the ill-fated forcefeed feature.  Remove them.

Reflects ceph.git commit ebdf80edfecfbd5a842b71fbe5732857994380c1.

Reviewed-by: Alex Elder
Signed-off-by: Sage Weil
---
 include/linux/crush/crush.h | 11 -----------
 net/ceph/crush/crush.c      | 25 -------------------------
 net/ceph/osdmap.c           |  7 -------
 3 files changed, 43 deletions(-)

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 3f50369a50e8..158a4d25ca83 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -151,16 +151,6 @@ struct crush_map {
 	struct crush_bucket **buckets;
 	struct crush_rule **rules;

-	/*
-	 * Parent pointers to identify the parent bucket a device or
-	 * bucket in the hierarchy.  If an item appears more than
-	 * once, this is the _last_ time it appeared (where buckets
-	 * are processed in bucket id order, from -1 on down to
-	 * -max_buckets.
-	 */
-	__u32 *bucket_parents;
-	__u32 *device_parents;
-
 	__s32 max_buckets;
 	__u32 max_rules;
 	__s32 max_devices;
@@ -169,7 +159,6 @@ struct crush_map {

 /* crush.c */
 extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
-extern void crush_calc_parents(struct crush_map *map);
 extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
 extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
 extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 8dd19a0deedc..2160791acf03 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -46,29 +46,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 	return 0;
 }

-/**
- * crush_calc_parents - Calculate parent vectors for the given crush map.
- * @map: crush_map pointer
- */
-void crush_calc_parents(struct crush_map *map)
-{
-	int i, b, c;
-
-	for (b = 0; b < map->max_buckets; b++) {
-		if (map->buckets[b] == NULL)
-			continue;
-		for (i = 0; i < map->buckets[b]->size; i++) {
-			c = map->buckets[b]->items[i];
-			BUG_ON(c >= map->max_devices ||
-			       c < -map->max_buckets);
-			if (c >= 0)
-				map->device_parents[c] = map->buckets[b]->id;
-			else
-				map->bucket_parents[-1-c] = map->buckets[b]->id;
-		}
-	}
-}
-
 void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
 {
 	kfree(b->h.perm);
@@ -143,8 +120,6 @@ void crush_destroy(struct crush_map *map)
 		kfree(map->rules);
 	}

-	kfree(map->bucket_parents);
-	kfree(map->device_parents);
 	kfree(map);
 }

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9dda36f7aa9d..dac448ba68e4 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -161,13 +161,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	c->max_rules = ceph_decode_32(p);
 	c->max_devices = ceph_decode_32(p);

-	c->device_parents = kcalloc(c->max_devices, sizeof(u32), GFP_NOFS);
-	if (c->device_parents == NULL)
-		goto badmem;
-	c->bucket_parents = kcalloc(c->max_buckets, sizeof(u32), GFP_NOFS);
-	if (c->bucket_parents == NULL)
-		goto badmem;
-
 	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
 	if (c->buckets == NULL)
 		goto badmem;
-- 
cgit v1.2.3

From 8b393269008411a612ca549b733b4296e819f2fb Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 7 May 2012 15:37:23 -0700
Subject: crush: warn on do_rule failure

If we get an error code from crush_do_rule(), print an error to the
console.

Reviewed-by: Alex Elder
Signed-off-by: Sage Weil
---
 net/ceph/osdmap.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index dac448ba68e4..2592f3cca987 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1027,7 +1027,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	struct ceph_pg_mapping *pg;
 	struct ceph_pg_pool_info *pool;
 	int ruleno;
-	unsigned poolid, ps, pps, t;
+	unsigned poolid, ps, pps, t, r;

 	poolid = le32_to_cpu(pgid.pool);
 	ps = le16_to_cpu(pgid.ps);
@@ -1060,9 +1060,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
			      le32_to_cpu(pool->v.pgp_num),
			      pool->pgp_num_mask);
 	pps += poolid;
-	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
-			     min_t(int, pool->v.size, *num),
-			     osdmap->osd_weight);
+	r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
+			  min_t(int, pool->v.size, *num),
+			  osdmap->osd_weight);
+	if (r < 0) {
+		pr_err("error %d from crush rule: pool %d ruleset %d type %d"
+		       " size %d\n", r, poolid, pool->v.crush_ruleset,
+		       pool->v.type, pool->v.size);
+		return NULL;
+	}
+	*num = r;

 	return osds;
 }
-- 
cgit v1.2.3

From 6bd9adbdf9ca6a052b0b7455ac67b925eb38cfad Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 21 May 2012 09:45:23 -0700
Subject: libceph: fix pg_temp updates

Usually, we are adding pg_temp entries or removing them.  Occasionally
they update.  In that case, osdmap_apply_incremental() was failing
because the rbtree entry already exists.  Fix by removing the existing
entry before inserting a new one.

Fixes http://tracker.newdream.net/issues/2446

Signed-off-by: Sage Weil
Reviewed-by: Alex Elder
---
 net/ceph/osdmap.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 2592f3cca987..1892c523c43c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -883,8 +883,12 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		pglen = ceph_decode_32(p);

 		if (pglen) {
-			/* insert */
 			ceph_decode_need(p, end, pglen*sizeof(u32), bad);
+
+			/* removing existing (if any) */
+			(void) __remove_pg_mapping(&map->pg_temp, pgid);
+
+			/* insert */
 			pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
 			if (!pg) {
 				err = -ENOMEM;
-- 
cgit v1.2.3
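
A note on the placement math in the first patch: both
ceph_calc_object_layout() and calc_pg_raw() funnel their hash values
through ceph_stable_mod(), which is what makes the pg_num/pgp_num masks
in these hunks work. The following is a minimal standalone sketch; the
helper body mirrors the kernel's ceph_stable_mod() of this era (from
include/linux/ceph/osdmap.h), while the pool numbers in main() are
made-up values for illustration only.

#include <stdio.h>

/*
 * Userspace restatement of the kernel's ceph_stable_mod() helper:
 * bmask is 2^k - 1 for the smallest 2^k >= b.  Values that land below
 * b keep their full k-bit hash; the rest fold back to k-1 bits.  This
 * keeps most placements in place as a pool's pg_num grows toward the
 * next power of two.
 */
static int ceph_stable_mod(int x, int b, int bmask)
{
	if ((x & bmask) < b)
		return x & bmask;
	else
		return x & (bmask >> 1);
}

int main(void)
{
	/* hypothetical pool: pg_num = 12, so pg_num_mask = 15 */
	int pg_num = 12, pg_num_mask = 15;
	int ps;

	for (ps = 0; ps < 16; ps++)
		printf("ps %2d -> pg %2d\n",
		       ps, ceph_stable_mod(ps, pg_num, pg_num_mask));
	return 0;
}

Here ps values 0 through 11 map to themselves and 12 through 15 fold
back to 4 through 7, so only the folded tail of the hash space moves
when pg_num changes.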
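One C detail worth keeping in mind when reading the "warn on do_rule
failure" hunks: crush_do_rule() is declared to return a signed int, so
a caller-side error check like "if (r < 0)" only fires if r itself has
a signed type. If r is unsigned, as in the declaration hunk shown
above, a negative return value converts to a large positive one and the
test is always false (compilers typically warn about this). A small
self-contained demonstration; the stub function is hypothetical,
standing in for any callee that reports errors as negative values:

#include <stdio.h>

/* stand-in for a callee that signals failure with a negative value */
static int do_rule_stub(void)
{
	return -1;
}

int main(void)
{
	unsigned ur = do_rule_stub();	/* -1 wraps to UINT_MAX */
	int sr = do_rule_stub();

	if (ur < 0)			/* always false for unsigned */
		printf("unsigned: caught error\n");
	else
		printf("unsigned: missed error (ur = %u)\n", ur);

	if (sr < 0)			/* fires as intended */
		printf("signed: caught error %d\n", sr);
	return 0;
}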
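The pg_temp fix in the last patch reflects a general property of
insert-only search trees: an insert that rejects duplicate keys cannot
express an update, so an update must be spelled remove-then-insert.
Below is a minimal userspace sketch of that pattern; a linked list
stands in for the kernel's rbtree, and every name (pg_mapping,
insert_mapping, update_mapping and so on) is hypothetical, not taken
from the ceph code.

#include <stdio.h>
#include <stdlib.h>

/* toy stand-in for a pg_temp mapping keyed by pgid */
struct pg_mapping {
	int pgid;			/* key */
	int osd;			/* payload: where the pg is pinned */
	struct pg_mapping *next;
};

static struct pg_mapping *head;

/* like the rbtree insert: rejects an existing key instead of updating */
static int insert_mapping(int pgid, int osd)
{
	struct pg_mapping *m;

	for (m = head; m; m = m->next)
		if (m->pgid == pgid)
			return -1;	/* duplicate key: fail */

	m = malloc(sizeof(*m));
	if (!m)
		return -1;
	m->pgid = pgid;
	m->osd = osd;
	m->next = head;
	head = m;
	return 0;
}

static void remove_mapping(int pgid)
{
	struct pg_mapping **p;

	for (p = &head; *p; p = &(*p)->next) {
		if ((*p)->pgid == pgid) {
			struct pg_mapping *dead = *p;

			*p = dead->next;
			free(dead);
			return;
		}
	}
}

/* the fix: drop any stale entry first, then insert the new one */
static int update_mapping(int pgid, int osd)
{
	remove_mapping(pgid);		/* no-op if the key is absent */
	return insert_mapping(pgid, osd);
}

int main(void)
{
	insert_mapping(7, 3);	/* add pg 7 -> osd 3 */
	update_mapping(7, 5);	/* would fail without the remove */
	printf("pg 7 -> osd %d\n", head->osd);
	return 0;
}

Without the remove_mapping() call, the second operation fails exactly
the way osdmap_apply_incremental() did before this patch.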