diff options
Diffstat (limited to 'net/ceph/crush/mapper.c')
-rw-r--r-- | net/ceph/crush/mapper.c | 118 |
1 files changed, 113 insertions, 5 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index a1ef53c04415..5b47736d27d9 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -20,7 +20,7 @@ #include <linux/crush/crush.h> #include <linux/crush/hash.h> -#include <linux/crush/mapper.h> +#include "crush_ln_table.h" /* * Implement the core CRUSH mapping algorithm. @@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, return bucket->h.items[high]; } +// compute 2^44*log2(input+1) +uint64_t crush_ln(unsigned xin) +{ + unsigned x=xin, x1; + int iexpon, index1, index2; + uint64_t RH, LH, LL, xl64, result; + + x++; + + // normalize input + iexpon = 15; + while(!(x&0x18000)) { x<<=1; iexpon--; } + + index1 = (x>>8)<<1; + // RH ~ 2^56/index1 + RH = __RH_LH_tbl[index1 - 256]; + // LH ~ 2^48 * log2(index1/256) + LH = __RH_LH_tbl[index1 + 1 - 256]; + + // RH*x ~ 2^48 * (2^15 + xf), xf<2^8 + xl64 = (int64_t)x * RH; + xl64 >>= 48; + x1 = xl64; + + result = iexpon; + result <<= (12 + 32); + + index2 = x1 & 0xff; + // LL ~ 2^48*log2(1.0+index2/2^15) + LL = __LL_tbl[index2]; + + LH = LH + LL; + + LH >>= (48-12 - 32); + result += LH; + + return result; +} + + +/* + * straw2 + * + * for reference, see: + * + * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables + * + */ + +static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, + int x, int r) +{ + unsigned i, high = 0; + unsigned u; + unsigned w; + __s64 ln, draw, high_draw = 0; + + for (i = 0; i < bucket->h.size; i++) { + w = bucket->item_weights[i]; + if (w) { + u = crush_hash32_3(bucket->h.hash, x, + bucket->h.items[i], r); + u &= 0xffff; + + /* + * for some reason slightly less than 0x10000 produces + * a slightly more accurate distribution... probably a + * rounding effect. + * + * the natural log lookup table maps [0,0xffff] + * (corresponding to real numbers [1/0x10000, 1] to + * [0, 0xffffffffffff] (corresponding to real numbers + * [-11.090355,0]). + */ + ln = crush_ln(u) - 0x1000000000000ll; + + /* + * divide by 16.16 fixed-point weight. note + * that the ln value is negative, so a larger + * weight means a larger (less negative) value + * for draw. + */ + draw = div64_s64(ln, w); + } else { + draw = S64_MIN; + } + + if (i == 0 || draw > high_draw) { + high = i; + high_draw = draw; + } + } + return bucket->h.items[high]; +} + + static int crush_bucket_choose(struct crush_bucket *in, int x, int r) { dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); @@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) case CRUSH_BUCKET_STRAW: return bucket_straw_choose((struct crush_bucket_straw *)in, x, r); + case CRUSH_BUCKET_STRAW2: + return bucket_straw2_choose((struct crush_bucket_straw2 *)in, + x, r); default: dprintk("unknown bucket %d alg %d\n", in->id, in->alg); return in->items[0]; } } + /* * true if device is marked "out" (failed, fully offloaded) * of the cluster @@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map, * @type: the type of item to choose * @out: pointer to output vector * @outpos: our position in that vector + * @out_size: size of the out vector * @tries: number of attempts to make * @recurse_tries: number of attempts to have recursive chooseleaf make * @local_retries: localized retries @@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map, const __u32 *weight, int weight_max, int x, int numrep, int type, int *out, int outpos, + int out_size, unsigned int tries, unsigned int recurse_tries, unsigned int local_retries, @@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map, int item = 0; int itemtype; int collide, reject; + int count = out_size; dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", recurse_to_leaf ? "_LEAF" : "", @@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map, tries, recurse_tries, local_retries, local_fallback_retries, parent_r); - for (rep = outpos; rep < numrep; rep++) { + for (rep = outpos; rep < numrep && count > 0 ; rep++) { /* keep trying until we get a non-out, non-colliding item */ ftotal = 0; skip_rep = 0; @@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map, map->buckets[-1-item], weight, weight_max, x, outpos+1, 0, - out2, outpos, + out2, outpos, count, recurse_tries, 0, local_retries, local_fallback_retries, @@ -463,6 +566,7 @@ reject: dprintk("CHOOSE got %d\n", item); out[outpos] = item; outpos++; + count--; } dprintk("CHOOSE returns %d\n", outpos); @@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map, __u32 step; int i, j; int numrep; + int out_size; /* * the original choose_total_tries value was off by one (it * counted "retries" and not "tries"). add one. @@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map, x, numrep, curstep->arg2, o+osize, j, + result_max-osize, choose_tries, recurse_tries, choose_local_retries, @@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map, c+osize, 0); } else { + out_size = ((numrep < (result_max-osize)) ? + numrep : (result_max-osize)); crush_choose_indep( map, map->buckets[-1-w[i]], weight, weight_max, - x, numrep, numrep, + x, out_size, numrep, curstep->arg2, o+osize, j, choose_tries, @@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map, recurse_to_leaf, c+osize, 0); - osize += numrep; + osize += out_size; } } |