summaryrefslogtreecommitdiff
path: root/net/ceph/crush/mapper.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/crush/mapper.c')
-rw-r--r--net/ceph/crush/mapper.c118
1 files changed, 113 insertions, 5 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a1ef53c04415..5b47736d27d9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,7 +20,7 @@
#include <linux/crush/crush.h>
#include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "crush_ln_table.h"
/*
* Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
return bucket->h.items[high];
}
+// compute 2^44*log2(input+1)
+uint64_t crush_ln(unsigned xin)
+{
+ unsigned x=xin, x1;
+ int iexpon, index1, index2;
+ uint64_t RH, LH, LL, xl64, result;
+
+ x++;
+
+ // normalize input
+ iexpon = 15;
+ while(!(x&0x18000)) { x<<=1; iexpon--; }
+
+ index1 = (x>>8)<<1;
+ // RH ~ 2^56/index1
+ RH = __RH_LH_tbl[index1 - 256];
+ // LH ~ 2^48 * log2(index1/256)
+ LH = __RH_LH_tbl[index1 + 1 - 256];
+
+ // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
+ xl64 = (int64_t)x * RH;
+ xl64 >>= 48;
+ x1 = xl64;
+
+ result = iexpon;
+ result <<= (12 + 32);
+
+ index2 = x1 & 0xff;
+ // LL ~ 2^48*log2(1.0+index2/2^15)
+ LL = __LL_tbl[index2];
+
+ LH = LH + LL;
+
+ LH >>= (48-12 - 32);
+ result += LH;
+
+ return result;
+}
+
+
+/*
+ * straw2
+ *
+ * for reference, see:
+ *
+ * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ *
+ */
+
+static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+ int x, int r)
+{
+ unsigned i, high = 0;
+ unsigned u;
+ unsigned w;
+ __s64 ln, draw, high_draw = 0;
+
+ for (i = 0; i < bucket->h.size; i++) {
+ w = bucket->item_weights[i];
+ if (w) {
+ u = crush_hash32_3(bucket->h.hash, x,
+ bucket->h.items[i], r);
+ u &= 0xffff;
+
+ /*
+ * for some reason slightly less than 0x10000 produces
+ * a slightly more accurate distribution... probably a
+ * rounding effect.
+ *
+ * the natural log lookup table maps [0,0xffff]
+ * (corresponding to real numbers [1/0x10000, 1] to
+ * [0, 0xffffffffffff] (corresponding to real numbers
+ * [-11.090355,0]).
+ */
+ ln = crush_ln(u) - 0x1000000000000ll;
+
+ /*
+ * divide by 16.16 fixed-point weight. note
+ * that the ln value is negative, so a larger
+ * weight means a larger (less negative) value
+ * for draw.
+ */
+ draw = div64_s64(ln, w);
+ } else {
+ draw = S64_MIN;
+ }
+
+ if (i == 0 || draw > high_draw) {
+ high = i;
+ high_draw = draw;
+ }
+ }
+ return bucket->h.items[high];
+}
+
+
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
case CRUSH_BUCKET_STRAW:
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
+ case CRUSH_BUCKET_STRAW2:
+ return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
+ x, r);
default:
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
+
/*
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
* @type: the type of item to choose
* @out: pointer to output vector
* @outpos: our position in that vector
+ * @out_size: size of the out vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
* @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
+ int out_size,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
+ int count = out_size;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
tries, recurse_tries, local_retries, local_fallback_retries,
parent_r);
- for (rep = outpos; rep < numrep; rep++) {
+ for (rep = outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
- out2, outpos,
+ out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
+ count--;
}
dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
+ int out_size;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
x, numrep,
curstep->arg2,
o+osize, j,
+ result_max-osize,
choose_tries,
recurse_tries,
choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
c+osize,
0);
} else {
+ out_size = ((numrep < (result_max-osize)) ?
+ numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[-1-w[i]],
weight, weight_max,
- x, numrep, numrep,
+ x, out_size, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_to_leaf,
c+osize,
0);
- osize += numrep;
+ osize += out_size;
}
}