From 6d514b4e7737ad75a7e7e0a3f7dde45d46341691 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Mon, 23 Jun 2014 15:11:54 +0200 Subject: lib: crc32: Greatly shrink CRC combining code There's no need for a full 32x32 matrix, when rows before the last are just shifted copies of the rows after them. There's still room for improvement (especially on X86 processors with CRC32 and PCLMUL instructions), but this is a large step in the right direction [which is in particular useful for its current user, namely SCTP checksumming over multiple skb frags[] entries, i.e. in IPVS balancing when other CRC32 offloads are not available]. The internal primitive is now called crc32_generic_shift and takes one less argument; the XOR with crc2 is done in inline wrappers. Signed-off-by: George Spelvin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/crc32.c | 147 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 70 insertions(+), 77 deletions(-) (limited to 'lib') diff --git a/lib/crc32.c b/lib/crc32.c index 21a7b2135af6..9af30ff334c5 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -50,30 +50,6 @@ MODULE_AUTHOR("Matt Domsch "); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); -#define GF2_DIM 32 - -static u32 gf2_matrix_times(u32 *mat, u32 vec) -{ - u32 sum = 0; - - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - - return sum; -} - -static void gf2_matrix_square(u32 *square, u32 *mat) -{ - int i; - - for (i = 0; i < GF2_DIM; i++) - square[i] = gf2_matrix_times(mat, mat[i]); -} - #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ @@ -155,51 +131,6 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) } #endif -/* For conditions of distribution and use, see copyright notice in zlib.h */ -static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2, - u32 polynomial) -{ - u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */ - u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */ - u32 row; - int i; - - if (len2 <= 0) - return crc1; - - /* Put operator for one zero bit in odd */ - odd[0] = polynomial; - row = 1; - for (i = 1; i < GF2_DIM; i++) { - odd[i] = row; - row <<= 1; - } - - gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */ - gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */ - - /* Apply len2 zeros to crc1 (first square will put the operator for one - * zero byte, eight zero bits, in even). - */ - do { - /* Apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - /* If no more bits set, then done */ - if (len2 == 0) - break; - /* Another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - } while (len2 != 0); - - crc1 ^= crc2; - return crc1; -} /** * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II @@ -271,19 +202,81 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); } #endif -u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2) +EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(__crc32c_le); + +/* + * This multiplies the polynomials x and y modulo the given modulus. + * This follows the "little-endian" CRC convention that the lsbit + * represents the highest power of x, and the msbit represents x^0. + */ +static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus) { - return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE); + u32 product = x & 1 ? y : 0; + int i; + + for (i = 0; i < 31; i++) { + product = (product >> 1) ^ (product & 1 ? modulus : 0); + x >>= 1; + product ^= x & 1 ? y : 0; + } + + return product; } -u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +/** + * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time + * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient) + * @len: The number of bytes. @crc is multiplied by x^(8*@len) + * @polynomial: The modulus used to reduce the result to 32 bits. + * + * It's possible to parallelize CRC computations by computing a CRC + * over separate ranges of a buffer, then summing them. + * This shifts the given CRC by 8*len bits (i.e. produces the same effect + * as appending len bytes of zero to the data), in time proportional + * to log(len). + */ +static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, + u32 polynomial) { - return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE); + u32 power = polynomial; /* CRC of x^32 */ + int i; + + /* Shift up to 32 bits in the simple linear way */ + for (i = 0; i < 8 * (int)(len & 3); i++) + crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0); + + len >>= 2; + if (!len) + return crc; + + for (;;) { + /* "power" is x^(2^i), modulo the polynomial */ + if (len & 1) + crc = gf2_multiply(crc, power, polynomial); + + len >>= 1; + if (!len) + break; + + /* Square power, advancing to x^(2^(i+1)) */ + power = gf2_multiply(power, power, polynomial); + } + + return crc; } -EXPORT_SYMBOL(crc32_le); -EXPORT_SYMBOL(crc32_le_combine); -EXPORT_SYMBOL(__crc32c_le); -EXPORT_SYMBOL(__crc32c_le_combine); + +u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) +{ + return crc32_generic_shift(crc, len, CRCPOLY_LE); +} + +u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) +{ + return crc32_generic_shift(crc, len, CRC32C_POLY_LE); +} +EXPORT_SYMBOL(crc32_le_shift); +EXPORT_SYMBOL(__crc32c_le_shift); /** * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 -- cgit v1.2.3 From 4fa8e03b22df9b34f87906fa29de788bfa628bff Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Mon, 23 Jun 2014 15:11:55 +0200 Subject: lib: crc32: Mark test data __initconst So it gets discarded after the selftest. Signed-off-by: George Spelvin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/crc32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/crc32.c b/lib/crc32.c index 9af30ff334c5..af938ab12468 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -344,7 +344,7 @@ EXPORT_SYMBOL(crc32_be); #ifdef CONFIG_CRC32_SELFTEST /* 4096 random bytes */ -static u8 __attribute__((__aligned__(8))) test_buf[] = +static u8 const __aligned(8) test_buf[] __initconst = { 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, @@ -868,7 +868,7 @@ static struct crc_test { u32 crc_le; /* expected crc32_le result */ u32 crc_be; /* expected crc32_be result */ u32 crc32c_le; /* expected crc32c_le result */ -} test[] = +} const test[] __initconst = { {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c}, {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca}, -- cgit v1.2.3 From d8f1c4778e957273c3b5b6e045d8d3af38484ca8 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Mon, 23 Jun 2014 15:11:56 +0200 Subject: lib: crc32: Add some additional __pure annotations In case they help the compiler. Signed-off-by: George Spelvin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/crc32.h | 6 +++--- lib/crc32.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index edf34e876e40..9e8a032c1788 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -8,8 +8,8 @@ #include #include -extern u32 crc32_le(u32 crc, unsigned char const *p, size_t len); -extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); /** * crc32_le_combine - Combine two crc32 check values into one. For two @@ -36,7 +36,7 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) return crc32_le_shift(crc1, len2) ^ crc2; } -extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); +u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); /** * __crc32c_le_combine - Combine two crc32c check values into one. For two diff --git a/lib/crc32.c b/lib/crc32.c index af938ab12468..9a907d489d95 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -53,7 +53,7 @@ MODULE_LICENSE("GPL"); #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ -static inline u32 +static inline u32 __pure crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { # ifdef __LITTLE_ENDIAN -- cgit v1.2.3 From a69f5edb8ba20c87c5f7c96ec40581f9f51f2910 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Jun 2014 11:20:48 -0700 Subject: mac_pton: Use bool not int return Use bool instead of int as the return type. All uses are tested with !. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/kernel.h | 2 +- lib/net_utils.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c52907a6d8b..a9e2268ecccb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -501,7 +501,7 @@ static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); -int mac_pton(const char *s, u8 *mac); +bool mac_pton(const char *s, u8 *mac); /* * General tracing related utility functions - trace_printk(), diff --git a/lib/net_utils.c b/lib/net_utils.c index 2e3c52c8d050..148fc6e99ef6 100644 --- a/lib/net_utils.c +++ b/lib/net_utils.c @@ -3,24 +3,24 @@ #include #include -int mac_pton(const char *s, u8 *mac) +bool mac_pton(const char *s, u8 *mac) { int i; /* XX:XX:XX:XX:XX:XX */ if (strlen(s) < 3 * ETH_ALEN - 1) - return 0; + return false; /* Don't dirty result unless string is valid MAC. */ for (i = 0; i < ETH_ALEN; i++) { if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1])) - return 0; + return false; if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') - return 0; + return false; } for (i = 0; i < ETH_ALEN; i++) { mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]); } - return 1; + return true; } EXPORT_SYMBOL(mac_pton); -- cgit v1.2.3 From ccc7f4968a18b980994e622006b84e0195754390 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 17 Jul 2014 19:46:10 +0200 Subject: net: print net_device reg_state in netdev_* unless it's registered This way we'll always know in what status the device is, unless it's running normally (i.e. NETDEV_REGISTERED). Also, emit a warning once in case of a bad reg_state. CC: "David S. Miller" CC: Jason Baron CC: Eric Dumazet CC: Vlad Yasevich CC: stephen hemminger CC: Jerry Chu CC: Ben Hutchings CC: Joe Perches Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- include/linux/netdevice.h | 18 +++++++++++++++++- lib/dynamic_debug.c | 8 +++++--- net/core/dev.c | 8 +++++--- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 70256aa2ae81..8e8fb3ed574b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3388,6 +3388,21 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } +static inline const char *netdev_reg_state(const struct net_device *dev) +{ + switch (dev->reg_state) { + case NETREG_UNINITIALIZED: return " (uninitialized)"; + case NETREG_REGISTERED: return ""; + case NETREG_UNREGISTERING: return " (unregistering)"; + case NETREG_UNREGISTERED: return " (unregistered)"; + case NETREG_RELEASED: return " (released)"; + case NETREG_DUMMY: return " (dummy)"; + } + + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + return " (unknown)"; +} + __printf(3, 4) int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); @@ -3444,7 +3459,8 @@ do { \ * file/line information and a backtrace. */ #define netdev_WARN(dev, format, args...) \ - WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args) + WARN(1, "netdevice: %s%s\n" format, netdev_name(dev), \ + netdev_reg_state(dev), ##args) /* netif printk helpers, similar to netdev_printk */ diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7288e38e1757..c9afbe2c445a 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -614,13 +614,15 @@ int __dynamic_netdev_dbg(struct _ddebug *descriptor, char buf[PREFIX_SIZE]; res = dev_printk_emit(7, dev->dev.parent, - "%s%s %s %s: %pV", + "%s%s %s %s%s: %pV", dynamic_emit_prefix(descriptor, buf), dev_driver_string(dev->dev.parent), dev_name(dev->dev.parent), - netdev_name(dev), &vaf); + netdev_name(dev), netdev_reg_state(dev), + &vaf); } else if (dev) { - res = printk(KERN_DEBUG "%s: %pV", netdev_name(dev), &vaf); + res = printk(KERN_DEBUG "%s%s: %pV", netdev_name(dev), + netdev_reg_state(dev), &vaf); } else { res = printk(KERN_DEBUG "(NULL net_device): %pV", &vaf); } diff --git a/net/core/dev.c b/net/core/dev.c index 239722af098d..81d61014fd9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6950,12 +6950,14 @@ static int __netdev_printk(const char *level, const struct net_device *dev, if (dev && dev->dev.parent) { r = dev_printk_emit(level[1] - '0', dev->dev.parent, - "%s %s %s: %pV", + "%s %s %s%s: %pV", dev_driver_string(dev->dev.parent), dev_name(dev->dev.parent), - netdev_name(dev), vaf); + netdev_name(dev), netdev_reg_state(dev), + vaf); } else if (dev) { - r = printk("%s%s: %pV", level, netdev_name(dev), vaf); + r = printk("%s%s%s: %pV", level, netdev_name(dev), + netdev_reg_state(dev), vaf); } else { r = printk("%s(NULL net_device): %pV", level, vaf); } -- cgit v1.2.3 From 2695fb552cbef1029aa025a98acb80cc51d66de5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 24 Jul 2014 16:38:21 -0700 Subject: net: filter: rename 'struct sock_filter_int' into 'struct bpf_insn' eBPF is used by socket filtering, seccomp and soon by tracing and exposed to userspace, therefore 'sock_filter_int' name is not accurate. Rename it to 'bpf_insn' Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 50 ++++++++++++++++++++++----------------------- kernel/bpf/core.c | 2 +- kernel/seccomp.c | 2 +- lib/test_bpf.c | 4 ++-- net/core/filter.c | 18 ++++++++-------- 6 files changed, 39 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 99bef86ed6df..71737a83f022 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -214,7 +214,7 @@ struct jit_context { static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { - struct sock_filter_int *insn = bpf_prog->insnsi; + struct bpf_insn *insn = bpf_prog->insnsi; int insn_cnt = bpf_prog->len; u8 temp[64]; int i; diff --git a/include/linux/filter.h b/include/linux/filter.h index c43c8258e682..20dd50ef7271 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -82,7 +82,7 @@ enum { /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ #define BPF_ALU64_REG(OP, DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -90,7 +90,7 @@ enum { .imm = 0 }) #define BPF_ALU32_REG(OP, DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -100,7 +100,7 @@ enum { /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ #define BPF_ALU64_IMM(OP, DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -108,7 +108,7 @@ enum { .imm = IMM }) #define BPF_ALU32_IMM(OP, DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -118,7 +118,7 @@ enum { /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ #define BPF_ENDIAN(TYPE, DST, LEN) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = 0, \ @@ -128,7 +128,7 @@ enum { /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -136,7 +136,7 @@ enum { .imm = 0 }) #define BPF_MOV32_REG(DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -146,7 +146,7 @@ enum { /* Short form of mov, dst_reg = imm32 */ #define BPF_MOV64_IMM(DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -154,7 +154,7 @@ enum { .imm = IMM }) #define BPF_MOV32_IMM(DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -164,7 +164,7 @@ enum { /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -172,7 +172,7 @@ enum { .imm = IMM }) #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -182,7 +182,7 @@ enum { /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ #define BPF_LD_ABS(SIZE, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ .dst_reg = 0, \ .src_reg = 0, \ @@ -192,7 +192,7 @@ enum { /* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ #define BPF_LD_IND(SIZE, SRC, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ .dst_reg = 0, \ .src_reg = SRC, \ @@ -202,7 +202,7 @@ enum { /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -212,7 +212,7 @@ enum { /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -222,7 +222,7 @@ enum { /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -232,7 +232,7 @@ enum { /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ #define BPF_JMP_REG(OP, DST, SRC, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -242,7 +242,7 @@ enum { /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -252,7 +252,7 @@ enum { /* Function call */ #define BPF_EMIT_CALL(FUNC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_CALL, \ .dst_reg = 0, \ .src_reg = 0, \ @@ -262,7 +262,7 @@ enum { /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = CODE, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -272,7 +272,7 @@ enum { /* Program exit */ #define BPF_EXIT_INSN() \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_EXIT, \ .dst_reg = 0, \ .src_reg = 0, \ @@ -298,7 +298,7 @@ enum { /* Macro to invoke filter function. */ #define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) -struct sock_filter_int { +struct bpf_insn { __u8 code; /* opcode */ __u8 dst_reg:4; /* dest register */ __u8 src_reg:4; /* source register */ @@ -330,10 +330,10 @@ struct sk_filter { struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, - const struct sock_filter_int *filter); + const struct bpf_insn *filter); union { struct sock_filter insns[0]; - struct sock_filter_int insnsi[0]; + struct bpf_insn insnsi[0]; struct work_struct work; }; }; @@ -353,7 +353,7 @@ void sk_filter_select_runtime(struct sk_filter *fp); void sk_filter_free(struct sk_filter *fp); int sk_convert_filter(struct sock_filter *prog, int len, - struct sock_filter_int *new_prog, int *new_len); + struct bpf_insn *new_prog, int *new_len); int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog_kern *fprog); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 77a240a1ce11..265a02cc822d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -81,7 +81,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) * keep, 0 for none. @ctx is the data we are operating on, @insn is the * array of filter instructions. */ -static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) +static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 301bbc24739c..565743db5384 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -248,7 +248,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (ret) goto free_prog; - /* Convert 'sock_filter' insns to 'sock_filter_int' insns */ + /* Convert 'sock_filter' insns to 'bpf_insn' insns */ ret = sk_convert_filter(fp, fprog->len, NULL, &new_len); if (ret) goto free_prog; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index c579e0f58818..5f48623ee1a7 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -66,7 +66,7 @@ struct bpf_test { const char *descr; union { struct sock_filter insns[MAX_INSNS]; - struct sock_filter_int insns_int[MAX_INSNS]; + struct bpf_insn insns_int[MAX_INSNS]; } u; __u8 aux; __u8 data[MAX_DATA]; @@ -1807,7 +1807,7 @@ static struct sk_filter *generate_filter(int which, int *err) fp->len = flen; memcpy(fp->insnsi, tests[which].u.insns_int, - fp->len * sizeof(struct sock_filter_int)); + fp->len * sizeof(struct bpf_insn)); sk_filter_select_runtime(fp); break; diff --git a/net/core/filter.c b/net/core/filter.c index 1d0e9492e4fa..f3b2d5e9fe5f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -174,9 +174,9 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) } static bool convert_bpf_extensions(struct sock_filter *fp, - struct sock_filter_int **insnp) + struct bpf_insn **insnp) { - struct sock_filter_int *insn = *insnp; + struct bpf_insn *insn = *insnp; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: @@ -326,7 +326,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * * 2) 2nd pass to remap in two passes: 1st pass finds new * jump offsets, 2nd pass remapping: - * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); + * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); * sk_convert_filter(old_prog, old_len, new_prog, &new_len); * * User BPF's register A is mapped to our BPF register 6, user BPF @@ -336,10 +336,10 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * ctx == 'struct seccomp_data *'. */ int sk_convert_filter(struct sock_filter *prog, int len, - struct sock_filter_int *new_prog, int *new_len) + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; - struct sock_filter_int *new_insn; + struct bpf_insn *new_insn; struct sock_filter *fp; int *addrs = NULL; u8 bpf_src; @@ -365,8 +365,8 @@ do_pass: new_insn++; for (i = 0; i < len; fp++, i++) { - struct sock_filter_int tmp_insns[6] = { }; - struct sock_filter_int *insn = tmp_insns; + struct bpf_insn tmp_insns[6] = { }; + struct bpf_insn *insn = tmp_insns; if (addrs) addrs[i] = new_insn - new_prog; @@ -913,7 +913,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, * representation. */ BUILD_BUG_ON(sizeof(struct sock_filter) != - sizeof(struct sock_filter_int)); + sizeof(struct bpf_insn)); /* Conversion cannot happen on overlapping memory areas, * so we need to keep the user BPF around until the 2nd @@ -945,7 +945,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, fp->len = new_len; - /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ + /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len); if (err) /* 2nd sk_convert_filter() can fail only if it fails -- cgit v1.2.3 From 4ada97abe937cdb3fc029a871d5b0f21aa661a60 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 28 Jul 2014 14:01:38 +0200 Subject: random32: mix in entropy from core to late initcall Currently, we have a 3-stage seeding process in prandom(): Phase 1 is from the early actual initialization of prandom() subsystem which happens during core_initcall() and remains most likely until the beginning of late_initcall() phase. Here, the system might not have enough entropy available for seeding with strong randomness from the random driver. That means, we currently have a 32bit weak LCG() seeding the PRNG status register 1 and mixing that successively into the other 3 registers just to get it up and running. Phase 2 starts with late_initcall() phase resp. when the random driver has initialized its non-blocking pool with enough entropy. At that time, we throw away *all* inner state from its 4 registers and do a full reseed with strong randomness. Phase 3 starts right after that and does a periodic reseed with random slack of status register 1 by a strong random source again. A problem in phase 1 is that during bootup data structures can be initialized, e.g. on module load time, and thus access a weakly seeded prandom and are never changed for the rest of their live-time, thus carrying along the results from a week seed. Lets make sure that current but also future users access a possibly better early seeded prandom. This patch therefore improves phase 1 by trying to make it more 'unpredictable' through mixing in seed from a possible hardware source. Now, the mix-in xors inner state with the outcome of either of the two functions arch_get_random_{,seed}_int(), preferably arch_get_random_seed_int() as it likely represents a non-deterministic random bit generator in hw rather than a cryptographically secure PRNG in hw. However, not all might have the first one, so we use the PRNG as a fallback if available. As we xor the seed into the current state, the worst case would be that a hardware source could be unverifiable compromised or backdoored. In that case nevertheless it would be as good as our original early seeding function prandom_seed_very_weak() since we mix through xor which is entropy preserving. Joint work with Daniel Borkmann. Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- lib/random32.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/random32.c b/lib/random32.c index fa5da61ce7ad..c9b6bf3afe0c 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -40,6 +40,10 @@ #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); +#else +static inline void prandom_state_selftest(void) +{ +} #endif static DEFINE_PER_CPU(struct rnd_state, net_rand_state); @@ -53,8 +57,7 @@ static DEFINE_PER_CPU(struct rnd_state, net_rand_state); */ u32 prandom_u32_state(struct rnd_state *state) { -#define TAUSWORTHE(s,a,b,c,d) ((s&c)<>b) - +#define TAUSWORTHE(s, a, b, c, d) ((s & c) << d) ^ (((s << a) ^ s) >> b) state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); @@ -147,21 +150,25 @@ static void prandom_warmup(struct rnd_state *state) prandom_u32_state(state); } -static void prandom_seed_very_weak(struct rnd_state *state, u32 seed) +static u32 __extract_hwseed(void) { - /* Note: This sort of seeding is ONLY used in test cases and - * during boot at the time from core_initcall until late_initcall - * as we don't have a stronger entropy source available yet. - * After late_initcall, we reseed entire state, we have to (!), - * otherwise an attacker just needs to search 32 bit space to - * probe for our internal 128 bit state if he knows a couple - * of prandom32 outputs! - */ -#define LCG(x) ((x) * 69069U) /* super-duper LCG */ - state->s1 = __seed(LCG(seed), 2U); - state->s2 = __seed(LCG(state->s1), 8U); - state->s3 = __seed(LCG(state->s2), 16U); - state->s4 = __seed(LCG(state->s3), 128U); + u32 val = 0; + + (void)(arch_get_random_seed_int(&val) || + arch_get_random_int(&val)); + + return val; +} + +static void prandom_seed_early(struct rnd_state *state, u32 seed, + bool mix_with_hwseed) +{ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) + state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); + state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); + state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); + state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); } /** @@ -194,14 +201,13 @@ static int __init prandom_init(void) { int i; -#ifdef CONFIG_RANDOM32_SELFTEST prandom_state_selftest(); -#endif for_each_possible_cpu(i) { struct rnd_state *state = &per_cpu(net_rand_state,i); + u32 weak_seed = (i + jiffies) ^ random_get_entropy(); - prandom_seed_very_weak(state, (i + jiffies) ^ random_get_entropy()); + prandom_seed_early(state, weak_seed, true); prandom_warmup(state); } @@ -210,6 +216,7 @@ static int __init prandom_init(void) core_initcall(prandom_init); static void __prandom_timer(unsigned long dontcare); + static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0); static void __prandom_timer(unsigned long dontcare) @@ -419,7 +426,7 @@ static void __init prandom_state_selftest(void) for (i = 0; i < ARRAY_SIZE(test1); i++) { struct rnd_state state; - prandom_seed_very_weak(&state, test1[i].seed); + prandom_seed_early(&state, test1[i].seed, false); prandom_warmup(&state); if (test1[i].result != prandom_u32_state(&state)) @@ -434,7 +441,7 @@ static void __init prandom_state_selftest(void) for (i = 0; i < ARRAY_SIZE(test2); i++) { struct rnd_state state; - prandom_seed_very_weak(&state, test2[i].seed); + prandom_seed_early(&state, test2[i].seed, false); prandom_warmup(&state); for (j = 0; j < test2[i].iteration - 1; j++) -- cgit v1.2.3 From 7ae457c1e5b45a1b826fad9d62b32191d2bdcfdb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:16 -0700 Subject: net: filter: split 'struct sk_filter' into socket and bpf parts clean up names related to socket filtering and bpf in the following way: - everything that deals with sockets keeps 'sk_*' prefix - everything that is pure BPF is changed to 'bpf_*' prefix split 'struct sk_filter' into struct sk_filter { atomic_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; and struct bpf_prog { u32 jited:1, len:31; struct sock_fprog_kern *orig_prog; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; struct work_struct work; }; }; so that 'struct bpf_prog' can be used independent of sockets and cleans up 'unattached' bpf use cases split SK_RUN_FILTER macro into: SK_RUN_FILTER to be used with 'struct sk_filter *' and BPF_PROG_RUN to be used with 'struct bpf_prog *' __sk_filter_release(struct sk_filter *) gains __bpf_prog_release(struct bpf_prog *) helper function also perform related renames for the functions that work with 'struct bpf_prog *', since they're on the same lines: sk_filter_size -> bpf_prog_size sk_filter_select_runtime -> bpf_prog_select_runtime sk_filter_free -> bpf_prog_free sk_unattached_filter_create -> bpf_prog_create sk_unattached_filter_destroy -> bpf_prog_destroy sk_store_orig_filter -> bpf_prog_store_orig_filter sk_release_orig_filter -> bpf_release_orig_filter __sk_migrate_filter -> bpf_migrate_filter __sk_prepare_filter -> bpf_prepare_filter API for attaching classic BPF to a socket stays the same: sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *) and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program which is used by sockets, tun, af_packet API for 'unattached' BPF programs becomes: bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *) and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 10 ++-- arch/arm/net/bpf_jit_32.c | 8 +-- arch/mips/net/bpf_jit.c | 8 +-- arch/powerpc/net/bpf_jit_comp.c | 8 +-- arch/s390/net/bpf_jit_comp.c | 4 +- arch/sparc/net/bpf_jit_comp.c | 4 +- arch/x86/net/bpf_jit_comp.c | 12 ++--- drivers/isdn/i4l/isdn_ppp.c | 26 +++++---- drivers/net/ppp/ppp_generic.c | 28 +++++----- drivers/net/team/team_mode_loadbalance.c | 14 ++--- include/linux/filter.h | 40 ++++++++------ include/linux/isdn_ppp.h | 4 +- include/uapi/linux/netfilter/xt_bpf.h | 4 +- kernel/bpf/core.c | 30 +++++------ kernel/seccomp.c | 10 ++-- lib/test_bpf.c | 24 ++++----- net/core/filter.c | 92 ++++++++++++++++++-------------- net/core/ptp_classifier.c | 6 +-- net/core/sock_diag.c | 2 +- net/netfilter/xt_bpf.c | 6 +-- net/sched/cls_bpf.c | 12 ++--- 21 files changed, 183 insertions(+), 169 deletions(-) (limited to 'lib') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 712068be8171..c48a9704bda8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -586,11 +586,11 @@ team driver's classifier for its load-balancing mode, netfilter's xt_bpf extension, PTP dissector/classifier, and much more. They are all internally converted by the kernel into the new instruction set representation and run in the eBPF interpreter. For in-kernel handlers, this all works transparently -by using sk_unattached_filter_create() for setting up the filter, resp. -sk_unattached_filter_destroy() for destroying it. The macro -SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed -code to run the filter. 'filter' is a pointer to struct sk_filter that we -got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. +by using bpf_prog_create() for setting up the filter, resp. +bpf_prog_destroy() for destroying it. The macro +BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed +code to run the filter. 'filter' is a pointer to struct bpf_prog that we +got from bpf_prog_create(), and 'ctx' the given context (e.g. skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index fb5503ce016f..a37b989a2f91 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -56,7 +56,7 @@ #define FLAG_NEED_X_RESET (1 << 0) struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned idx; unsigned prologue_bytes; int ret0_fp_idx; @@ -465,7 +465,7 @@ static inline void update_on_xread(struct jit_ctx *ctx) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned i, load_order, off, condt; int imm12; @@ -857,7 +857,7 @@ b_epilogue: } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned tmp_idx; @@ -926,7 +926,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index b87390a56a2f..05a56619ece2 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -131,7 +131,7 @@ * @target: Memory location for the compiled filter */ struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned int prologue_bytes; u32 idx; u32 flags; @@ -789,7 +789,7 @@ static int pkt_type_offset(void) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned int i, off, load_order, condt; u32 k, b_off __maybe_unused; @@ -1369,7 +1369,7 @@ jmp_cmp: int bpf_jit_enable __read_mostly; -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned int alloc_size, tmp_idx; @@ -1423,7 +1423,7 @@ out: kfree(ctx.offsets); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 82e82cadcde5..3afa6f4c1957 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, +static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx) { int i; @@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) /* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, +static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, unsigned int *addrs) { @@ -569,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, return 0; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int proglen; unsigned int alloclen; @@ -693,7 +693,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a2cbd875543a..61e45b7c04d7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, return header; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; unsigned long size, prg_len, lit_len; @@ -875,7 +875,7 @@ out: kfree(addrs); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 892a102671ad..1f76c22a6a75 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -354,7 +354,7 @@ do { *prog++ = BR_OPC | WDISP22(OFF); \ * emit_jump() calls with adjusted offsets. */ -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int cleanup_addr, proglen, oldproglen = 0; u32 temp[8], *prog, *func, seen = 0, pass; @@ -808,7 +808,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e2ecc1380b3d..5c8cb8043c5a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -211,7 +211,7 @@ struct jit_context { bool seen_ld_abs; }; -static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { struct bpf_insn *insn = bpf_prog->insnsi; @@ -841,7 +841,7 @@ common_load: ctx->seen_ld_abs = true; /* By design x64 JIT should support all BPF instructions * This error will be seen if new instruction was added * to interpreter, but not to JIT - * or if there is junk in sk_filter + * or if there is junk in bpf_prog */ pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; @@ -862,11 +862,11 @@ common_load: ctx->seen_ld_abs = true; return proglen; } -void bpf_jit_compile(struct sk_filter *prog) +void bpf_jit_compile(struct bpf_prog *prog) { } -void bpf_int_jit_compile(struct sk_filter *prog) +void bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; int proglen, oldproglen = 0; @@ -932,7 +932,7 @@ out: static void bpf_jit_free_deferred(struct work_struct *work) { - struct sk_filter *fp = container_of(work, struct sk_filter, work); + struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; @@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work) kfree(fp); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) { INIT_WORK(&fp->work, bpf_jit_free_deferred); diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 62f0688d45a5..c4198fa490bf 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -379,12 +379,12 @@ isdn_ppp_release(int min, struct file *file) #endif #ifdef CONFIG_IPPP_FILTER if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } #endif @@ -639,12 +639,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->pass_filter, - &fprog); + err = bpf_prog_create(&is->pass_filter, &fprog); else err = 0; kfree(code); @@ -664,12 +663,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->active_filter, - &fprog); + err = bpf_prog_create(&is->active_filter, &fprog); else err = 0; kfree(code); @@ -1174,14 +1172,14 @@ isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff * } if (is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0) { + && BPF_PROG_RUN(is->pass_filter, skb) == 0) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); kfree_skb(skb); return; } if (!(is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0)) { + && BPF_PROG_RUN(is->active_filter, skb) == 0)) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1320,14 +1318,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) } if (ipt->pass_filter - && SK_RUN_FILTER(ipt->pass_filter, skb) == 0) { + && BPF_PROG_RUN(ipt->pass_filter, skb) == 0) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); kfree_skb(skb); goto unlock; } if (!(ipt->active_filter - && SK_RUN_FILTER(ipt->active_filter, skb) == 0)) { + && BPF_PROG_RUN(ipt->active_filter, skb) == 0)) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1517,9 +1515,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) } drop |= is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0; + && BPF_PROG_RUN(is->pass_filter, skb) == 0; drop |= is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0; + && BPF_PROG_RUN(is->active_filter, skb) == 0; skb_push(skb, IPPP_MAX_HEADER - 4); return drop; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 765248b42a0a..fa0d71727894 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -143,8 +143,8 @@ struct ppp { struct sk_buff_head mrq; /* MP: receive reconstruction queue */ #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter;/* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif /* CONFIG_PPP_FILTER */ struct net *ppp_net; /* the net we belong to */ struct ppp_link_stats stats64; /* 64 bit network stats */ @@ -762,12 +762,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->pass_filter, - &fprog); + err = bpf_prog_create(&ppp->pass_filter, + &fprog); else err = 0; kfree(code); @@ -788,12 +788,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->active_filter, - &fprog); + err = bpf_prog_create(&ppp->active_filter, + &fprog); else err = 0; kfree(code); @@ -1205,7 +1205,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) a four-byte PPP header on each packet */ *skb_push(skb, 2) = 1; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: outbound frame " @@ -1215,7 +1215,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } /* if this packet passes the active filter, record the time */ if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_xmit = jiffies; skb_pull(skb, 2); #else @@ -1839,7 +1839,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) *skb_push(skb, 2) = 0; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: inbound frame " @@ -1848,7 +1848,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) return; } if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_recv = jiffies; __skb_pull(skb, 2); } else @@ -2829,12 +2829,12 @@ static void ppp_destroy_interface(struct ppp *ppp) #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } #endif /* CONFIG_PPP_FILTER */ diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index d7be9b36bce6..a1536d0d83a9 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -58,7 +58,7 @@ struct lb_priv_ex { }; struct lb_priv { - struct sk_filter __rcu *fp; + struct bpf_prog __rcu *fp; lb_select_tx_port_func_t __rcu *select_tx_port_func; struct lb_pcpu_stats __percpu *pcpu_stats; struct lb_priv_ex *ex; /* priv extension */ @@ -174,14 +174,14 @@ static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, struct sk_buff *skb) { - struct sk_filter *fp; + struct bpf_prog *fp; uint32_t lhash; unsigned char *c; fp = rcu_dereference_bh(lb_priv->fp); if (unlikely(!fp)) return 0; - lhash = SK_RUN_FILTER(fp, skb); + lhash = BPF_PROG_RUN(fp, skb); c = (char *) &lhash; return c[0] ^ c[1] ^ c[2] ^ c[3]; } @@ -271,8 +271,8 @@ static void __fprog_destroy(struct sock_fprog_kern *fprog) static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) { struct lb_priv *lb_priv = get_lb_priv(team); - struct sk_filter *fp = NULL; - struct sk_filter *orig_fp = NULL; + struct bpf_prog *fp = NULL; + struct bpf_prog *orig_fp = NULL; struct sock_fprog_kern *fprog = NULL; int err; @@ -281,7 +281,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) ctx->data.bin_val.ptr); if (err) return err; - err = sk_unattached_filter_create(&fp, fprog); + err = bpf_prog_create(&fp, fprog); if (err) { __fprog_destroy(fprog); return err; @@ -300,7 +300,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) if (orig_fp) { synchronize_rcu(); - sk_unattached_filter_destroy(orig_fp); + bpf_prog_destroy(orig_fp); } return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 7cb9b40e9a2f..a5227ab8ccb1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -296,7 +296,8 @@ enum { }) /* Macro to invoke filter function. */ -#define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define SK_RUN_FILTER(filter, ctx) \ + (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) struct bpf_insn { __u8 code; /* opcode */ @@ -323,12 +324,10 @@ struct sk_buff; struct sock; struct seccomp_data; -struct sk_filter { - atomic_t refcnt; +struct bpf_prog { u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ - struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { @@ -338,25 +337,32 @@ struct sk_filter { }; }; -static inline unsigned int sk_filter_size(unsigned int proglen) +struct sk_filter { + atomic_t refcnt; + struct rcu_head rcu; + struct bpf_prog *prog; +}; + +#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) + +static inline unsigned int bpf_prog_size(unsigned int proglen) { - return max(sizeof(struct sk_filter), - offsetof(struct sk_filter, insns[proglen])); + return max(sizeof(struct bpf_prog), + offsetof(struct bpf_prog, insns[proglen])); } #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) int sk_filter(struct sock *sk, struct sk_buff *skb); -void sk_filter_select_runtime(struct sk_filter *fp); -void sk_filter_free(struct sk_filter *fp); +void bpf_prog_select_runtime(struct bpf_prog *fp); +void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog); -void sk_unattached_filter_destroy(struct sk_filter *fp); +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_detach_filter(struct sock *sk); @@ -369,7 +375,7 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct sk_filter *fp); +void bpf_int_jit_compile(struct bpf_prog *fp); #define BPF_ANC BIT(15) @@ -423,8 +429,8 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, #include #include -void bpf_jit_compile(struct sk_filter *fp); -void bpf_jit_free(struct sk_filter *fp); +void bpf_jit_compile(struct bpf_prog *fp); +void bpf_jit_free(struct bpf_prog *fp); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) @@ -438,11 +444,11 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, #else #include -static inline void bpf_jit_compile(struct sk_filter *fp) +static inline void bpf_jit_compile(struct bpf_prog *fp) { } -static inline void bpf_jit_free(struct sk_filter *fp) +static inline void bpf_jit_free(struct bpf_prog *fp) { kfree(fp); } diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 8e10f57f109f..a0070c6dfaf8 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -180,8 +180,8 @@ struct ippp_struct { struct slcompress *slcomp; #endif #ifdef CONFIG_IPPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter; /* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif unsigned long debug; struct isdn_ppp_compressor *compressor,*decompressor; diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 2ec9fbcd06f9..1fad2c27ac32 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -6,14 +6,14 @@ #define XT_BPF_MAX_NUM_INSTR 64 -struct sk_filter; +struct bpf_prog; struct xt_bpf_info { __u16 bpf_program_num_elem; struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; /* only used in the kernel */ - struct sk_filter *filter __attribute__((aligned(8))); + struct bpf_prog *filter __attribute__((aligned(8))); }; #endif /*_XT_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 188ac5ba3900..7f0dbcbb34af 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -73,15 +73,13 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } /** - * __sk_run_filter - run a filter on a given context - * @ctx: buffer to run the filter on - * @insn: filter to apply + * __bpf_prog_run - run eBPF program on a given context + * @ctx: is the data we are operating on + * @insn: is the array of eBPF instructions * - * Decode and apply filter instructions to the skb->data. Return length to - * keep, 0 for none. @ctx is the data we are operating on, @insn is the - * array of filter instructions. + * Decode and execute eBPF instructions. */ -static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn) +static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; @@ -508,29 +506,29 @@ load_byte: return 0; } -void __weak bpf_int_jit_compile(struct sk_filter *prog) +void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } /** - * sk_filter_select_runtime - select execution runtime for BPF program - * @fp: sk_filter populated with internal BPF program + * bpf_prog_select_runtime - select execution runtime for BPF program + * @fp: bpf_prog populated with internal BPF program * * try to JIT internal BPF program, if JIT is not available select interpreter - * BPF program will be executed via SK_RUN_FILTER() macro + * BPF program will be executed via BPF_PROG_RUN() macro */ -void sk_filter_select_runtime(struct sk_filter *fp) +void bpf_prog_select_runtime(struct bpf_prog *fp) { - fp->bpf_func = (void *) __sk_run_filter; + fp->bpf_func = (void *) __bpf_prog_run; /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); } -EXPORT_SYMBOL_GPL(sk_filter_select_runtime); +EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); /* free internal BPF program */ -void sk_filter_free(struct sk_filter *fp) +void bpf_prog_free(struct bpf_prog *fp) { bpf_jit_free(fp); } -EXPORT_SYMBOL_GPL(sk_filter_free); +EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 33a3a97e2b58..2f3fa2cc2eac 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -54,7 +54,7 @@ struct seccomp_filter { atomic_t usage; struct seccomp_filter *prev; - struct sk_filter *prog; + struct bpf_prog *prog; }; /* Limit any path through the tree to 256KB worth of instructions. */ @@ -187,7 +187,7 @@ static u32 seccomp_run_filters(int syscall) * value always takes priority (ignoring the DATA). */ for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -260,7 +260,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(sk_filter_size(new_len), + filter->prog = kzalloc(bpf_prog_size(new_len), GFP_KERNEL|__GFP_NOWARN); if (!filter->prog) goto free_filter; @@ -273,7 +273,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->prog->len = new_len; - sk_filter_select_runtime(filter->prog); + bpf_prog_select_runtime(filter->prog); /* * If there is an existing filter, make it the prev and don't drop its @@ -337,7 +337,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - sk_filter_free(freeme->prog); + bpf_prog_free(freeme->prog); kfree(freeme); } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 5f48623ee1a7..89e0345733bd 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1761,9 +1761,9 @@ static int probe_filter_length(struct sock_filter *fp) return len + 1; } -static struct sk_filter *generate_filter(int which, int *err) +static struct bpf_prog *generate_filter(int which, int *err) { - struct sk_filter *fp; + struct bpf_prog *fp; struct sock_fprog_kern fprog; unsigned int flen = probe_filter_length(tests[which].u.insns); __u8 test_type = tests[which].aux & TEST_TYPE_MASK; @@ -1773,7 +1773,7 @@ static struct sk_filter *generate_filter(int which, int *err) fprog.filter = tests[which].u.insns; fprog.len = flen; - *err = sk_unattached_filter_create(&fp, &fprog); + *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { if (*err == -EINVAL) { pr_cont("PASS\n"); @@ -1798,7 +1798,7 @@ static struct sk_filter *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(sk_filter_size(flen), GFP_KERNEL); + fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; @@ -1809,7 +1809,7 @@ static struct sk_filter *generate_filter(int which, int *err) memcpy(fp->insnsi, tests[which].u.insns_int, fp->len * sizeof(struct bpf_insn)); - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); break; } @@ -1817,21 +1817,21 @@ static struct sk_filter *generate_filter(int which, int *err) return fp; } -static void release_filter(struct sk_filter *fp, int which) +static void release_filter(struct bpf_prog *fp, int which) { __u8 test_type = tests[which].aux & TEST_TYPE_MASK; switch (test_type) { case CLASSIC: - sk_unattached_filter_destroy(fp); + bpf_prog_destroy(fp); break; case INTERNAL: - sk_filter_free(fp); + bpf_prog_free(fp); break; } } -static int __run_one(const struct sk_filter *fp, const void *data, +static int __run_one(const struct bpf_prog *fp, const void *data, int runs, u64 *duration) { u64 start, finish; @@ -1840,7 +1840,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, start = ktime_to_us(ktime_get()); for (i = 0; i < runs; i++) - ret = SK_RUN_FILTER(fp, data); + ret = BPF_PROG_RUN(fp, data); finish = ktime_to_us(ktime_get()); @@ -1850,7 +1850,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, return ret; } -static int run_one(const struct sk_filter *fp, struct bpf_test *test) +static int run_one(const struct bpf_prog *fp, struct bpf_test *test) { int err_cnt = 0, i, runs = MAX_TESTRUNS; @@ -1884,7 +1884,7 @@ static __init int test_bpf(void) int i, err_cnt = 0, pass_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct sk_filter *fp; + struct bpf_prog *fp; int err; pr_info("#%d %s ", i, tests[i].descr); diff --git a/net/core/filter.c b/net/core/filter.c index 6ac901613bee..d814b8a89d0f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -810,8 +810,8 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) } EXPORT_SYMBOL(bpf_check_classic); -static int sk_store_orig_filter(struct sk_filter *fp, - const struct sock_fprog *fprog) +static int bpf_prog_store_orig_filter(struct bpf_prog *fp, + const struct sock_fprog *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); struct sock_fprog_kern *fkprog; @@ -831,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp, return 0; } -static void sk_release_orig_filter(struct sk_filter *fp) +static void bpf_release_orig_filter(struct bpf_prog *fp) { struct sock_fprog_kern *fprog = fp->orig_prog; @@ -841,10 +841,16 @@ static void sk_release_orig_filter(struct sk_filter *fp) } } +static void __bpf_prog_release(struct bpf_prog *prog) +{ + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + static void __sk_filter_release(struct sk_filter *fp) { - sk_release_orig_filter(fp); - sk_filter_free(fp); + __bpf_prog_release(fp->prog); + kfree(fp); } /** @@ -872,7 +878,7 @@ static void sk_filter_release(struct sk_filter *fp) void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); atomic_sub(filter_size, &sk->sk_omem_alloc); sk_filter_release(fp); @@ -883,7 +889,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) */ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); /* same check as in sock_kmalloc() */ if (filter_size <= sysctl_optmem_max && @@ -895,10 +901,10 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) return false; } -static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; - struct sk_filter *old_fp; + struct bpf_prog *old_fp; int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it @@ -927,7 +933,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, sk_filter_size(new_len), GFP_KERNEL); + fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. @@ -949,7 +955,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) */ goto out_err_free; - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); kfree(old_prog); return fp; @@ -957,11 +963,11 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) out_err_free: kfree(old_prog); out_err: - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } -static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) { int err; @@ -970,7 +976,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) err = bpf_check_classic(fp->insns, fp->len); if (err) { - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } @@ -983,13 +989,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * internal BPF translation for the optimized interpreter. */ if (!fp->jited) - fp = __sk_migrate_filter(fp); + fp = bpf_migrate_filter(fp); return fp; } /** - * sk_unattached_filter_create - create an unattached filter + * bpf_prog_create - create an unattached filter * @pfp: the unattached filter that is created * @fprog: the filter program * @@ -998,23 +1004,21 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. */ -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog) +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); - struct sk_filter *fp; + struct bpf_prog *fp; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); + fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); - atomic_set(&fp->refcnt, 1); fp->len = fprog->len; /* Since unattached filters are not copied back to user * space through sk_get_filter(), we do not need to hold @@ -1022,23 +1026,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp, */ fp->orig_prog = NULL; - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp); + fp = bpf_prepare_filter(fp); if (IS_ERR(fp)) return PTR_ERR(fp); *pfp = fp; return 0; } -EXPORT_SYMBOL_GPL(sk_unattached_filter_create); +EXPORT_SYMBOL_GPL(bpf_prog_create); -void sk_unattached_filter_destroy(struct sk_filter *fp) +void bpf_prog_destroy(struct bpf_prog *fp) { - __sk_filter_release(fp); + __bpf_prog_release(fp); } -EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); +EXPORT_SYMBOL_GPL(bpf_prog_destroy); /** * sk_attach_filter - attach a socket filter @@ -1054,7 +1058,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = bpf_classic_proglen(fprog); - unsigned int sk_fsize = sk_filter_size(fprog->len); + unsigned int bpf_fsize = bpf_prog_size(fprog->len); + struct bpf_prog *prog; int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -1064,29 +1069,36 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_fsize, GFP_KERNEL); - if (!fp) + prog = kmalloc(bpf_fsize, GFP_KERNEL); + if (!prog) return -ENOMEM; - if (copy_from_user(fp->insns, fprog->filter, fsize)) { - kfree(fp); + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + kfree(prog); return -EFAULT; } - fp->len = fprog->len; + prog->len = fprog->len; - err = sk_store_orig_filter(fp, fprog); + err = bpf_prog_store_orig_filter(prog, fprog); if (err) { - kfree(fp); + kfree(prog); return -ENOMEM; } - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp); - if (IS_ERR(fp)) - return PTR_ERR(fp); + prog = bpf_prepare_filter(prog); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) { + __bpf_prog_release(prog); + return -ENOMEM; + } + fp->prog = prog; atomic_set(&fp->refcnt, 0); @@ -1142,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, /* We're copying the filter that has been originally attached, * so no conversion/decode needed anymore. */ - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; ret = fprog->len; if (!len) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 12ab7b4be609..4eab4a94a59d 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -107,11 +107,11 @@ #include #include -static struct sk_filter *ptp_insns __read_mostly; +static struct bpf_prog *ptp_insns __read_mostly; unsigned int ptp_classify_raw(const struct sk_buff *skb) { - return SK_RUN_FILTER(ptp_insns, skb); + return BPF_PROG_RUN(ptp_insns, skb); } EXPORT_SYMBOL_GPL(ptp_classify_raw); @@ -189,5 +189,5 @@ void __init ptp_classifier_init(void) .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; - BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); + BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 57d922320c59..ad704c757bb4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -68,7 +68,7 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, if (!filter) goto out; - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; flen = bpf_classic_proglen(fprog); attr = nla_reserve(skb, attrtype, flen); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index bbffdbdaf603..dffee9d47ec4 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par) program.len = info->bpf_program_num_elem; program.filter = info->bpf_program; - if (sk_unattached_filter_create(&info->filter, &program)) { + if (bpf_prog_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; - return SK_RUN_FILTER(info->filter, skb); + return BPF_PROG_RUN(info->filter, skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; - sk_unattached_filter_destroy(info->filter); + bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg __read_mostly = { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 13f64df2c710..0e30d58149da 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -30,7 +30,7 @@ struct cls_bpf_head { }; struct cls_bpf_prog { - struct sk_filter *filter; + struct bpf_prog *filter; struct sock_filter *bpf_ops; struct tcf_exts exts; struct tcf_result res; @@ -54,7 +54,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, int ret; list_for_each_entry(prog, &head->plist, link) { - int filter_res = SK_RUN_FILTER(prog->filter, skb); + int filter_res = BPF_PROG_RUN(prog->filter, skb); if (filter_res == 0) continue; @@ -92,7 +92,7 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) tcf_unbind_filter(tp, &prog->res); tcf_exts_destroy(tp, &prog->exts); - sk_unattached_filter_destroy(prog->filter); + bpf_prog_destroy(prog->filter); kfree(prog->bpf_ops); kfree(prog); @@ -161,7 +161,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; struct sock_fprog_kern tmp; - struct sk_filter *fp, *fp_old; + struct bpf_prog *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; int ret; @@ -193,7 +193,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tmp.len = bpf_len; tmp.filter = bpf_ops; - ret = sk_unattached_filter_create(&fp, &tmp); + ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; @@ -211,7 +211,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); if (fp_old) - sk_unattached_filter_destroy(fp_old); + bpf_prog_destroy(fp_old); if (bpf_old) kfree(bpf_old); -- cgit v1.2.3 From 06ebb06d49486676272a3c030bfeef4bd969a8e6 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 31 Jul 2014 23:00:35 -0400 Subject: iovec: make sure the caller actually wants anything in memcpy_fromiovecend Check for cases when the caller requests 0 bytes instead of running off and dereferencing potentially invalid iovecs. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- lib/iovec.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/iovec.c b/lib/iovec.c index 7a7c2da4cddf..df3abd1eaa4a 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -85,6 +85,10 @@ EXPORT_SYMBOL(memcpy_toiovecend); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len) { + /* No data? Done! */ + if (len == 0) + return 0; + /* Skip over the finished iovecs */ while (offset >= iov->iov_len) { offset -= iov->iov_len; -- cgit v1.2.3 From 7e1e77636e36075ebf118298855268468f1028e8 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 2 Aug 2014 11:47:44 +0200 Subject: lib: Resizable, Scalable, Concurrent Hash Table Generic implementation of a resizable, scalable, concurrent hash table based on [0]. The implementation supports both, fixed size keys specified via an offset and length, or arbitrary keys via own hash and compare functions. Lookups are lockless and protected as RCU read side critical sections. Automatic growing/shrinking based on user configurable watermarks is available while allowing concurrent lookups to take place. Objects to be hashed must include a struct rhash_head. The reason for not using the existing struct hlist_head is that the expansion and shrinking will have two buckets point to a single entry which would lead in obscure reverse chaining behaviour. Code includes a boot selftest if CONFIG_TEST_RHASHTABLE is defined. [0] https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf Signed-off-by: Thomas Graf Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 213 ++++++++++++ lib/Kconfig.debug | 8 + lib/Makefile | 2 +- lib/rhashtable.c | 797 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1019 insertions(+), 1 deletion(-) create mode 100644 include/linux/rhashtable.h create mode 100644 lib/rhashtable.c (limited to 'lib') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h new file mode 100644 index 000000000000..9cda293c867d --- /dev/null +++ b/include/linux/rhashtable.h @@ -0,0 +1,213 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2008-2014 Patrick McHardy + * + * Based on the following paper by Josh Triplett, Paul E. McKenney + * and Jonathan Walpole: + * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf + * + * Code partially derived from nft_hash + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _LINUX_RHASHTABLE_H +#define _LINUX_RHASHTABLE_H + +#include + +struct rhash_head { + struct rhash_head *next; +}; + +#define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) + +struct bucket_table { + size_t size; + struct rhash_head __rcu *buckets[]; +}; + +typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); + +struct rhashtable; + +/** + * struct rhashtable_params - Hash table construction parameters + * @nelem_hint: Hint on number of elements, should be 75% of desired size + * @key_len: Length of key + * @key_offset: Offset of key in struct to be hashed + * @head_offset: Offset of rhash_head in struct to be hashed + * @hash_rnd: Seed to use while hashing + * @max_shift: Maximum number of shifts while expanding + * @hashfn: Function to hash key + * @obj_hashfn: Function to hash object + * @grow_decision: If defined, may return true if table should expand + * @shrink_decision: If defined, may return true if table should shrink + * @mutex_is_held: Must return true if protecting mutex is held + */ +struct rhashtable_params { + size_t nelem_hint; + size_t key_len; + size_t key_offset; + size_t head_offset; + u32 hash_rnd; + size_t max_shift; + rht_hashfn_t hashfn; + rht_obj_hashfn_t obj_hashfn; + bool (*grow_decision)(const struct rhashtable *ht, + size_t new_size); + bool (*shrink_decision)(const struct rhashtable *ht, + size_t new_size); + int (*mutex_is_held)(void); +}; + +/** + * struct rhashtable - Hash table handle + * @tbl: Bucket table + * @nelems: Number of elements in table + * @shift: Current size (1 << shift) + * @p: Configuration parameters + */ +struct rhashtable { + struct bucket_table __rcu *tbl; + size_t nelems; + size_t shift; + struct rhashtable_params p; +}; + +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +#else +static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +{ + return 1; +} +#endif /* CONFIG_PROVE_LOCKING */ + +int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); + +u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len); +u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); + +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t); +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t); +void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, + struct rhash_head **pprev, gfp_t flags); + +bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); +bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); + +int rhashtable_expand(struct rhashtable *ht, gfp_t flags); +int rhashtable_shrink(struct rhashtable *ht, gfp_t flags); + +void *rhashtable_lookup(const struct rhashtable *ht, const void *key); +void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, + bool (*compare)(void *, void *), void *arg); + +void rhashtable_destroy(const struct rhashtable *ht); + +#define rht_dereference(p, ht) \ + rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) + +#define rht_dereference_rcu(p, ht) \ + rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + +/* Internal, use rht_obj() instead */ +#define rht_entry(ptr, type, member) container_of(ptr, type, member) +#define rht_entry_safe(ptr, type, member) \ +({ \ + typeof(ptr) __ptr = (ptr); \ + __ptr ? rht_entry(__ptr, type, member) : NULL; \ +}) +#define rht_entry_safe_rcu(ptr, type, member) \ +({ \ + typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ + __ptr ? container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member) : NULL; \ +}) + +#define rht_next_entry_safe(pos, ht, member) \ +({ \ + pos ? rht_entry_safe(rht_dereference((pos)->member.next, ht), \ + typeof(*(pos)), member) : NULL; \ +}) + +/** + * rht_for_each - iterate over hash chain + * @pos: &struct rhash_head to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + */ +#define rht_for_each(pos, head, ht) \ + for (pos = rht_dereference(head, ht); \ + pos; \ + pos = rht_dereference((pos)->next, ht)) + +/** + * rht_for_each_entry - iterate over hash chain of given type + * @pos: type * to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + * @member: name of the rhash_head within the hashable struct. + */ +#define rht_for_each_entry(pos, head, ht, member) \ + for (pos = rht_entry_safe(rht_dereference(head, ht), \ + typeof(*(pos)), member); \ + pos; \ + pos = rht_next_entry_safe(pos, ht, member)) + +/** + * rht_for_each_entry_safe - safely iterate over hash chain of given type + * @pos: type * to use as a loop cursor. + * @n: type * to use for temporary next object storage + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + * @member: name of the rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive allows for the looped code to + * remove the loop cursor from the list. + */ +#define rht_for_each_entry_safe(pos, n, head, ht, member) \ + for (pos = rht_entry_safe(rht_dereference(head, ht), \ + typeof(*(pos)), member), \ + n = rht_next_entry_safe(pos, ht, member); \ + pos; \ + pos = n, \ + n = rht_next_entry_safe(pos, ht, member)) + +/** + * rht_for_each_rcu - iterate over rcu hash chain + * @pos: &struct rhash_head to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu fkht mutation primitives such as rht_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu(pos, head, ht) \ + for (pos = rht_dereference_rcu(head, ht); \ + pos; \ + pos = rht_dereference_rcu((pos)->next, ht)) + +/** + * rht_for_each_entry_rcu - iterate over rcu hash chain of given type + * @pos: type * to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @member: name of the rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu fkht mutation primitives such as rht_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu(pos, head, member) \ + for (pos = rht_entry_safe_rcu(head, typeof(*(pos)), member); \ + pos; \ + pos = rht_entry_safe_rcu((pos)->member.next, \ + typeof(*(pos)), member)) + +#endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7a638aa3545b..f11a2e8f6157 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1550,6 +1550,14 @@ config TEST_STRING_HELPERS config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" +config TEST_RHASHTABLE + bool "Perform selftest on resizable hash table" + default n + help + Enable this option to test the rhashtable functions at boot. + + If unsure, say N. + endmenu # runtime tests config PROVIDE_OHCI1394_DMA_INIT diff --git a/lib/Makefile b/lib/Makefile index ba967a19edba..fd248e4c05ad 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o hash.o + percpu-refcount.o percpu_ida.o hash.o rhashtable.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/rhashtable.c b/lib/rhashtable.c new file mode 100644 index 000000000000..e6940cf16628 --- /dev/null +++ b/lib/rhashtable.c @@ -0,0 +1,797 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2008-2014 Patrick McHardy + * + * Based on the following paper: + * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf + * + * Code partially derived from nft_hash + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HASH_DEFAULT_SIZE 64UL +#define HASH_MIN_SIZE 4UL + +#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) + +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +{ + return ht->p.mutex_is_held(); +} +EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); +#endif + +/** + * rht_obj - cast hash head to outer object + * @ht: hash table + * @he: hashed node + */ +void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) +{ + return (void *) he - ht->p.head_offset; +} +EXPORT_SYMBOL_GPL(rht_obj); + +static u32 __hashfn(const struct rhashtable *ht, const void *key, + u32 len, u32 hsize) +{ + u32 h; + + h = ht->p.hashfn(key, len, ht->p.hash_rnd); + + return h & (hsize - 1); +} + +/** + * rhashtable_hashfn - compute hash for key of given length + * @ht: hash table to compuate for + * @key: pointer to key + * @len: length of key + * + * Computes the hash value using the hash function provided in the 'hashfn' + * of struct rhashtable_params. The returned value is guaranteed to be + * smaller than the number of buckets in the hash table. + */ +u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len) +{ + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return __hashfn(ht, key, len, tbl->size); +} +EXPORT_SYMBOL_GPL(rhashtable_hashfn); + +static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) +{ + if (unlikely(!ht->p.key_len)) { + u32 h; + + h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + + return h & (hsize - 1); + } + + return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); +} + +/** + * rhashtable_obj_hashfn - compute hash for hashed object + * @ht: hash table to compuate for + * @ptr: pointer to hashed object + * + * Computes the hash value using the hash function `hashfn` respectively + * 'obj_hashfn' depending on whether the hash table is set up to work with + * a fixed length key. The returned value is guaranteed to be smaller than + * the number of buckets in the hash table. + */ +u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr) +{ + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return obj_hashfn(ht, ptr, tbl->size); +} +EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); + +static u32 head_hashfn(const struct rhashtable *ht, + const struct rhash_head *he, u32 hsize) +{ + return obj_hashfn(ht, rht_obj(ht, he), hsize); +} + +static struct bucket_table *bucket_table_alloc(size_t nbuckets, gfp_t flags) +{ + struct bucket_table *tbl; + size_t size; + + size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); + tbl = kzalloc(size, flags); + if (tbl == NULL) + tbl = vzalloc(size); + + if (tbl == NULL) + return NULL; + + tbl->size = nbuckets; + + return tbl; +} + +static void bucket_table_free(const struct bucket_table *tbl) +{ + kvfree(tbl); +} + +/** + * rht_grow_above_75 - returns true if nelems > 0.75 * table-size + * @ht: hash table + * @new_size: new table size + */ +bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +{ + /* Expand table when exceeding 75% load */ + return ht->nelems > (new_size / 4 * 3); +} +EXPORT_SYMBOL_GPL(rht_grow_above_75); + +/** + * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size + * @ht: hash table + * @new_size: new table size + */ +bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +{ + /* Shrink table beneath 30% load */ + return ht->nelems < (new_size * 3 / 10); +} +EXPORT_SYMBOL_GPL(rht_shrink_below_30); + +static void hashtable_chain_unzip(const struct rhashtable *ht, + const struct bucket_table *new_tbl, + struct bucket_table *old_tbl, size_t n) +{ + struct rhash_head *he, *p, *next; + unsigned int h; + + /* Old bucket empty, no work needed. */ + p = rht_dereference(old_tbl->buckets[n], ht); + if (!p) + return; + + /* Advance the old bucket pointer one or more times until it + * reaches a node that doesn't hash to the same bucket as the + * previous node p. Call the previous node p; + */ + h = head_hashfn(ht, p, new_tbl->size); + rht_for_each(he, p->next, ht) { + if (head_hashfn(ht, he, new_tbl->size) != h) + break; + p = he; + } + RCU_INIT_POINTER(old_tbl->buckets[n], p->next); + + /* Find the subsequent node which does hash to the same + * bucket as node P, or NULL if no such node exists. + */ + next = NULL; + if (he) { + rht_for_each(he, he->next, ht) { + if (head_hashfn(ht, he, new_tbl->size) == h) { + next = he; + break; + } + } + } + + /* Set p's next pointer to that subsequent node pointer, + * bypassing the nodes which do not hash to p's bucket + */ + RCU_INIT_POINTER(p->next, next); +} + +/** + * rhashtable_expand - Expand hash table while allowing concurrent lookups + * @ht: the hash table to expand + * @flags: allocation flags + * + * A secondary bucket array is allocated and the hash entries are migrated + * while keeping them on both lists until the end of the RCU grace period. + * + * This function may only be called in a context where it is safe to call + * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * + * The caller must ensure that no concurrent table mutations take place. + * It is however valid to have concurrent lookups if they are RCU protected. + */ +int rhashtable_expand(struct rhashtable *ht, gfp_t flags) +{ + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + struct rhash_head *he; + unsigned int i, h; + bool complete; + + ASSERT_RHT_MUTEX(ht); + + if (ht->p.max_shift && ht->shift >= ht->p.max_shift) + return 0; + + new_tbl = bucket_table_alloc(old_tbl->size * 2, flags); + if (new_tbl == NULL) + return -ENOMEM; + + ht->shift++; + + /* For each new bucket, search the corresponding old bucket + * for the first entry that hashes to the new bucket, and + * link the new bucket to that entry. Since all the entries + * which will end up in the new bucket appear in the same + * old bucket, this constructs an entirely valid new hash + * table, but with multiple buckets "zipped" together into a + * single imprecise chain. + */ + for (i = 0; i < new_tbl->size; i++) { + h = i & (old_tbl->size - 1); + rht_for_each(he, old_tbl->buckets[h], ht) { + if (head_hashfn(ht, he, new_tbl->size) == i) { + RCU_INIT_POINTER(new_tbl->buckets[i], he); + break; + } + } + } + + /* Publish the new table pointer. Lookups may now traverse + * the new table, but they will not benefit from any + * additional efficiency until later steps unzip the buckets. + */ + rcu_assign_pointer(ht->tbl, new_tbl); + + /* Unzip interleaved hash chains */ + do { + /* Wait for readers. All new readers will see the new + * table, and thus no references to the old table will + * remain. + */ + synchronize_rcu(); + + /* For each bucket in the old table (each of which + * contains items from multiple buckets of the new + * table): ... + */ + complete = true; + for (i = 0; i < old_tbl->size; i++) { + hashtable_chain_unzip(ht, new_tbl, old_tbl, i); + if (old_tbl->buckets[i] != NULL) + complete = false; + } + } while (!complete); + + bucket_table_free(old_tbl); + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_expand); + +/** + * rhashtable_shrink - Shrink hash table while allowing concurrent lookups + * @ht: the hash table to shrink + * @flags: allocation flags + * + * This function may only be called in a context where it is safe to call + * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * + * The caller must ensure that no concurrent table mutations take place. + * It is however valid to have concurrent lookups if they are RCU protected. + */ +int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) +{ + struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); + struct rhash_head __rcu **pprev; + unsigned int i; + + ASSERT_RHT_MUTEX(ht); + + if (tbl->size <= HASH_MIN_SIZE) + return 0; + + ntbl = bucket_table_alloc(tbl->size / 2, flags); + if (ntbl == NULL) + return -ENOMEM; + + ht->shift--; + + /* Link each bucket in the new table to the first bucket + * in the old table that contains entries which will hash + * to the new bucket. + */ + for (i = 0; i < ntbl->size; i++) { + ntbl->buckets[i] = tbl->buckets[i]; + + /* Link each bucket in the new table to the first bucket + * in the old table that contains entries which will hash + * to the new bucket. + */ + for (pprev = &ntbl->buckets[i]; *pprev != NULL; + pprev = &rht_dereference(*pprev, ht)->next) + ; + RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); + } + + /* Publish the new, valid hash table */ + rcu_assign_pointer(ht->tbl, ntbl); + + /* Wait for readers. No new readers will have references to the + * old hash table. + */ + synchronize_rcu(); + + bucket_table_free(tbl); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_shrink); + +/** + * rhashtable_insert - insert object into hash hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @flags: allocation flags (table expansion) + * + * Will automatically grow the table via rhashtable_expand() if the the + * grow_decision function specified at rhashtable_init() returns true. + * + * The caller must ensure that no concurrent table mutations occur. It is + * however valid to have concurrent lookups if they are RCU protected. + */ +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, + gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + u32 hash; + + ASSERT_RHT_MUTEX(ht); + + hash = head_hashfn(ht, obj, tbl->size); + RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); + rcu_assign_pointer(tbl->buckets[hash], obj); + ht->nelems++; + + if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + rhashtable_expand(ht, flags); +} +EXPORT_SYMBOL_GPL(rhashtable_insert); + +/** + * rhashtable_remove_pprev - remove object from hash table given previous element + * @ht: hash table + * @obj: pointer to hash head inside object + * @pprev: pointer to previous element + * @flags: allocation flags (table expansion) + * + * Identical to rhashtable_remove() but caller is alreayd aware of the element + * in front of the element to be deleted. This is in particular useful for + * deletion when combined with walking or lookup. + */ +void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, + struct rhash_head **pprev, gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + + ASSERT_RHT_MUTEX(ht); + + RCU_INIT_POINTER(*pprev, obj->next); + ht->nelems--; + + if (ht->p.shrink_decision && + ht->p.shrink_decision(ht, tbl->size)) + rhashtable_shrink(ht, flags); +} +EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); + +/** + * rhashtable_remove - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @flags: allocation flags (table expansion) + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the the + * shrink_decision function specified at rhashtable_init() returns true. + * + * The caller must ensure that no concurrent table mutations occur. It is + * however valid to have concurrent lookups if they are RCU protected. + */ +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj, + gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + struct rhash_head __rcu **pprev; + struct rhash_head *he; + u32 h; + + ASSERT_RHT_MUTEX(ht); + + h = head_hashfn(ht, obj, tbl->size); + + pprev = &tbl->buckets[h]; + rht_for_each(he, tbl->buckets[h], ht) { + if (he != obj) { + pprev = &he->next; + continue; + } + + rhashtable_remove_pprev(ht, he, pprev, flags); + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(rhashtable_remove); + +/** + * rhashtable_lookup - lookup key in hash table + * @ht: hash table + * @key: pointer to key + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This lookup function may only be used for fixed key hash table (key_len + * paramter set). It will BUG() if used inappropriately. + * + * Lookups may occur in parallel with hash mutations as long as the lookup is + * guarded by rcu_read_lock(). The caller must take care of this. + */ +void *rhashtable_lookup(const struct rhashtable *ht, const void *key) +{ + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + struct rhash_head *he; + u32 h; + + BUG_ON(!ht->p.key_len); + + h = __hashfn(ht, key, ht->p.key_len, tbl->size); + rht_for_each_rcu(he, tbl->buckets[h], ht) { + if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, + ht->p.key_len)) + continue; + return (void *) he - ht->p.head_offset; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup); + +/** + * rhashtable_lookup_compare - search hash table with compare function + * @ht: hash table + * @hash: hash value of desired entry + * @compare: compare function, must return true on match + * @arg: argument passed on to compare function + * + * Traverses the bucket chain behind the provided hash value and calls the + * specified compare function for each entry. + * + * Lookups may occur in parallel with hash mutations as long as the lookup is + * guarded by rcu_read_lock(). The caller must take care of this. + * + * Returns the first entry on which the compare function returned true. + */ +void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, + bool (*compare)(void *, void *), void *arg) +{ + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + struct rhash_head *he; + + if (unlikely(hash >= tbl->size)) + return NULL; + + rht_for_each_rcu(he, tbl->buckets[hash], ht) { + if (!compare(rht_obj(ht, he), arg)) + continue; + return (void *) he - ht->p.head_offset; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); + +static size_t rounded_hashtable_size(unsigned int nelem) +{ + return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE); +} + +/** + * rhashtable_init - initialize a new hash table + * @ht: hash table to be initialized + * @params: configuration parameters + * + * Initializes a new hash table based on the provided configuration + * parameters. A table can be configured either with a variable or + * fixed length key: + * + * Configuration Example 1: Fixed length keys + * struct test_obj { + * int key; + * void * my_member; + * struct rhash_head node; + * }; + * + * struct rhashtable_params params = { + * .head_offset = offsetof(struct test_obj, node), + * .key_offset = offsetof(struct test_obj, key), + * .key_len = sizeof(int), + * .hashfn = arch_fast_hash, + * .mutex_is_held = &my_mutex_is_held, + * }; + * + * Configuration Example 2: Variable length keys + * struct test_obj { + * [...] + * struct rhash_head node; + * }; + * + * u32 my_hash_fn(const void *data, u32 seed) + * { + * struct test_obj *obj = data; + * + * return [... hash ...]; + * } + * + * struct rhashtable_params params = { + * .head_offset = offsetof(struct test_obj, node), + * .hashfn = arch_fast_hash, + * .obj_hashfn = my_hash_fn, + * .mutex_is_held = &my_mutex_is_held, + * }; + */ +int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) +{ + struct bucket_table *tbl; + size_t size; + + size = HASH_DEFAULT_SIZE; + + if ((params->key_len && !params->hashfn) || + (!params->key_len && !params->obj_hashfn)) + return -EINVAL; + + if (params->nelem_hint) + size = rounded_hashtable_size(params->nelem_hint); + + tbl = bucket_table_alloc(size, GFP_KERNEL); + if (tbl == NULL) + return -ENOMEM; + + memset(ht, 0, sizeof(*ht)); + ht->shift = ilog2(tbl->size); + memcpy(&ht->p, params, sizeof(*params)); + RCU_INIT_POINTER(ht->tbl, tbl); + + if (!ht->p.hash_rnd) + get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_init); + +/** + * rhashtable_destroy - destroy hash table + * @ht: the hash table to destroy + * + * Frees the bucket array. + */ +void rhashtable_destroy(const struct rhashtable *ht) +{ + const struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + + bucket_table_free(tbl); +} +EXPORT_SYMBOL_GPL(rhashtable_destroy); + +/************************************************************************** + * Self Test + **************************************************************************/ + +#ifdef CONFIG_TEST_RHASHTABLE + +#define TEST_HT_SIZE 8 +#define TEST_ENTRIES 2048 +#define TEST_PTR ((void *) 0xdeadbeef) +#define TEST_NEXPANDS 4 + +static int test_mutex_is_held(void) +{ + return 1; +} + +struct test_obj { + void *ptr; + int value; + struct rhash_head node; +}; + +static int __init test_rht_lookup(struct rhashtable *ht) +{ + unsigned int i; + + for (i = 0; i < TEST_ENTRIES * 2; i++) { + struct test_obj *obj; + bool expected = !(i % 2); + u32 key = i; + + obj = rhashtable_lookup(ht, &key); + + if (expected && !obj) { + pr_warn("Test failed: Could not find key %u\n", key); + return -ENOENT; + } else if (!expected && obj) { + pr_warn("Test failed: Unexpected entry found for key %u\n", + key); + return -EEXIST; + } else if (expected && obj) { + if (obj->ptr != TEST_PTR || obj->value != i) { + pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", + obj->ptr, TEST_PTR, obj->value, i); + return -EINVAL; + } + } + } + + return 0; +} + +static void test_bucket_stats(struct rhashtable *ht, + struct bucket_table *tbl, + bool quiet) +{ + unsigned int cnt, i, total = 0; + struct test_obj *obj; + + for (i = 0; i < tbl->size; i++) { + cnt = 0; + + if (!quiet) + pr_info(" [%#4x/%zu]", i, tbl->size); + + rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { + cnt++; + total++; + if (!quiet) + pr_cont(" [%p],", obj); + } + + if (!quiet) + pr_cont("\n [%#x] first element: %p, chain length: %u\n", + i, tbl->buckets[i], cnt); + } + + pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", + total, ht->nelems, TEST_ENTRIES); +} + +static int __init test_rhashtable(struct rhashtable *ht) +{ + struct bucket_table *tbl; + struct test_obj *obj, *next; + int err; + unsigned int i; + + /* + * Insertion Test: + * Insert TEST_ENTRIES into table with all keys even numbers + */ + pr_info(" Adding %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + struct test_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) { + err = -ENOMEM; + goto error; + } + + obj->ptr = TEST_PTR; + obj->value = i * 2; + + rhashtable_insert(ht, &obj->node, GFP_KERNEL); + } + + rcu_read_lock(); + tbl = rht_dereference_rcu(ht->tbl, ht); + test_bucket_stats(ht, tbl, true); + test_rht_lookup(ht); + rcu_read_unlock(); + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table expansion iteration %u...\n", i); + rhashtable_expand(ht, GFP_KERNEL); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table shrinkage iteration %u...\n", i); + rhashtable_shrink(ht, GFP_KERNEL); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + pr_info(" Deleting %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + u32 key = i * 2; + + obj = rhashtable_lookup(ht, &key); + BUG_ON(!obj); + + rhashtable_remove(ht, &obj->node, GFP_KERNEL); + kfree(obj); + } + + return 0; + +error: + tbl = rht_dereference_rcu(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) + rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) + kfree(obj); + + return err; +} + +static int __init test_rht_init(void) +{ + struct rhashtable ht; + struct rhashtable_params params = { + .nelem_hint = TEST_HT_SIZE, + .head_offset = offsetof(struct test_obj, node), + .key_offset = offsetof(struct test_obj, value), + .key_len = sizeof(int), + .hashfn = arch_fast_hash, + .mutex_is_held = &test_mutex_is_held, + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; + int err; + + pr_info("Running resizable hashtable tests...\n"); + + err = rhashtable_init(&ht, ¶ms); + if (err < 0) { + pr_warn("Test failed: Unable to initialize hashtable: %d\n", + err); + return err; + } + + err = test_rhashtable(&ht); + + rhashtable_destroy(&ht); + + return err; +} + +subsys_initcall(test_rht_init); + +#endif /* CONFIG_TEST_RHASHTABLE */ -- cgit v1.2.3