From ecc6569f3503b39f45bc6b86197b5e0a8533fb72 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 23:08:11 +0800 Subject: netfilter: gre: Use consistent GRE_* macros instead of ones defined by netfilter. There are already some GRE_* macros in kernel, so it is unnecessary to define these macros. And remove some useless macros Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/if_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 9865c8caedde..fb7337d6b985 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -39,6 +39,7 @@ #define GRE_IS_REC(f) ((f) & GRE_REC) #define GRE_IS_ACK(f) ((f) & GRE_ACK) +#define GRE_VERSION_0 __cpu_to_be16(0x0000) #define GRE_VERSION_1 __cpu_to_be16(0x0001) #define GRE_PROTO_PPP __cpu_to_be16(0x880b) #define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) -- cgit v1.2.3 From 0d9932b2875f568d679f2af33ce610da3903ac11 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Fri, 2 Sep 2016 15:05:57 +0200 Subject: netfilter: nft_numgen: rename until attribute by modulus The _until_ attribute is renamed to _modulus_ as the behaviour is similar to other expresions with number limits (ex. nft_hash). Renaming is possible because there isn't a kernel release yet with these changes. Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++-- net/netfilter/nft_numgen.c | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 28ce01d79707..24161e25576d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1126,13 +1126,13 @@ enum nft_trace_types { * enum nft_ng_attributes - nf_tables number generator expression netlink attributes * * @NFTA_NG_DREG: destination register (NLA_U32) - * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32) + * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) * @NFTA_NG_TYPE: operation type (NLA_U32) */ enum nft_ng_attributes { NFTA_NG_UNSPEC, NFTA_NG_DREG, - NFTA_NG_UNTIL, + NFTA_NG_MODULUS, NFTA_NG_TYPE, __NFTA_NG_MAX }; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 294745ecb0fc..f51a3ede3932 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); struct nft_ng_inc { enum nft_registers dreg:8; - u32 until; + u32 modulus; atomic_t counter; }; @@ -34,7 +34,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, do { oval = atomic_read(&priv->counter); - nval = (oval + 1 < priv->until) ? oval + 1 : 0; + nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); memcpy(®s->data[priv->dreg], &priv->counter, sizeof(u32)); @@ -42,7 +42,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, - [NFTA_NG_UNTIL] = { .type = NLA_U32 }, + [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, }; @@ -52,8 +52,8 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, { struct nft_ng_inc *priv = nft_expr_priv(expr); - priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); - if (priv->until == 0) + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) return -ERANGE; priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); @@ -64,11 +64,11 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, - u32 until, enum nft_ng_types type) + u32 modulus, enum nft_ng_types type) { if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until))) + if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) goto nla_put_failure; @@ -83,12 +83,12 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_inc *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL); } struct nft_ng_random { enum nft_registers dreg:8; - u32 until; + u32 modulus; }; static void nft_ng_random_eval(const struct nft_expr *expr, @@ -99,7 +99,7 @@ static void nft_ng_random_eval(const struct nft_expr *expr, struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), - priv->until); + priv->modulus); } static int nft_ng_random_init(const struct nft_ctx *ctx, @@ -108,8 +108,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, { struct nft_ng_random *priv = nft_expr_priv(expr); - priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); - if (priv->until == 0) + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) return -ERANGE; prandom_init_once(&nft_numgen_prandom_state); @@ -124,7 +124,7 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM); } static struct nft_expr_type nft_ng_type; @@ -149,8 +149,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { u32 type; - if (!tb[NFTA_NG_DREG] || - !tb[NFTA_NG_UNTIL] || + if (!tb[NFTA_NG_DREG] || + !tb[NFTA_NG_MODULUS] || !tb[NFTA_NG_TYPE]) return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 70ca767ea1b2748f45e96192400e515dddbe517c Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 6 Sep 2016 08:44:19 +0200 Subject: netfilter: nft_hash: Add hash offset value Add support to pass through an offset to the hash value. With this feature, the sysadmin is able to generate a hash with a given offset value. Example: meta mark set jhash ip saddr mod 2 seed 0xabcd offset 100 This option generates marks according to the source address from 100 to 101. Signed-off-by: Laura Garcia Liebana --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_hash.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 24161e25576d..8c653bbd1ead 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -731,6 +731,7 @@ enum nft_meta_keys { * @NFTA_HASH_LEN: source data length (NLA_U32) * @NFTA_HASH_MODULUS: modulus value (NLA_U32) * @NFTA_HASH_SEED: seed value (NLA_U32) + * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) */ enum nft_hash_attributes { NFTA_HASH_UNSPEC, @@ -739,6 +740,7 @@ enum nft_hash_attributes { NFTA_HASH_LEN, NFTA_HASH_MODULUS, NFTA_HASH_SEED, + NFTA_HASH_OFFSET, __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 764251d31e46..bd12f7a801c2 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -23,6 +23,7 @@ struct nft_hash { u8 len; u32 modulus; u32 seed; + u32 offset; }; static void nft_hash_eval(const struct nft_expr *expr, @@ -31,10 +32,10 @@ static void nft_hash_eval(const struct nft_expr *expr, { struct nft_hash *priv = nft_expr_priv(expr); const void *data = ®s->data[priv->sreg]; + u32 h; - regs->data[priv->dreg] = - reciprocal_scale(jhash(data, priv->len, priv->seed), - priv->modulus); + h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus); + regs->data[priv->dreg] = h + priv->offset; } static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { @@ -59,6 +60,9 @@ static int nft_hash_init(const struct nft_ctx *ctx, !tb[NFTA_HASH_MODULUS]) return -EINVAL; + if (tb[NFTA_HASH_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); + priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); @@ -72,6 +76,9 @@ static int nft_hash_init(const struct nft_ctx *ctx, if (priv->modulus <= 1) return -ERANGE; + if (priv->offset + priv->modulus - 1 < U32_MAX) + return -EOVERFLOW; + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); return nft_validate_register_load(priv->sreg, len) && @@ -94,7 +101,9 @@ static int nft_hash_dump(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) goto nla_put_failure; - + if (priv->offset != 0) + if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) + goto nla_put_failure; return 0; nla_put_failure: -- cgit v1.2.3 From dbd2be0646e3239022630c426cbceefa15714bca Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Sep 2016 12:22:18 +0200 Subject: netfilter: nft_dynset: allow to invert match criteria The dynset expression matches if we can fit a new entry into the set. If there is no room for it, then it breaks the rule evaluation. This patch introduces the inversion flag so you can add rules to explicitly drop packets that don't fit into the set. For example: # nft filter input flow table xyz size 4 { ip saddr timeout 120s counter } overflow drop This is useful to provide a replacement for connlimit. For the rule above, every new entry uses the IPv4 address as key in the set, this entry gets a timeout of 120 seconds that gets refresh on every packet seen. If we get new flow and our set already contains 4 entries already, then this packet is dropped. You can already express this in positive logic, assuming default policy to drop: # nft filter input flow table xyz size 4 { ip saddr timeout 10s counter } accept Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nft_dynset.c | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 8c653bbd1ead..bc0eb6a1066d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -575,6 +575,10 @@ enum nft_dynset_ops { NFT_DYNSET_OP_UPDATE, }; +enum nft_dynset_flags { + NFT_DYNSET_F_INV = (1 << 0), +}; + /** * enum nft_dynset_attributes - dynset expression attributes * @@ -585,6 +589,7 @@ enum nft_dynset_ops { * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_DYNSET_FLAGS: flags (NLA_U32) */ enum nft_dynset_attributes { NFTA_DYNSET_UNSPEC, @@ -596,6 +601,7 @@ enum nft_dynset_attributes { NFTA_DYNSET_TIMEOUT, NFTA_DYNSET_EXPR, NFTA_DYNSET_PAD, + NFTA_DYNSET_FLAGS, __NFTA_DYNSET_MAX, }; #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 0af26699bf04..e3b83c31da2e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -22,6 +22,7 @@ struct nft_dynset { enum nft_dynset_ops op:8; enum nft_registers sreg_key:8; enum nft_registers sreg_data:8; + bool invert; u64 timeout; struct nft_expr *expr; struct nft_set_binding binding; @@ -82,10 +83,14 @@ static void nft_dynset_eval(const struct nft_expr *expr, if (sexpr != NULL) sexpr->ops->eval(sexpr, regs, pkt); + + if (priv->invert) + regs->verdict.code = NFT_BREAK; return; } out: - regs->verdict.code = NFT_BREAK; + if (!priv->invert) + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { @@ -96,6 +101,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, + [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 }, }; static int nft_dynset_init(const struct nft_ctx *ctx, @@ -113,6 +119,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; + if (tb[NFTA_DYNSET_FLAGS]) { + u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); + + if (flags & ~NFT_DYNSET_F_INV) + return -EINVAL; + if (flags & NFT_DYNSET_F_INV) + priv->invert = true; + } + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], genmask); if (IS_ERR(set)) { @@ -220,6 +235,7 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx, static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_dynset *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0; if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key)) goto nla_put_failure; @@ -235,6 +251,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags))) + goto nla_put_failure; return 0; nla_put_failure: -- cgit v1.2.3 From 8e8118f893138d4cc3d4dbf4163d7497fca54a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 11 Sep 2016 22:55:53 +0200 Subject: netfilter: conntrack: remove packet hotpath stats These counters sit in hot path and do show up in perf, this is especially true for 'found' and 'searched' which get incremented for every packet processed. Information like searched=212030105 new=623431 found=333613 delete=623327 does not seem too helpful nowadays: - on busy systems found and searched will overflow every few hours (these are 32bit integers), other more busy ones every few days. - for debugging there are better methods, such as iptables' trace target, the conntrack log sysctls. Nowadays we also have perf tool. This removes packet path stat counters except those that are expected to be 0 (or close to 0) on a normal system, e.g. 'insert_failed' (race happened) or 'invalid' (proto tracker rejects). The insert stat is retained for the ctnetlink case. The found stat is retained for the tuple-is-taken check when NAT has to determine if it needs to pick a different source address. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 4 ---- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 8 ++++---- net/netfilter/nf_conntrack_core.c | 14 ++------------ net/netfilter/nf_conntrack_netlink.c | 6 +----- net/netfilter/nf_conntrack_standalone.c | 8 ++++---- 5 files changed, 11 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 275505792664..1d1ef4e20512 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -4,13 +4,9 @@ #include struct ip_conntrack_stat { - unsigned int searched; unsigned int found; - unsigned int new; unsigned int invalid; unsigned int ignore; - unsigned int delete; - unsigned int delete_list; unsigned int insert; unsigned int insert_failed; unsigned int drop; diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 9df789709abe..6deb8867c5fc 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -231,13 +231,13 @@ enum ctattr_secctx { enum ctattr_stats_cpu { CTA_STATS_UNSPEC, - CTA_STATS_SEARCHED, + CTA_STATS_SEARCHED, /* no longer used */ CTA_STATS_FOUND, - CTA_STATS_NEW, + CTA_STATS_NEW, /* no longer used */ CTA_STATS_INVALID, CTA_STATS_IGNORE, - CTA_STATS_DELETE, - CTA_STATS_DELETE_LIST, + CTA_STATS_DELETE, /* no longer used */ + CTA_STATS_DELETE_LIST, /* no longer used */ CTA_STATS_INSERT, CTA_STATS_INSERT_FAILED, CTA_STATS_DROP, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ac1db4019d5c..8d1ddb9b63ed 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -379,7 +379,6 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); @@ -406,7 +405,6 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_del_from_dying_or_unconfirmed_list(ct); - NF_CT_STAT_INC(net, delete); local_bh_enable(); if (ct->master) @@ -438,7 +436,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) nf_ct_add_to_dying_list(ct); - NF_CT_STAT_INC(net, delete_list); local_bh_enable(); } @@ -529,11 +526,8 @@ begin: if (nf_ct_is_dying(ct)) continue; - if (nf_ct_key_equal(h, tuple, zone, net)) { - NF_CT_STAT_INC_ATOMIC(net, found); + if (nf_ct_key_equal(h, tuple, zone, net)) return h; - } - NF_CT_STAT_INC_ATOMIC(net, searched); } /* * if the nulls value we got at the end of this lookup is @@ -798,7 +792,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert); local_bh_enable(); help = nfct_help(ct); @@ -857,7 +850,6 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, rcu_read_unlock(); return 1; } - NF_CT_STAT_INC_ATOMIC(net, searched); } if (get_nulls_value(n) != hash) { @@ -1177,10 +1169,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } spin_unlock(&nf_conntrack_expect_lock); } - if (!exp) { + if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - NF_CT_STAT_INC(net, new); - } /* Now it is inserted into the unconfirmed list, bump refcount */ nf_conntrack_get(&ct->ct_general); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c052b712c49f..27540455dc62 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1984,13 +1984,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) || - nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || - nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) || + if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || - nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) || - nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3d9a316a3c77..7d52f8401afd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -352,13 +352,13 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, - st->searched, + 0, st->found, - st->new, + 0, st->invalid, st->ignore, - st->delete, - st->delete_list, + 0, + 0, st->insert, st->insert_failed, st->drop, -- cgit v1.2.3 From 2b03bf732488a3c2e920afe22c03b82cb8477e28 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 13 Sep 2016 13:49:53 +0200 Subject: netfilter: nft_numgen: add number generation offset Add support of an offset value for incremental counter and random. With this option the sysadmin is able to start the counter to a certain value and then apply the generated number. Example: meta mark set numgen inc mod 2 offset 100 This will generate marks with the serie 100, 101, 100, 101, ... Suggested-by: Pablo Neira Ayuso Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_numgen.c | 32 ++++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index bc0eb6a1066d..bcfb892ff148 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1136,12 +1136,14 @@ enum nft_trace_types { * @NFTA_NG_DREG: destination register (NLA_U32) * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) * @NFTA_NG_TYPE: operation type (NLA_U32) + * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32) */ enum nft_ng_attributes { NFTA_NG_UNSPEC, NFTA_NG_DREG, NFTA_NG_MODULUS, NFTA_NG_TYPE, + NFTA_NG_OFFSET, __NFTA_NG_MAX }; #define NFTA_NG_MAX (__NFTA_NG_MAX - 1) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index f173ebec30a7..55bc5ab78d4a 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -23,6 +23,7 @@ struct nft_ng_inc { enum nft_registers dreg:8; u32 modulus; atomic_t counter; + u32 offset; }; static void nft_ng_inc_eval(const struct nft_expr *expr, @@ -37,13 +38,14 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); - regs->data[priv->dreg] = nval; + regs->data[priv->dreg] = nval + priv->offset; } static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, + [NFTA_NG_OFFSET] = { .type = NLA_U32 }, }; static int nft_ng_inc_init(const struct nft_ctx *ctx, @@ -52,10 +54,16 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, { struct nft_ng_inc *priv = nft_expr_priv(expr); + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); if (priv->modulus == 0) return -ERANGE; + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); atomic_set(&priv->counter, 0); @@ -64,7 +72,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, - u32 modulus, enum nft_ng_types type) + u32 modulus, enum nft_ng_types type, u32 offset) { if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) goto nla_put_failure; @@ -72,6 +80,8 @@ static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, goto nla_put_failure; if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset))) + goto nla_put_failure; return 0; @@ -83,12 +93,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_inc *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL, + priv->offset); } struct nft_ng_random { enum nft_registers dreg:8; u32 modulus; + u32 offset; }; static void nft_ng_random_eval(const struct nft_expr *expr, @@ -97,9 +109,10 @@ static void nft_ng_random_eval(const struct nft_expr *expr, { struct nft_ng_random *priv = nft_expr_priv(expr); struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); + u32 val; - regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), - priv->modulus); + val = reciprocal_scale(prandom_u32_state(state), priv->modulus); + regs->data[priv->dreg] = val + priv->offset; } static int nft_ng_random_init(const struct nft_ctx *ctx, @@ -108,10 +121,16 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, { struct nft_ng_random *priv = nft_expr_priv(expr); + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); if (priv->modulus == 0) return -ERANGE; + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + prandom_init_once(&nft_numgen_prandom_state); priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); @@ -124,7 +143,8 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); - return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM); + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM, + priv->offset); } static struct nft_expr_type nft_ng_type; -- cgit v1.2.3 From 8061bb54436c19fd16b7c734a69ff60bac26e3e9 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 14 Sep 2016 23:41:46 +0800 Subject: netfilter: nft_queue: add _SREG_QNUM attr to select the queue number Currently, the user can specify the queue numbers by _QUEUE_NUM and _QUEUE_TOTAL attributes, this is enough in most situations. But acctually, it is not very flexible, for example: tcp dport 80 mapped to queue0 tcp dport 81 mapped to queue1 tcp dport 82 mapped to queue2 In order to do this thing, we must add 3 nft rules, and more mapping meant more rules ... So take one register to select the queue number, then we can add one simple rule to mapping queues, maybe like this: queue num tcp dport map { 80:0, 81:1, 82:2 ... } Florian Westphal also proposed wider usage scenarios: queue num jhash ip saddr . ip daddr mod ... queue num meta cpu ... queue num meta mark ... The last point is how to load a queue number from sreg, although we can use *(u16*)®s->data[reg] to load the queue number, just like nat expr to load its l4port do. But we will cooperate with hash expr, meta cpu, meta mark expr and so on. They all store the result to u32 type, so cast it to u16 pointer and dereference it will generate wrong result in the big endian system. So just keep it simple, we treat queue number as u32 type, although u16 type is already enough. Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 + net/netfilter/nft_queue.c | 102 +++++++++++++++++++++++++++---- 2 files changed, 92 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index bcfb892ff148..1cf41dd838b2 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -894,12 +894,14 @@ enum nft_log_attributes { * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers) */ enum nft_queue_attributes { NFTA_QUEUE_UNSPEC, NFTA_QUEUE_NUM, NFTA_QUEUE_TOTAL, NFTA_QUEUE_FLAGS, + NFTA_QUEUE_SREG_QNUM, __NFTA_QUEUE_MAX }; #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index d16d59959ff6..393d359a1889 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -22,9 +22,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - u16 queuenum; - u16 queues_total; - u16 flags; + enum nft_registers sreg_qnum:8; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -54,26 +55,39 @@ static void nft_queue_eval(const struct nft_expr *expr, regs->verdict.code = ret; } +static void nft_queue_sreg_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue, ret; + + queue = regs->data[priv->sreg_qnum]; + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + regs->verdict.code = ret; +} + static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, + [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 }, }; static int nft_queue_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_queue *priv = nft_expr_priv(expr); u32 maxid; - if (tb[NFTA_QUEUE_NUM] == NULL) - return -EINVAL; - - init_hashrandom(&jhash_initval); priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); - if (tb[NFTA_QUEUE_TOTAL] != NULL) + if (tb[NFTA_QUEUE_TOTAL]) priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); else priv->queues_total = 1; @@ -85,11 +99,34 @@ static int nft_queue_init(const struct nft_ctx *ctx, if (maxid > U16_MAX) return -ERANGE; - if (tb[NFTA_QUEUE_FLAGS] != NULL) { + if (tb[NFTA_QUEUE_FLAGS]) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & ~NFT_QUEUE_FLAG_MASK) + return -EINVAL; + } + return 0; +} + +static int nft_queue_sreg_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + int err; + + priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); + err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + if (err < 0) + return err; + + if (tb[NFTA_QUEUE_FLAGS]) { priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); if (priv->flags & ~NFT_QUEUE_FLAG_MASK) return -EINVAL; + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) + return -EOPNOTSUPP; } + return 0; } @@ -108,6 +145,21 @@ nla_put_failure: return -1; } +static int +nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + static struct nft_expr_type nft_queue_type; static const struct nft_expr_ops nft_queue_ops = { .type = &nft_queue_type, @@ -117,9 +169,35 @@ static const struct nft_expr_ops nft_queue_ops = { .dump = nft_queue_dump, }; +static const struct nft_expr_ops nft_queue_sreg_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_sreg_eval, + .init = nft_queue_sreg_init, + .dump = nft_queue_sreg_dump, +}; + +static const struct nft_expr_ops * +nft_queue_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM]) + return ERR_PTR(-EINVAL); + + init_hashrandom(&jhash_initval); + + if (tb[NFTA_QUEUE_NUM]) + return &nft_queue_ops; + + if (tb[NFTA_QUEUE_SREG_QNUM]) + return &nft_queue_sreg_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_queue_type __read_mostly = { .name = "queue", - .ops = &nft_queue_ops, + .select_ops = &nft_queue_select_ops, .policy = nft_queue_policy, .maxattr = NFTA_QUEUE_MAX, .owner = THIS_MODULE, -- cgit v1.2.3 From 11d5f15723c9f39d7c131d0149d024c17dbef676 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Thu, 22 Sep 2016 12:43:44 -0400 Subject: netfilter: xt_hashlimit: Create revision 2 to support higher pps rates Create a new revision for the hashlimit iptables extension module. Rev 2 will support higher pps of upto 1 million, Version 1 supports only 10k. To support this we have to increase the size of the variables avg and burst in hashlimit_cfg to 64-bit. Create two new structs hashlimit_cfg2 and xt_hashlimit_mtinfo2 and also create newer versions of all the functions for match, checkentry and destroy. Some of the functions like hashlimit_mt, hashlimit_mt_check etc are very similar in both rev1 and rev2 with only minor changes, so I have split those functions and moved all the common code to a *_common function. Signed-off-by: Vishwanath Pai Signed-off-by: Joshua Hunt Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_hashlimit.h | 23 ++ net/netfilter/xt_hashlimit.c | 330 ++++++++++++++++++++++------ 2 files changed, 285 insertions(+), 68 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h index 6db90372f09c..3efc0ca18345 100644 --- a/include/uapi/linux/netfilter/xt_hashlimit.h +++ b/include/uapi/linux/netfilter/xt_hashlimit.h @@ -6,6 +6,7 @@ /* timings are in milliseconds. */ #define XT_HASHLIMIT_SCALE 10000 +#define XT_HASHLIMIT_SCALE_v2 1000000llu /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 * seconds, or one packet every 59 hours. */ @@ -63,6 +64,20 @@ struct hashlimit_cfg1 { __u8 srcmask, dstmask; }; +struct hashlimit_cfg2 { + __u64 avg; /* Average secs between packets * scale */ + __u64 burst; /* Period multiplier for upper limit. */ + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire? */ + + __u8 srcmask, dstmask; +}; + struct xt_hashlimit_mtinfo1 { char name[IFNAMSIZ]; struct hashlimit_cfg1 cfg; @@ -71,4 +86,12 @@ struct xt_hashlimit_mtinfo1 { struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); }; +struct xt_hashlimit_mtinfo2 { + char name[NAME_MAX]; + struct hashlimit_cfg2 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_HASHLIMIT_H */ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index e93d9e0a3f35..44a095ecc7b7 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -57,6 +57,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) /* need to declare this at the top */ static const struct file_operations dl_file_ops_v1; +static const struct file_operations dl_file_ops; /* hash table crap */ struct dsthash_dst { @@ -86,8 +87,8 @@ struct dsthash_ent { unsigned long expires; /* precalculated expiry time */ struct { unsigned long prev; /* last modification */ - u_int32_t credit; - u_int32_t credit_cap, cost; + u_int64_t credit; + u_int64_t credit_cap, cost; } rateinfo; struct rcu_head rcu; }; @@ -98,7 +99,7 @@ struct xt_hashlimit_htable { u_int8_t family; bool rnd_initialized; - struct hashlimit_cfg1 cfg; /* config */ + struct hashlimit_cfg2 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ @@ -114,6 +115,30 @@ struct xt_hashlimit_htable { struct hlist_head hash[0]; /* hashtable itself */ }; +static int +cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) +{ + if (revision == 1) { + struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from; + + to->mode = cfg->mode; + to->avg = cfg->avg; + to->burst = cfg->burst; + to->size = cfg->size; + to->max = cfg->max; + to->gc_interval = cfg->gc_interval; + to->expire = cfg->expire; + to->srcmask = cfg->srcmask; + to->dstmask = cfg->dstmask; + } else if (revision == 2) { + memcpy(to, from, sizeof(struct hashlimit_cfg2)); + } else { + return -EINVAL; + } + + return 0; +} + static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */ static struct kmem_cache *hashlimit_cachep __read_mostly; @@ -215,16 +240,18 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(struct work_struct *work); -static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, - u_int8_t family) +static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, + const char *name, u_int8_t family, + struct xt_hashlimit_htable **out_hinfo, + int revision) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - unsigned int size; - unsigned int i; + unsigned int size, i; + int ret; - if (minfo->cfg.size) { - size = minfo->cfg.size; + if (cfg->size) { + size = cfg->size; } else { size = (totalram_pages << PAGE_SHIFT) / 16384 / sizeof(struct list_head); @@ -238,10 +265,14 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, sizeof(struct list_head) * size); if (hinfo == NULL) return -ENOMEM; - minfo->hinfo = hinfo; + *out_hinfo = hinfo; /* copy match config into hashtable config */ - memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); + ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2); + + if (ret) + return ret; + hinfo->cfg.size = size; if (hinfo->cfg.max == 0) hinfo->cfg.max = 8 * hinfo->cfg.size; @@ -255,17 +286,18 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; - hinfo->name = kstrdup(minfo->name, GFP_KERNEL); + hinfo->name = kstrdup(name, GFP_KERNEL); if (!hinfo->name) { vfree(hinfo); return -ENOMEM; } spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create_data(minfo->name, 0, + hinfo->pde = proc_create_data(name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - &dl_file_ops_v1, hinfo); + (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops, + hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -399,6 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -408,8 +441,11 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4)) #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8)) #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) +#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32)) #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) +#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1) +#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ) #define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1) /* in byte mode, the lowest possible rate is one packet/second. @@ -425,15 +461,24 @@ static u32 xt_hashlimit_len_to_chunks(u32 len) } /* Precision saver. */ -static u32 user2credits(u32 user) +static u64 user2credits(u64 user, int revision) { - /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) - /* Divide first. */ - return (user / XT_HASHLIMIT_SCALE) *\ - HZ * CREDITS_PER_JIFFY_v1; + if (revision == 1) { + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) + /* Divide first. */ + return (user / XT_HASHLIMIT_SCALE) *\ + HZ * CREDITS_PER_JIFFY_v1; + + return (user * HZ * CREDITS_PER_JIFFY_v1) \ + / XT_HASHLIMIT_SCALE; + } else { + if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + return (user / XT_HASHLIMIT_SCALE_v2) *\ + HZ * CREDITS_PER_JIFFY; - return (user * HZ * CREDITS_PER_JIFFY_v1) / XT_HASHLIMIT_SCALE; + return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2; + } } static u32 user2credits_byte(u32 user) @@ -443,10 +488,11 @@ static u32 user2credits_byte(u32 user) return (u32) (us >> 32); } -static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, + u32 mode, int revision) { unsigned long delta = now - dh->rateinfo.prev; - u32 cap; + u64 cap, cpj; if (delta == 0) return; @@ -454,7 +500,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) dh->rateinfo.prev = now; if (mode & XT_HASHLIMIT_BYTES) { - u32 tmp = dh->rateinfo.credit; + u64 tmp = dh->rateinfo.credit; dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta; cap = CREDITS_PER_JIFFY_BYTES * HZ; if (tmp >= dh->rateinfo.credit) {/* overflow */ @@ -462,7 +508,9 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) return; } } else { - dh->rateinfo.credit += delta * CREDITS_PER_JIFFY_v1; + cpj = (revision == 1) ? + CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY; + dh->rateinfo.credit += delta * cpj; cap = dh->rateinfo.credit_cap; } if (dh->rateinfo.credit > cap) @@ -470,7 +518,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) } static void rateinfo_init(struct dsthash_ent *dh, - struct xt_hashlimit_htable *hinfo) + struct xt_hashlimit_htable *hinfo, int revision) { dh->rateinfo.prev = jiffies; if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { @@ -479,8 +527,8 @@ static void rateinfo_init(struct dsthash_ent *dh, dh->rateinfo.credit_cap = hinfo->cfg.burst; } else { dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + hinfo->cfg.burst, revision); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision); dh->rateinfo.credit_cap = dh->rateinfo.credit; } } @@ -604,15 +652,15 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) } static bool -hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, + struct xt_hashlimit_htable *hinfo, + const struct hashlimit_cfg2 *cfg, int revision) { - const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; - struct xt_hashlimit_htable *hinfo = info->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; bool race = false; - u32 cost; + u64 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; @@ -627,18 +675,18 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) } else if (race) { /* Already got an entry, update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } else { dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_init(dh, hinfo); + rateinfo_init(dh, hinfo, revision); } } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } - if (info->cfg.mode & XT_HASHLIMIT_BYTES) + if (cfg->mode & XT_HASHLIMIT_BYTES) cost = hashlimit_byte_cost(skb->len, dh); else cost = dh->rateinfo.cost; @@ -648,70 +696,126 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); rcu_read_unlock_bh(); - return !(info->cfg.mode & XT_HASHLIMIT_INVERT); + return !(cfg->mode & XT_HASHLIMIT_INVERT); } spin_unlock(&dh->lock); rcu_read_unlock_bh(); /* default match is underlimit - so over the limit, we need to invert */ - return info->cfg.mode & XT_HASHLIMIT_INVERT; + return cfg->mode & XT_HASHLIMIT_INVERT; hotdrop: par->hotdrop = true; return false; } -static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) +static bool +hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + struct hashlimit_cfg2 cfg = {}; + int ret; + + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_common(skb, par, hinfo, &cfg, 1); +} + +static bool +hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + + return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2); +} + +static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, + struct xt_hashlimit_htable **hinfo, + struct hashlimit_cfg2 *cfg, + const char *name, int revision) { struct net *net = par->net; - struct xt_hashlimit_mtinfo1 *info = par->matchinfo; int ret; - if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) - return -EINVAL; - if (info->name[sizeof(info->name)-1] != '\0') + if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; if (par->family == NFPROTO_IPV4) { - if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) + if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; } else { - if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128) + if (cfg->srcmask > 128 || cfg->dstmask > 128) return -EINVAL; } - if (info->cfg.mode & ~XT_HASHLIMIT_ALL) { + if (cfg->mode & ~XT_HASHLIMIT_ALL) { pr_info("Unknown mode mask %X, kernel too old?\n", - info->cfg.mode); + cfg->mode); return -EINVAL; } /* Check for overflow. */ - if (info->cfg.mode & XT_HASHLIMIT_BYTES) { - if (user2credits_byte(info->cfg.avg) == 0) { - pr_info("overflow, rate too high: %u\n", info->cfg.avg); + if (cfg->mode & XT_HASHLIMIT_BYTES) { + if (user2credits_byte(cfg->avg) == 0) { + pr_info("overflow, rate too high: %llu\n", cfg->avg); return -EINVAL; } - } else if (info->cfg.burst == 0 || - user2credits(info->cfg.avg * info->cfg.burst) < - user2credits(info->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); + } else if (cfg->burst == 0 || + user2credits(cfg->avg * cfg->burst, revision) < + user2credits(cfg->avg, revision)) { + pr_info("overflow, try lower: %llu/%llu\n", + cfg->avg, cfg->burst); return -ERANGE; } mutex_lock(&hashlimit_mutex); - info->hinfo = htable_find_get(net, info->name, par->family); - if (info->hinfo == NULL) { - ret = htable_create_v1(net, info, par->family); + *hinfo = htable_find_get(net, name, par->family); + if (*hinfo == NULL) { + ret = htable_create(net, cfg, name, par->family, + hinfo, revision); if (ret < 0) { mutex_unlock(&hashlimit_mutex); return ret; } } mutex_unlock(&hashlimit_mutex); + return 0; } +static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct hashlimit_cfg2 cfg = {}; + int ret; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_check_common(par, &info->hinfo, + &cfg, info->name, 1); +} + +static int hashlimit_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, + info->name, 2); +} + static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -719,6 +823,13 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) htable_put(info->hinfo); } +static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + htable_put(info->hinfo); +} + static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", @@ -730,6 +841,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV4, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "hashlimit", @@ -741,6 +862,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy_v1, .me = THIS_MODULE, }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV6, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo2), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #endif }; @@ -787,18 +918,12 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, - struct seq_file *s) +static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) { - const struct xt_hashlimit_htable *ht = s->private; - - spin_lock(&ent->lock); - /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies, ht->cfg.mode); - switch (family) { case NFPROTO_IPV4: - seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", + seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip.src, ntohs(ent->dst.src_port), @@ -809,7 +934,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: - seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", + seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip6.src, ntohs(ent->dst.src_port), @@ -822,6 +947,34 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, default: BUG(); } +} + +static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1); + + dl_seq_print(ent, family, s); + + spin_unlock(&ent->lock); + return seq_has_overflowed(s); +} + +static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2); + + dl_seq_print(ent, family, s); + spin_unlock(&ent->lock); return seq_has_overflowed(s); } @@ -840,6 +993,20 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) return 0; } +static int dl_seq_show(struct seq_file *s, void *v) +{ + struct xt_hashlimit_htable *htable = s->private; + unsigned int *bucket = (unsigned int *)v; + struct dsthash_ent *ent; + + if (!hlist_empty(&htable->hash[*bucket])) { + hlist_for_each_entry(ent, &htable->hash[*bucket], node) + if (dl_seq_real_show(ent, htable->family, s)) + return -1; + } + return 0; +} + static const struct seq_operations dl_seq_ops_v1 = { .start = dl_seq_start, .next = dl_seq_next, @@ -847,6 +1014,13 @@ static const struct seq_operations dl_seq_ops_v1 = { .show = dl_seq_show_v1 }; +static const struct seq_operations dl_seq_ops = { + .start = dl_seq_start, + .next = dl_seq_next, + .stop = dl_seq_stop, + .show = dl_seq_show +}; + static int dl_proc_open_v1(struct inode *inode, struct file *file) { int ret = seq_open(file, &dl_seq_ops_v1); @@ -858,6 +1032,18 @@ static int dl_proc_open_v1(struct inode *inode, struct file *file) return ret; } +static int dl_proc_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &dl_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + + sf->private = PDE_DATA(inode); + } + return ret; +} + static const struct file_operations dl_file_ops_v1 = { .owner = THIS_MODULE, .open = dl_proc_open_v1, @@ -866,6 +1052,14 @@ static const struct file_operations dl_file_ops_v1 = { .release = seq_release }; +static const struct file_operations dl_file_ops = { + .owner = THIS_MODULE, + .open = dl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + static int __net_init hashlimit_proc_net_init(struct net *net) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); -- cgit v1.2.3 From 0f3cd9b3697708c86a825ae3cedabf7be6fd3e72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Sep 2016 15:23:33 +0200 Subject: netfilter: nf_tables: add range expression Inverse ranges != [a,b] are not currently possible because rules are composites of && operations, and we need to express this: data < a || data > b This patch adds a new range expression. Positive ranges can be already through two cmp expressions: cmp(sreg, data, >=) cmp(sreg, data, <=) This new range expression provides an alternative way to express this. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 3 + include/uapi/linux/netfilter/nf_tables.h | 29 +++++++ net/netfilter/Makefile | 3 +- net/netfilter/nf_tables_core.c | 7 +- net/netfilter/nft_range.c | 138 +++++++++++++++++++++++++++++++ 5 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 net/netfilter/nft_range.c (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a9060dd99db7..00f4f6b1b1ba 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -28,6 +28,9 @@ extern const struct nft_expr_ops nft_cmp_fast_ops; int nft_cmp_module_init(void); void nft_cmp_module_exit(void); +int nft_range_module_init(void); +void nft_range_module_exit(void); + int nft_lookup_module_init(void); void nft_lookup_module_exit(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1cf41dd838b2..c6c4477c136b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,35 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +/** + * enum nft_range_ops - nf_tables range operator + * + * @NFT_RANGE_EQ: equal + * @NFT_RANGE_NEQ: not equal + */ +enum nft_range_ops { + NFT_RANGE_EQ, + NFT_RANGE_NEQ, +}; + +/** + * enum nft_range_attributes - nf_tables range expression netlink attributes + * + * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) + * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) + */ +enum nft_range_attributes { + NFTA_RANGE_UNSPEC, + NFTA_RANGE_SREG, + NFTA_RANGE_OP, + NFTA_RANGE_FROM_DATA, + NFTA_RANGE_TO_DATA, + __NFTA_RANGE_MAX +}; +#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1) + enum nft_lookup_flags { NFT_LOOKUP_F_INV = (1 << 0), }; diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0c8581100ac6..c23c3c84416f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -71,8 +71,9 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o +nf_tables-objs += nft_lookup.o nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 67259cefef06..7c94ce0080d5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -263,8 +263,13 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err7; - return 0; + err = nft_range_module_init(); + if (err < 0) + goto err8; + return 0; +err8: + nft_dynset_module_exit(); err7: nft_payload_module_exit(); err6: diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c new file mode 100644 index 000000000000..c6d5358482d1 --- /dev/null +++ b/net/netfilter/nft_range.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_range_expr { + struct nft_data data_from; + struct nft_data data_to; + enum nft_registers sreg:8; + u8 len; + enum nft_range_ops op:8; +}; + +static void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + bool mismatch; + int d1, d2; + + d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); + d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); + switch (priv->op) { + case NFT_RANGE_EQ: + mismatch = (d1 < 0 || d2 > 0); + break; + case NFT_RANGE_NEQ: + mismatch = (d1 >= 0 && d2 <= 0); + break; + } + + if (mismatch) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { + [NFTA_RANGE_SREG] = { .type = NLA_U32 }, + [NFTA_RANGE_OP] = { .type = NLA_U32 }, + [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, + [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_range_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc_from, desc_to; + int err; + + err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), + &desc_from, tb[NFTA_RANGE_FROM_DATA]); + if (err < 0) + return err; + + err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), + &desc_to, tb[NFTA_RANGE_TO_DATA]); + if (err < 0) + goto err1; + + if (desc_from.len != desc_to.len) { + err = -EINVAL; + goto err2; + } + + priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); + err = nft_validate_register_load(priv->sreg, desc_from.len); + if (err < 0) + goto err2; + + priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + priv->len = desc_from.len; + return 0; +err2: + nft_data_uninit(&priv->data_to, desc_to.type); +err1: + nft_data_uninit(&priv->data_from, desc_from.type); + return err; +} + +static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from, + NFT_DATA_VALUE, priv->len) < 0 || + nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_range_type; +static const struct nft_expr_ops nft_range_ops = { + .type = &nft_range_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), + .eval = nft_range_eval, + .init = nft_range_init, + .dump = nft_range_dump, +}; + +static struct nft_expr_type nft_range_type __read_mostly = { + .name = "range", + .ops = &nft_range_ops, + .policy = nft_range_policy, + .maxattr = NFTA_RANGE_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_range_module_init(void) +{ + return nft_register_expr(&nft_range_type); +} + +void nft_range_module_exit(void) +{ + nft_unregister_expr(&nft_range_type); +} -- cgit v1.2.3 From ff107d27761ff4b644c82c209e004ec9c8fbbc22 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Sep 2016 16:35:56 +0800 Subject: netfilter: nft_log: complete NFTA_LOG_FLAGS attr support NFTA_LOG_FLAGS attribute is already supported, but the related NF_LOG_XXX flags are not exposed to the userspace. So we cannot explicitly enable log flags to log uid, tcp sequence, ip options and so on, i.e. such rule "nft add rule filter output log uid" is not supported yet. So move NF_LOG_XXX macro definitions to the uapi/../nf_log.h. In order to keep consistent with other modules, change NF_LOG_MASK to refer to all supported log flags. On the other hand, add a new NF_LOG_DEFAULT_MASK to refer to the original default log flags. Finally, if user specify the unsupported log flags or NFTA_LOG_GROUP and NFTA_LOG_FLAGS are set at the same time, report EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 11 +++-------- include/uapi/linux/netfilter/nf_log.h | 12 ++++++++++++ net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/nf_log_arp.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 4 ++-- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_log.c | 9 ++++++++- 10 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 include/uapi/linux/netfilter/nf_log.h (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index ee07dc8b0a7b..309cd267be4f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -2,15 +2,10 @@ #define _NF_LOG_H #include +#include -/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will - * disappear once iptables is replaced with pkttables. Please DO NOT use them - * for any new code! */ -#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ -#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ -#define NF_LOG_IPOPT 0x04 /* Log IP options */ -#define NF_LOG_UID 0x08 /* Log UID owning local socket */ -#define NF_LOG_MASK 0x0f +/* Log tcp sequence, tcp options, ip options and uid owning local socket */ +#define NF_LOG_DEFAULT_MASK 0x0f /* This flag indicates that copy_len field in nf_loginfo is set */ #define NF_LOG_F_COPY_LEN 0x1 diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h new file mode 100644 index 000000000000..8be21e02387d --- /dev/null +++ b/include/uapi/linux/netfilter/nf_log.h @@ -0,0 +1,12 @@ +#ifndef _NETFILTER_NF_LOG_H +#define _NETFILTER_NF_LOG_H + +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ +#define NF_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define NF_LOG_MASK 0x2f + +#endif /* _NETFILTER_NF_LOG_H */ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 152300d164ac..9a11086ba6ff 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -91,7 +91,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, if (loginfo->type == NF_LOG_TYPE_LOG) bitmask = loginfo->u.log.logflags; else - bitmask = NF_LOG_MASK; + bitmask = NF_LOG_DEFAULT_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f993545a3373..7c00ce90adb8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -156,7 +156,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = 4, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 8945c2653814..b24795e2ee6d 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 20f225593a8b..5b571e1b5f15 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -29,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -46,7 +46,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 552fac2f390a..55aacea24396 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index c1bcf699a23d..f6aee2895fee 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 7c94ce0080d5..0dd5c695482f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -34,7 +34,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 24a73bb26e94..1b01404bb33f 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -58,8 +58,11 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_LEVEL] != NULL && tb[NFTA_LOG_GROUP] != NULL) return -EINVAL; - if (tb[NFTA_LOG_GROUP] != NULL) + if (tb[NFTA_LOG_GROUP] != NULL) { li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_FLAGS] != NULL) + return -EINVAL; + } nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -87,6 +90,10 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + if (li->u.log.logflags & ~NF_LOG_MASK) { + err = -EINVAL; + goto err1; + } } break; case NF_LOG_TYPE_ULOG: -- cgit v1.2.3