From 5ebb335dcbe63470c88c4f80f2d571089543b638 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 21 Mar 2015 15:19:15 +0000 Subject: netfilter: nf_tables: move struct net pointer to base chain The network namespace is only needed for base chains to get at the gencursor. Also convert to possible_net_t. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d756af559977..ace67a549b30 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -449,7 +449,6 @@ enum nft_chain_flags { * * @rules: list of rules in the chain * @list: used internally - * @net: net namespace that this chain belongs to * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain @@ -460,7 +459,6 @@ enum nft_chain_flags { struct nft_chain { struct list_head rules; struct list_head list; - struct net *net; struct nft_table *table; u64 handle; u32 use; @@ -512,6 +510,7 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops + * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -519,6 +518,7 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; + possible_net_t pnet; const struct nf_chain_type *type; u8 policy; struct nft_stats __percpu *stats; -- cgit v1.2.3 From 49f7b33e63fec9d16e7ee62ba8f8ab4159cbdc26 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:45 +0000 Subject: rhashtable: provide len to obj_hashfn nftables sets will be converted to use so called setextensions, moving the key to a non-fixed position. To hash it, the obj_hashfn must be used, however it so far doesn't receive the length parameter. 
Pass the key length to obj_hashfn() and convert existing users. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/rhashtable.h | 6 ++++-- lib/rhashtable.c | 2 +- net/netlink/af_netlink.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99f2e49a8a07..e23d242d1230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -88,7 +88,7 @@ struct rhashtable_compare_arg { }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, const void *obj); @@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn( const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? - rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4b7b7e672b93..4898442b837f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -691,7 +691,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * struct rhash_head node; * }; * - * u32 my_hash_fn(const void *data, u32 seed) + * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4caa809dbbe0..19909d0786a2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3127,7 +3127,7 @@ static struct pernet_operations __net_initdata netlink_net_ops = { .exit = netlink_net_exit, }; -static inline u32 netlink_hash(const void *data, u32 seed) +static inline u32 netlink_hash(const void *data, u32 len, u32 seed) { const struct 
netlink_sock *nlk = data; struct netlink_compare_arg arg; -- cgit v1.2.3 From 3ac4c07a24007f0f45d2082b745508768a8e21cf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:49 +0000 Subject: netfilter: nf_tables: add set extensions Add simple set extension infrastructure for maintaining variable sized and optional per element data. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 105 ++++++++++++++++++++++++++++++++++++++ net/netfilter/nf_tables_api.c | 16 ++++++ 2 files changed, 121 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ace67a549b30..038f8a67ca1f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -311,6 +311,111 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); +/** + * enum nft_set_extensions - set extension type IDs + * + * @NFT_SET_EXT_KEY: element key + * @NFT_SET_EXT_DATA: mapping data + * @NFT_SET_EXT_FLAGS: element flags + * @NFT_SET_EXT_NUM: number of extension types + */ +enum nft_set_extensions { + NFT_SET_EXT_KEY, + NFT_SET_EXT_DATA, + NFT_SET_EXT_FLAGS, + NFT_SET_EXT_NUM +}; + +/** + * struct nft_set_ext_type - set extension type + * + * @len: fixed part length of the extension + * @align: alignment requirements of the extension + */ +struct nft_set_ext_type { + u8 len; + u8 align; +}; + +extern const struct nft_set_ext_type nft_set_ext_types[]; + +/** + * struct nft_set_ext_tmpl - set extension template + * + * @len: length of extension area + * @offset: offsets of individual extension types + */ +struct nft_set_ext_tmpl { + u16 len; + u8 offset[NFT_SET_EXT_NUM]; +}; + +/** + * struct nft_set_ext - set extensions + * + * @offset: offsets of individual extension types + * @data: beginning of extension data + */ +struct 
nft_set_ext { + u8 offset[NFT_SET_EXT_NUM]; + char data[0]; +}; + +static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) +{ + memset(tmpl, 0, sizeof(*tmpl)); + tmpl->len = sizeof(struct nft_set_ext); +} + +static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) +{ + tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); + BUG_ON(tmpl->len > U8_MAX); + tmpl->offset[id] = tmpl->len; + tmpl->len += nft_set_ext_types[id].len + len; +} + +static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +{ + nft_set_ext_add_length(tmpl, id, 0); +} + +static inline void nft_set_ext_init(struct nft_set_ext *ext, + const struct nft_set_ext_tmpl *tmpl) +{ + memcpy(ext->offset, tmpl->offset, sizeof(ext->offset)); +} + +static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return !!ext->offset[id]; +} + +static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) +{ + return ext && __nft_set_ext_exists(ext, id); +} + +static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id) +{ + return (void *)ext + ext->offset[id]; +} + +static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_KEY); +} + +static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_DATA); +} + +static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_FLAGS); +} /** * struct nft_expr_type - nf_tables expression type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0b969b66cb77..972c47f6e823 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2827,6 +2827,22 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, nf_tables_set_destroy(ctx, set); } +const struct nft_set_ext_type nft_set_ext_types[] = { + [NFT_SET_EXT_KEY] = { + .len 
= sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_DATA] = { + .len = sizeof(struct nft_data), + .align = __alignof__(struct nft_data), + }, + [NFT_SET_EXT_FLAGS] = { + .len = sizeof(u8), + .align = __alignof__(u8), + }, +}; +EXPORT_SYMBOL_GPL(nft_set_ext_types); + /* * Set elements */ -- cgit v1.2.3 From fe2811ebeb97a7a76de0b2b35f13600169508393 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:50 +0000 Subject: netfilter: nf_tables: convert hash and rbtree to set extensions The set implementations' private struct will only contain the elements needed to maintain the search structure, all other elements are moved to the set extensions. Element allocation and initialization is performed centrally by nf_tables_api instead of by the different set implementations' ->insert() functions. A new "elemsize" member in the set ops specifies the amount of memory to reserve for internal usage. Destruction will also be moved out of the set implementations by a following patch. Except for element allocation, the patch is a simple conversion to using data from the extension area. 
Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 14 +++-- net/netfilter/nf_tables_api.c | 119 ++++++++++++++++++++++++++++---------- net/netfilter/nft_hash.c | 56 +++++------------- net/netfilter/nft_rbtree.c | 64 +++++++------------- 4 files changed, 132 insertions(+), 121 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 038f8a67ca1f..ef3457c1cb62 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -140,8 +140,7 @@ struct nft_userdata { * * @cookie: implementation specific element cookie * @key: element key - * @data: element data (maps only) - * @flags: element flags (end of interval) + * @priv: element private data and extensions * * The cookie can be used to store a handle to the element for subsequent * removal. @@ -149,8 +148,7 @@ struct nft_userdata { struct nft_set_elem { void *cookie; struct nft_data key; - struct nft_data data; - u32 flags; + void *priv; }; struct nft_set; @@ -214,6 +212,7 @@ struct nft_set_estimate { * @destroy: destroy private data of set instance * @list: nf_tables_set_ops list node * @owner: module reference + * @elemsize: element private size * @features: features supported by the implementation */ struct nft_set_ops { @@ -241,6 +240,7 @@ struct nft_set_ops { struct list_head list; struct module *owner; + unsigned int elemsize; u32 features; }; @@ -417,6 +417,12 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } +static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, + void *elem) +{ + return elem + set->ops->elemsize; +} + /** * struct nft_expr_type - nf_tables expression type * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 972c47f6e823..99cb884b985f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2771,10 
+2771,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, + return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE); } @@ -2889,6 +2890,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -2896,20 +2898,20 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, - set->klen) < 0) + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext), + NFT_DATA_VALUE, set->klen) < 0) goto nla_put_failure; - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && - nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), set->dtype == NFT_DATA_VERDICT ? 
NFT_DATA_VERDICT : NFT_DATA_VALUE, set->dlen) < 0) goto nla_put_failure; - if (elem->flags != 0) - if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) - goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, + htonl(*nft_set_ext_flags(ext)))) + goto nla_put_failure; nla_nest_end(skb, nest); return 0; @@ -3130,15 +3132,42 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } +static void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + gfp_t gfp) +{ + struct nft_set_ext *ext; + void *elem; + + elem = kzalloc(set->ops->elemsize + tmpl->len, gfp); + if (elem == NULL) + return NULL; + + ext = nft_set_elem_ext(set, elem); + nft_set_ext_init(ext, tmpl); + + memcpy(nft_set_ext_key(ext), key, set->klen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + memcpy(nft_set_ext_data(ext), data, set->dlen); + + return elem; +} + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; + struct nft_set_ext_tmpl tmpl; + struct nft_set_ext *ext; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u32 flags; int err; if (set->size && set->nelems == set->size) @@ -3152,22 +3181,26 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) return -EINVAL; - elem.flags = 0; + nft_set_ext_prepare(&tmpl); + + flags = 0; if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { - elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); - if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (flags & ~NFT_SET_ELEM_INTERVAL_END) return -EINVAL; if (!(set->flags & NFT_SET_INTERVAL) && - elem.flags & 
NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; + if (flags != 0) + nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && - !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; if (nla[NFTA_SET_ELEM_DATA] != NULL && - elem.flags & NFT_SET_ELEM_INTERVAL_END) + flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) @@ -3185,8 +3218,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (set->ops->get(set, &elem) == 0) goto err2; + nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); + if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; @@ -3203,29 +3238,42 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, }; err = nft_validate_data_load(&bind_ctx, dreg, - &elem.data, d2.type); + &data, d2.type); if (err < 0) goto err3; } + + nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); } + err = -ENOMEM; + elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL); + if (elem.priv == NULL) + goto err3; + + ext = nft_set_elem_ext(set, elem.priv); + if (flags) + *nft_set_ext_flags(ext) = flags; + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) - goto err3; + goto err4; err = set->ops->insert(set, &elem); if (err < 0) - goto err4; + goto err5; nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; -err4: +err5: kfree(trans); +err4: + kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&elem.data, d2.type); + nft_data_uninit(&data, d2.type); err2: nft_data_uninit(&elem.key, d1.type); err1: @@ -3557,6 +3605,7 @@ static int nf_tables_commit(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, 
*next; struct nft_trans_elem *te; + struct nft_set_ext *ext; /* Bump generation counter, invalidate any dump in progress */ while (++net->nft.base_seq == 0); @@ -3641,14 +3690,16 @@ static int nf_tables_commit(struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + ext = nft_set_elem_ext(te->set, te->elem.priv); + nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); te->set->ops->get(te->set, &te->elem); nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), + te->set->dtype); te->set->ops->remove(te->set, &te->elem); nft_trans_destroy(trans); break; @@ -3691,6 +3742,7 @@ static int nf_tables_abort(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; + struct nft_set_ext *ext; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -3752,11 +3804,13 @@ static int nf_tables_abort(struct sk_buff *skb) case NFT_MSG_NEWSETELEM: nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; + ext = nft_set_elem_ext(te->set, te->elem.priv); + te->set->ops->get(te->set, &te->elem); nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->set->flags & NFT_SET_MAP && - !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(&te->elem.data, te->set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), + te->set->dtype); te->set->ops->remove(te->set, &te->elem); nft_trans_destroy(trans); break; @@ -3836,13 +3890,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { - if (elem->flags & NFT_SET_ELEM_INTERVAL_END) + const struct 
nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_data *data; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) return 0; - switch (elem->data.verdict) { + data = nft_set_ext_data(ext); + switch (data->verdict) { case NFT_JUMP: case NFT_GOTO: - return nf_tables_check_loops(ctx, elem->data.chain); + return nf_tables_check_loops(ctx, data->chain); default: return 0; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index dc96a7e94f80..15951a823d1d 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -29,8 +29,7 @@ struct nft_hash { struct nft_hash_elem { struct rhash_head node; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; struct nft_hash_cmp_arg { @@ -51,7 +50,7 @@ static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) { const struct nft_hash_elem *he = data; - return jhash(&he->key, len, seed); + return jhash(nft_set_ext_key(&he->ext), len, seed); } static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, @@ -60,7 +59,7 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, const struct nft_hash_cmp_arg *x = arg->key; const struct nft_hash_elem *he = ptr; - if (nft_data_cmp(&he->key, x->key, x->set->klen)) + if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; return 0; } @@ -78,7 +77,7 @@ static bool nft_hash_lookup(const struct nft_set *set, he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he && set->flags & NFT_SET_MAP) - nft_data_copy(data, he->data); + nft_data_copy(data, nft_set_ext_data(&he->ext)); return !!he; } @@ -87,43 +86,22 @@ static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_hash_elem *he = elem->priv; struct nft_hash_cmp_arg arg = { .set = set, .key = &elem->key, }; - unsigned int size; - int err; - - if 
(elem->flags != 0) - return -EINVAL; - - size = sizeof(*he); - if (set->flags & NFT_SET_MAP) - size += sizeof(he->data[0]); - - he = kzalloc(size, GFP_KERNEL); - if (he == NULL) - return -ENOMEM; - - nft_data_copy(&he->key, &elem->key); - if (set->flags & NFT_SET_MAP) - nft_data_copy(he->data, &elem->data); - - err = rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); - if (err) - kfree(he); - return err; + return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); } static void nft_hash_elem_destroy(const struct nft_set *set, struct nft_hash_elem *he) { - nft_data_uninit(&he->key, NFT_DATA_VALUE); + nft_data_uninit(nft_set_ext_key(&he->ext), NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP) - nft_data_uninit(he->data, set->dtype); + nft_data_uninit(nft_set_ext_data(&he->ext), set->dtype); kfree(he); } @@ -150,10 +128,7 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) if (!he) return -ENOENT; - elem->cookie = he; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + elem->priv = he; return 0; } @@ -162,7 +137,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_elem *he; + struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; int err; @@ -192,10 +167,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, if (iter->count < iter->skip) goto cont; - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; + elem.priv = he; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -254,9 +226,6 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, unsigned int esize; esize = sizeof(struct nft_hash_elem); - if (features & NFT_SET_MAP) - esize += 
FIELD_SIZEOF(struct nft_hash_elem, data[0]); - if (desc->size) { est->size = sizeof(struct nft_hash) + roundup_pow_of_two(desc->size * 4 / 3) * @@ -278,6 +247,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 2c75361077f7..ebf6e60df41c 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -26,9 +26,7 @@ struct nft_rbtree { struct nft_rbtree_elem { struct rb_node node; - u16 flags; - struct nft_data key; - struct nft_data data[]; + struct nft_set_ext ext; }; static bool nft_rbtree_lookup(const struct nft_set *set, @@ -45,7 +43,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = parent->rb_left; interval = rbe; @@ -53,10 +51,12 @@ static bool nft_rbtree_lookup(const struct nft_set *set, parent = parent->rb_right; else { found: - if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(&rbe->ext) & + NFT_SET_ELEM_INTERVAL_END) goto out; if (set->flags & NFT_SET_MAP) - nft_data_copy(data, rbe->data); + nft_data_copy(data, nft_set_ext_data(&rbe->ext)); spin_unlock_bh(&nft_rbtree_lock); return true; @@ -75,10 +75,10 @@ out: static void nft_rbtree_elem_destroy(const struct nft_set *set, struct nft_rbtree_elem *rbe) { - nft_data_uninit(&rbe->key, NFT_DATA_VALUE); + nft_data_uninit(nft_set_ext_key(&rbe->ext), NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_uninit(rbe->data, set->dtype); + 
nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(&rbe->ext), set->dtype); kfree(rbe); } @@ -96,7 +96,9 @@ static int __nft_rbtree_insert(const struct nft_set *set, while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &new->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), + nft_set_ext_key(&new->ext), + set->klen); if (d < 0) p = &parent->rb_left; else if (d > 0) @@ -112,31 +114,13 @@ static int __nft_rbtree_insert(const struct nft_set *set, static int nft_rbtree_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rbtree_elem *rbe; - unsigned int size; + struct nft_rbtree_elem *rbe = elem->priv; int err; - size = sizeof(*rbe); - if (set->flags & NFT_SET_MAP && - !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) - size += sizeof(rbe->data[0]); - - rbe = kzalloc(size, GFP_KERNEL); - if (rbe == NULL) - return -ENOMEM; - - rbe->flags = elem->flags; - nft_data_copy(&rbe->key, &elem->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(rbe->data, &elem->data); - spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); - if (err < 0) - kfree(rbe); - spin_unlock_bh(&nft_rbtree_lock); + return err; } @@ -162,17 +146,15 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key, + set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) parent = parent->rb_right; else { elem->cookie = rbe; - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem->data, rbe->data); - elem->flags = rbe->flags; + elem->priv = rbe; return 0; } } @@ -184,7 +166,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_iter 
*iter) { const struct nft_rbtree *priv = nft_set_priv(set); - const struct nft_rbtree_elem *rbe; + struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; @@ -194,11 +176,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, goto cont; rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_data_copy(&elem.key, &rbe->key); - if (set->flags & NFT_SET_MAP && - !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) - nft_data_copy(&elem.data, rbe->data); - elem.flags = rbe->flags; + elem.priv = rbe; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { @@ -245,9 +223,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, unsigned int nsize; nsize = sizeof(struct nft_rbtree_elem); - if (features & NFT_SET_MAP) - nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); - if (desc->size) est->size = sizeof(struct nft_rbtree) + desc->size * nsize; else @@ -260,6 +235,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, -- cgit v1.2.3 From 61edafbb47e9f46fb850035b1f8f062564445704 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:47 +0000 Subject: netfilter: nf_tables: consolide set element destruction With the conversion to set extensions, it is now possible to consolidate the different set element destruction functions. The set implementations' ->remove() functions are changed to only take the element out of their internal data structures. Elements will be freed in a batched fashion after the global transaction's completion RCU grace period. 
This reduces the amount of grace periods required for nft_hash from N to zero additional ones, additionally this guarantees that the set elements' extensions of all implementations can be used under RCU protection. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 34 ++++++++++++++++++++-------------- net/netfilter/nft_hash.c | 18 ++++-------------- net/netfilter/nft_rbtree.c | 14 +------------- 4 files changed, 27 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ef3457c1cb62..6ac63323afd2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -423,6 +423,8 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +void nft_set_elem_destroy(const struct nft_set *set, void *elem); + /** * struct nft_expr_type - nf_tables expression type * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 99cb884b985f..b35512f1934c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3155,6 +3155,18 @@ static void *nft_set_elem_init(const struct nft_set *set, return elem; } +void nft_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_uninit(nft_set_ext_data(ext), set->dtype); + + kfree(elem); +} +EXPORT_SYMBOL_GPL(nft_set_elem_destroy); + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3596,6 +3608,10 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_DELSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + 
nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3605,7 +3621,6 @@ static int nf_tables_commit(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; - struct nft_set_ext *ext; /* Bump generation counter, invalidate any dump in progress */ while (++net->nft.base_seq == 0); @@ -3690,18 +3705,12 @@ static int nf_tables_commit(struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - ext = nft_set_elem_ext(te->set, te->elem.priv); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), - te->set->dtype); te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); break; } } @@ -3733,6 +3742,10 @@ static void nf_tables_abort_release(struct nft_trans *trans) case NFT_MSG_NEWSET: nft_set_destroy(nft_trans_set(trans)); break; + case NFT_MSG_NEWSETELEM: + nft_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); + break; } kfree(trans); } @@ -3742,7 +3755,6 @@ static int nf_tables_abort(struct sk_buff *skb) struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; - struct nft_set_ext *ext; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -3804,15 +3816,9 @@ static int nf_tables_abort(struct sk_buff *skb) case NFT_MSG_NEWSETELEM: nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; - ext = nft_set_elem_ext(te->set, te->elem.priv); te->set->ops->get(te->set, &te->elem); - nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), - te->set->dtype); te->set->ops->remove(te->set, &te->elem); - nft_trans_destroy(trans); break; case 
NFT_MSG_DELSETELEM: nft_trans_elem_set(trans)->nelems++; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 15951a823d1d..94bf25def37f 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -96,23 +96,12 @@ static int nft_hash_insert(const struct nft_set *set, nft_hash_params); } -static void nft_hash_elem_destroy(const struct nft_set *set, - struct nft_hash_elem *he) -{ - nft_data_uninit(nft_set_ext_key(&he->ext), NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(nft_set_ext_data(&he->ext), set->dtype); - kfree(he); -} - static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); rhashtable_remove_fast(&priv->ht, elem->cookie, nft_hash_params); - synchronize_rcu(); - kfree(elem->cookie); } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) @@ -208,16 +197,17 @@ static int nft_hash_init(const struct nft_set *set, return rhashtable_init(&priv->ht, &params); } -static void nft_free_element(void *ptr, void *arg) +static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_hash_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr); } static void nft_hash_destroy(const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); - rhashtable_free_and_destroy(&priv->ht, nft_free_element, (void *)set); + rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + (void *)set); } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index ebf6e60df41c..332c6afc77e9 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -72,17 +72,6 @@ out: return false; } -static void nft_rbtree_elem_destroy(const struct nft_set *set, - struct nft_rbtree_elem *rbe) -{ - nft_data_uninit(nft_set_ext_key(&rbe->ext), NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP && -
nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(&rbe->ext), set->dtype); - - kfree(rbe); -} - static int __nft_rbtree_insert(const struct nft_set *set, struct nft_rbtree_elem *new) { @@ -133,7 +122,6 @@ static void nft_rbtree_remove(const struct nft_set *set, spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); spin_unlock_bh(&nft_rbtree_lock); - kfree(rbe); } static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) @@ -213,7 +201,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_rbtree_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe); } } -- cgit v1.2.3 From b2832dd6621bf73eb8ad38389a94bd83a5983886 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:48 +0000 Subject: netfilter: nf_tables: return set extensions from ->lookup() Return the extension area from the ->lookup() function to allow to consolidate common actions. 
Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++- net/netfilter/nft_hash.c | 6 +++--- net/netfilter/nft_lookup.c | 6 +++++- net/netfilter/nft_rbtree.c | 7 +++---- 4 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6ac63323afd2..f190d26bda7d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -200,6 +200,8 @@ struct nft_set_estimate { enum nft_set_class class; }; +struct nft_set_ext; + /** * struct nft_set_ops - nf_tables set operations * @@ -218,7 +220,7 @@ struct nft_set_estimate { struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data); + const struct nft_set_ext **ext); int (*get)(const struct nft_set *set, struct nft_set_elem *elem); int (*insert)(const struct nft_set *set, diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 94bf25def37f..5bee82195ef5 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -66,7 +66,7 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data) + const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; @@ -76,8 +76,8 @@ static bool nft_hash_lookup(const struct nft_set *set, }; he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he && set->flags & NFT_SET_MAP) - nft_data_copy(data, nft_set_ext_data(&he->ext)); + if (he != NULL) + *ext = &he->ext; return !!he; } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 9615b8b9fb37..a5f30b8760ea 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -31,9 +31,13 @@ static void nft_lookup_eval(const struct nft_expr *expr, { const struct nft_lookup *priv = 
nft_expr_priv(expr); const struct nft_set *set = priv->set; + const struct nft_set_ext *ext; - if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + if (set->ops->lookup(set, &data[priv->sreg], &ext)) { + if (set->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext)); return; + } data[NFT_REG_VERDICT].verdict = NFT_BREAK; } diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 332c6afc77e9..cbba755ebebc 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -31,7 +31,7 @@ struct nft_rbtree_elem { static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_data *key, - struct nft_data *data) + const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; @@ -55,10 +55,9 @@ found: *nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) goto out; - if (set->flags & NFT_SET_MAP) - nft_data_copy(data, nft_set_ext_data(&rbe->ext)); - spin_unlock_bh(&nft_rbtree_lock); + + *ext = &rbe->ext; return true; } } -- cgit v1.2.3 From ea4bd995b0f2fc5677ff8085e92a5d2544b9937c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:49 +0000 Subject: netfilter: nf_tables: add transaction helper functions Add some helper functions for building the genmask as preparation for set transactions. Also add a little documentation how this stuff actually works. 
Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 28 ++++++++++++++++++++++++++++ net/netfilter/nf_tables_api.c | 17 ++++++----------- net/netfilter/nf_tables_core.c | 6 +----- 3 files changed, 35 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f190d26bda7d..4c46a325874e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -720,6 +720,34 @@ void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_SET() \ MODULE_ALIAS("nft-set") +/* + * The gencursor defines two generations, the currently active and the + * next one. Objects contain a bitmask of 2 bits specifying the generations + * they're active in. A set bit means they're inactive in the generation + * represented by that bit. + * + * New objects start out as inactive in the current and active in the + * next generation. When committing the ruleset the bitmask is cleared, + * meaning they're active in all generations. When removing an object, + * it is set inactive in the next generation. After committing the ruleset, + * the objects are removed. + */ +static inline unsigned int nft_gencursor_next(const struct net *net) +{ + return net->nft.gencursor + 1 == 1 ? 
1 : 0; +} + +static inline u8 nft_genmask_next(const struct net *net) +{ + return 1 << nft_gencursor_next(net); +} + +static inline u8 nft_genmask_cur(const struct net *net) +{ + /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */ + return 1 << ACCESS_ONCE(net->nft.gencursor); +} + /** * struct nft_trans - nf_tables object update in transaction * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b35512f1934c..66fa5e935a55 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx) static inline bool nft_rule_is_active(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << net->nft.gencursor)) == 0; -} - -static inline int gencursor_next(struct net *net) -{ - return net->nft.gencursor+1 == 1 ? 1 : 0; + return (rule->genmask & nft_genmask_cur(net)) == 0; } static inline int nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) { - return (rule->genmask & (1 << gencursor_next(net))) == 0; + return (rule->genmask & nft_genmask_next(net)) == 0; } static inline void nft_rule_activate_next(struct net *net, struct nft_rule *rule) { /* Now inactive, will be active in the future */ - rule->genmask = (1 << net->nft.gencursor); + rule->genmask = nft_genmask_cur(net); } static inline void nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) { - rule->genmask = (1 << gencursor_next(net)); + rule->genmask = nft_genmask_next(net); } static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) { - rule->genmask &= ~(1 << gencursor_next(net)); + rule->genmask &= ~nft_genmask_next(net); } static int @@ -3626,7 +3621,7 @@ static int nf_tables_commit(struct sk_buff *skb) while (++net->nft.base_seq == 0); /* A new generation has just started */ - net->nft.gencursor = gencursor_next(net); + net->nft.gencursor = nft_gencursor_next(net); /* Make sure all packets have left the previous 
generation before * purging old rules. diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 4429008fe99d..ef4dfcbaf149 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -121,11 +121,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; struct nft_stats *stats; int rulenum; - /* - * Cache cursor to avoid problems in case that the cursor is updated - * while traversing the ruleset. - */ - unsigned int gencursor = ACCESS_ONCE(net->nft.gencursor); + unsigned int gencursor = nft_genmask_cur(net); do_chain: rulenum = 0; -- cgit v1.2.3 From cc02e457bb86f7b6ffee3651bab22d104b60effb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 14:08:50 +0000 Subject: netfilter: nf_tables: implement set transaction support Set elements are the last object type not supporting transactions. Implement them similarly to the existing rule transactions: The global transaction counter keeps track of two generations, current and next. Each element contains a bitmask specifying in which generations it is inactive. New elements start out as inactive in the current generation and active in the next. On commit, the previous next generation becomes the current generation and the element becomes active. The bitmask is then cleared to indicate that the element is active in all future generations. If the transaction is aborted, the element is removed from the set before it becomes active. When removing an element, it gets marked as inactive in the next generation. On commit the next generation becomes active and the element therefore inactive. It is then taken out of the set and released. On abort, the element is marked as active for the next generation again. Lookups ignore elements not active in the current generation. The current set types (hash/rbtree) both use a field in the extension area to store the generation mask.
This (currently) does not require any additional memory since we have some free space in there. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 33 +++++++++++++++++++++------ net/netfilter/nf_tables_api.c | 33 ++++++++++++++++----------- net/netfilter/nft_hash.c | 38 +++++++++++++++++++++++-------- net/netfilter/nft_rbtree.c | 48 +++++++++++++++++++++++++++++++-------- 4 files changed, 112 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4c46a325874e..b8cd60dcb4e1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -138,15 +138,10 @@ struct nft_userdata { /** * struct nft_set_elem - generic representation of set elements * - * @cookie: implementation specific element cookie * @key: element key * @priv: element private data and extensions - * - * The cookie can be used to store a handle to the element for subsequent - * removal. 
*/ struct nft_set_elem { - void *cookie; struct nft_data key; void *priv; }; @@ -207,6 +202,8 @@ struct nft_set_ext; * * @lookup: look up an element within the set * @insert: insert new element into set + * @activate: activate new element in the next generation + * @deactivate: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -221,10 +218,12 @@ struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, const struct nft_set_ext **ext); - int (*get)(const struct nft_set *set, - struct nft_set_elem *elem); int (*insert)(const struct nft_set *set, const struct nft_set_elem *elem); + void (*activate)(const struct nft_set *set, + const struct nft_set_elem *elem); + void * (*deactivate)(const struct nft_set *set, + const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, @@ -261,6 +260,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @nelems: number of elements * @policy: set parameterization (see enum nft_set_policies) * @ops: set ops + * @pnet: network namespace * @flags: set flags * @klen: key length * @dlen: data length @@ -277,6 +277,7 @@ struct nft_set { u16 policy; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; + possible_net_t pnet; u16 flags; u8 klen; u8 dlen; @@ -355,10 +356,12 @@ struct nft_set_ext_tmpl { /** * struct nft_set_ext - set extensions * + * @genmask: generation mask * @offset: offsets of individual extension types * @data: beginning of extension data */ struct nft_set_ext { + u8 genmask; u8 offset[NFT_SET_EXT_NUM]; char data[0]; }; @@ -748,6 +751,22 @@ static inline u8 nft_genmask_cur(const struct net *net) return 1 << ACCESS_ONCE(net->nft.gencursor); } +/* + * Set element transaction helpers + */ + +static inline bool nft_set_elem_active(const struct 
nft_set_ext *ext, + u8 genmask) +{ + return !(ext->genmask & genmask); +} + +static inline void nft_set_elem_change_active(const struct nft_set *set, + struct nft_set_ext *ext) +{ + ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); +} + /** * struct nft_trans - nf_tables object update in transaction * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 66fa5e935a55..5604c2df05d1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2690,6 +2690,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, goto err2; INIT_LIST_HEAD(&set->bindings); + write_pnet(&set->pnet, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -3221,10 +3222,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) goto err2; - err = -EEXIST; - if (set->ops->get(set, &elem) == 0) - goto err2; - nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); if (nla[NFTA_SET_ELEM_DATA] != NULL) { @@ -3266,6 +3263,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (trans == NULL) goto err4; + ext->genmask = nft_genmask_cur(ctx->net); err = set->ops->insert(set, &elem); if (err < 0) goto err5; @@ -3353,19 +3351,24 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) goto err2; - err = set->ops->get(set, &elem); - if (err < 0) - goto err2; - trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) { err = -ENOMEM; goto err2; } + elem.priv = set->ops->deactivate(set, &elem); + if (elem.priv == NULL) { + err = -ENOENT; + goto err3; + } + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; + +err3: + kfree(trans); err2: nft_data_uninit(&elem.key, desc.type); err1: @@ -3692,9 +3695,11 @@ static int nf_tables_commit(struct sk_buff *skb) NFT_MSG_DELSET, GFP_KERNEL); break; case 
NFT_MSG_NEWSETELEM: - nf_tables_setelem_notify(&trans->ctx, - nft_trans_elem_set(trans), - &nft_trans_elem(trans), + te = (struct nft_trans_elem *)trans->data; + + te->set->ops->activate(te->set, &te->elem); + nf_tables_setelem_notify(&trans->ctx, te->set, + &te->elem, NFT_MSG_NEWSETELEM, 0); nft_trans_destroy(trans); break; @@ -3704,7 +3709,6 @@ static int nf_tables_commit(struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->get(te->set, &te->elem); te->set->ops->remove(te->set, &te->elem); break; } @@ -3812,11 +3816,14 @@ static int nf_tables_abort(struct sk_buff *skb) nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; - te->set->ops->get(te->set, &te->elem); te->set->ops->remove(te->set, &te->elem); break; case NFT_MSG_DELSETELEM: + te = (struct nft_trans_elem *)trans->data; + nft_trans_elem_set(trans)->nelems++; + te->set->ops->activate(te->set, &te->elem); + nft_trans_destroy(trans); break; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 5bee82195ef5..c7e1a9d7d46f 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -35,6 +35,7 @@ struct nft_hash_elem { struct nft_hash_cmp_arg { const struct nft_set *set; const struct nft_data *key; + u8 genmask; }; static const struct rhashtable_params nft_hash_params; @@ -61,6 +62,8 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; return 0; } @@ -71,6 +74,7 @@ static bool nft_hash_lookup(const struct nft_set *set, struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_cur(read_pnet(&set->pnet)), .set = set, .key = key, }; @@ -88,6 +92,7 @@ static int nft_hash_insert(const struct nft_set *set, struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem 
*he = elem->priv; struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, .key = &elem->key, }; @@ -96,30 +101,39 @@ static int nft_hash_insert(const struct nft_set *set, nft_hash_params); } -static void nft_hash_remove(const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_hash_activate(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; - rhashtable_remove_fast(&priv->ht, elem->cookie, nft_hash_params); + nft_set_elem_change_active(set, &he->ext); } -static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +static void *nft_hash_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, .key = &elem->key, }; he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (!he) - return -ENOENT; + if (he != NULL) + nft_set_elem_change_active(set, &he->ext); - elem->priv = he; + return he; +} - return 0; +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, @@ -129,6 +143,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; err = rhashtable_walk_init(&priv->ht, &hti); @@ -155,6 +170,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&he->ext, genmask)) + 
goto cont; elem.priv = he; @@ -241,8 +258,9 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, - .get = nft_hash_get, .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index cbba755ebebc..42d0ca45fb9e 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -29,6 +29,7 @@ struct nft_rbtree_elem { struct nft_set_ext ext; }; + static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_data *key, const struct nft_set_ext **ext) @@ -36,6 +37,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; const struct rb_node *parent; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; spin_lock_bh(&nft_rbtree_lock); @@ -51,6 +53,10 @@ static bool nft_rbtree_lookup(const struct nft_set *set, parent = parent->rb_right; else { found: + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) @@ -77,6 +83,7 @@ static int __nft_rbtree_insert(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; parent = NULL; @@ -91,8 +98,11 @@ static int __nft_rbtree_insert(const struct nft_set *set, p = &parent->rb_left; else if (d > 0) p = &parent->rb_right; - else - return -EEXIST; + else { + if (nft_set_elem_active(&rbe->ext, genmask)) + return -EEXIST; + p = &parent->rb_left; + } } rb_link_node(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); @@ -116,18 +126,28 
@@ static void nft_rbtree_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->cookie; + struct nft_rbtree_elem *rbe = elem->priv; spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); spin_unlock_bh(&nft_rbtree_lock); } -static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +static void nft_rbtree_activate(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe = elem->priv; + + nft_set_elem_change_active(set, &rbe->ext); +} + +static void *nft_rbtree_deactivate(const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int d; while (parent != NULL) { @@ -140,12 +160,15 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) else if (d > 0) parent = parent->rb_right; else { - elem->cookie = rbe; - elem->priv = rbe; - return 0; + if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; + } + nft_set_elem_change_active(set, &rbe->ext); + return rbe; } } - return -ENOENT; + return NULL; } static void nft_rbtree_walk(const struct nft_ctx *ctx, @@ -156,13 +179,17 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; + u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (iter->count < iter->skip) goto cont; + if (!nft_set_elem_active(&rbe->ext, genmask)) + goto cont; - rbe = rb_entry(node, struct nft_rbtree_elem, node); elem.priv = rbe; iter->err = iter->fn(ctx, set, iter, &elem); @@ -228,7 +255,8 @@ static 
struct nft_set_ops nft_rbtree_ops __read_mostly = { .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, - .get = nft_rbtree_get, + .deactivate = nft_rbtree_deactivate, + .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP, -- cgit v1.2.3