From 9fd6452d67fb2acda12e5914e2ad371f067f3465 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 21 May 2017 12:52:55 +0200 Subject: netfilter: conntrack: rename nf_ct_iterate_cleanup There are several places where we needlesly call nf_ct_iterate_cleanup, we should instead iterate the full table at module unload time. This is a leftover from back when the conntrack table got duplicated per net namespace. So rename nf_ct_iterate_cleanup to nf_ct_iterate_cleanup_net. A later patch will then add a non-net variant. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8ece3612d0cd..f21180ea4558 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -225,9 +225,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report); struct nf_conntrack_zone; -- cgit v1.2.3 From 2843fb69980b84dfa939733c91dceae533aa89e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 21 May 2017 12:52:57 +0200 Subject: netfilter: conntrack: add nf_ct_iterate_destroy sledgehammer to be used on module unload (to remove affected conntracks from all namespaces). It will also flag all unconfirmed conntracks as dying, i.e. they will not be committed to main table. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 4 ++ net/netfilter/nf_conntrack_core.c | 87 ++++++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f21180ea4558..48407569585d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -229,6 +229,10 @@ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report); +/* also set unconfirmed conntracks as dying. Only use in module exit path. */ +void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), + void *data); + struct nf_conntrack_zone; void nf_conntrack_free(struct nf_conn *ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 08733685d732..7ecee79c78b8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1586,7 +1586,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) /* Bring out ya dead! */ static struct nf_conn * -get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), +get_next_corpse(int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; @@ -1603,8 +1603,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); - if (net_eq(nf_ct_net(ct), net) && - iter(ct, data)) + if (iter(ct, data)) goto found; } } @@ -1621,6 +1620,39 @@ found: return ct; } +static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report) +{ + struct nf_conn *ct; + unsigned int bucket = 0; + + might_sleep(); + + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + /* Time to push up daises... */ + + nf_ct_delete(ct, portid, report); + nf_ct_put(ct); + cond_resched(); + } +} + +struct iter_data { + int (*iter)(struct nf_conn *i, void *data); + void *data; + struct net *net; +}; + +static int iter_net_only(struct nf_conn *i, void *data) +{ + struct iter_data *d = data; + + if (!net_eq(d->net, nf_ct_net(i))) + return 0; + + return d->iter(i, d->data); +} + static void __nf_ct_unconfirmed_destroy(struct net *net) { @@ -1653,8 +1685,7 @@ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report) { - struct nf_conn *ct; - unsigned int bucket = 0; + struct iter_data d; might_sleep(); @@ -1663,21 +1694,51 @@ void nf_ct_iterate_cleanup_net(struct net *net, __nf_ct_unconfirmed_destroy(net); + d.iter = iter; + d.data = data; + d.net = net; + synchronize_net(); - while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { - /* Time to push up daises... */ + nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); +} +EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); - nf_ct_delete(ct, portid, report); - nf_ct_put(ct); - cond_resched(); +/** + * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table + * @iter: callback to invoke for each conntrack + * @data: data to pass to @iter + * + * Like nf_ct_iterate_cleanup, but first marks conntracks on the + * unconfirmed list as dying (so they will not be inserted into + * main table). + */ +void +nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) +{ + struct net *net; + + rtnl_lock(); + for_each_net(net) { + if (atomic_read(&net->ct.count) == 0) + continue; + __nf_ct_unconfirmed_destroy(net); } + rtnl_unlock(); + + /* a conntrack could have been unlinked from unconfirmed list + * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy(). + * This makes sure its inserted into conntrack table. + */ + synchronize_net(); + + nf_ct_iterate_cleanup(iter, data, 0, 0); } -EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); +EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy); static int kill_all(struct nf_conn *i, void *data) { - return 1; + return net_eq(nf_ct_net(i), data); } void nf_ct_free_hashtable(void *hash, unsigned int size) @@ -1742,7 +1803,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { - nf_ct_iterate_cleanup_net(net, kill_all, NULL, 0, 0); + nf_ct_iterate_cleanup(kill_all, net, 0, 0); if (atomic_read(&net->ct.count) != 0) busy = 1; } -- cgit v1.2.3 From 2b664957c27fe708035b217c908edd1048be355e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 22 May 2017 17:47:51 +0100 Subject: netfilter: nf_tables: select set backend flavour depending on description This patch adds the infrastructure to support several implementations of the same set type. This selection will be based on the set description and the features available for this set. This allow us to select set backend implementation that will result in better performance numbers. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 26 +++++++++++++---- net/netfilter/nf_tables_api.c | 59 ++++++++++++++++++++++++--------------- net/netfilter/nft_set_bitmap.c | 10 +++++-- net/netfilter/nft_set_hash.c | 10 +++++-- net/netfilter/nft_set_rbtree.c | 10 +++++-- 5 files changed, 80 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8a8bab8d7b15..f27012098846 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -281,6 +281,23 @@ struct nft_set_estimate { enum nft_set_class space; }; +/** + * struct nft_set_type - nf_tables set type + * + * @select_ops: function to select nft_set_ops + * @ops: default ops, used when no select_ops functions is present + * @list: used internally + * @owner: module reference + */ +struct nft_set_type { + const struct nft_set_ops *(*select_ops)(const struct nft_ctx *, + const struct nft_set_desc *desc, + u32 flags); + const struct nft_set_ops *ops; + struct list_head list; + struct module *owner; +}; + struct nft_set_ext; struct nft_expr; @@ -297,8 +314,6 @@ struct nft_expr; * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance - * @list: nf_tables_set_ops list node - * @owner: module reference * @elemsize: element private size * @features: features supported by the implementation */ @@ -345,14 +360,13 @@ struct nft_set_ops { const struct nlattr * const nla[]); void (*destroy)(const struct nft_set *set); - struct list_head list; - struct module *owner; unsigned int elemsize; u32 features; + const struct nft_set_type *type; }; -int nft_register_set(struct nft_set_ops *ops); -void nft_unregister_set(struct nft_set_ops *ops); +int nft_register_set(struct nft_set_type *type); +void nft_unregister_set(struct nft_set_type *type); /** * struct nft_set - nf_tables set instance diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index da314be0c048..c0b2b19607e1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2377,64 +2377,77 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, * Sets */ -static LIST_HEAD(nf_tables_set_ops); +static LIST_HEAD(nf_tables_set_types); -int nft_register_set(struct nft_set_ops *ops) +int nft_register_set(struct nft_set_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail_rcu(&ops->list, &nf_tables_set_ops); + list_add_tail_rcu(&type->list, &nf_tables_set_types); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_set); -void nft_unregister_set(struct nft_set_ops *ops) +void nft_unregister_set(struct nft_set_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del_rcu(&ops->list); + list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_set); +#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ + NFT_SET_TIMEOUT | NFT_SET_OBJECT) + +static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags) +{ + return (flags & ops->features) == (flags & NFT_SET_FEATURES); +} + /* * Select a set implementation based on the data characteristics and the * given policy. The total memory use might not be known if no size is * given, in that case the amount of memory per element is used. */ static const struct nft_set_ops * -nft_select_set_ops(const struct nlattr * const nla[], +nft_select_set_ops(const struct nft_ctx *ctx, + const struct nlattr * const nla[], const struct nft_set_desc *desc, enum nft_set_policies policy) { const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; - u32 features; + const struct nft_set_type *type; + u32 flags = 0; #ifdef CONFIG_MODULES - if (list_empty(&nf_tables_set_ops)) { + if (list_empty(&nf_tables_set_types)) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-set"); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (!list_empty(&nf_tables_set_ops)) + if (!list_empty(&nf_tables_set_types)) return ERR_PTR(-EAGAIN); } #endif - features = 0; - if (nla[NFTA_SET_FLAGS] != NULL) { - features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT | - NFT_SET_OBJECT; - } + if (nla[NFTA_SET_FLAGS] != NULL) + flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); bops = NULL; best.size = ~0; best.lookup = ~0; best.space = ~0; - list_for_each_entry(ops, &nf_tables_set_ops, list) { - if ((ops->features & features) != features) + list_for_each_entry(type, &nf_tables_set_types, list) { + if (!type->select_ops) + ops = type->ops; + else + ops = type->select_ops(ctx, desc, flags); + if (!ops) + continue; + + if (!nft_set_ops_candidate(ops, flags)) continue; - if (!ops->estimate(desc, features, &est)) + if (!ops->estimate(desc, flags, &est)) continue; switch (policy) { @@ -2465,10 +2478,10 @@ nft_select_set_ops(const struct nlattr * const nla[], break; } - if (!try_module_get(ops->owner)) + if (!try_module_get(type->owner)) continue; if (bops != NULL) - module_put(bops->owner); + module_put(bops->type->owner); bops = ops; best = est; @@ -3029,7 +3042,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(nla, &desc, policy); + ops = nft_select_set_ops(&ctx, nla, &desc, policy); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -3089,14 +3102,14 @@ err3: err2: kfree(set); err1: - module_put(ops->owner); + module_put(ops->type->owner); return err; } static void nft_set_destroy(struct nft_set *set) { set->ops->destroy(set); - module_put(set->ops->owner); + module_put(set->ops->type->owner); kfree(set); } diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b988162b5b15..87d17691278f 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -278,7 +278,9 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static struct nft_set_type nft_bitmap_type; static struct nft_set_ops nft_bitmap_ops __read_mostly = { + .type = &nft_bitmap_type, .privsize = nft_bitmap_privsize, .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, @@ -291,17 +293,21 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = { .activate = nft_bitmap_activate, .lookup = nft_bitmap_lookup, .walk = nft_bitmap_walk, +}; + +static struct nft_set_type nft_bitmap_type __read_mostly = { + .ops = &nft_bitmap_ops, .owner = THIS_MODULE, }; static int __init nft_bitmap_module_init(void) { - return nft_register_set(&nft_bitmap_ops); + return nft_register_set(&nft_bitmap_type); } static void __exit nft_bitmap_module_exit(void) { - nft_unregister_set(&nft_bitmap_ops); + nft_unregister_set(&nft_bitmap_type); } module_init(nft_bitmap_module_init); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 7c21e3da0d88..4ba0717408d9 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -379,7 +379,9 @@ static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static struct nft_set_type nft_hash_type; static struct nft_set_ops nft_rhash_ops __read_mostly = { + .type = &nft_hash_type, .privsize = nft_rhash_privsize, .elemsize = offsetof(struct nft_rhash_elem, ext), .estimate = nft_rhash_estimate, @@ -394,17 +396,21 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = { .update = nft_rhash_update, .walk = nft_rhash_walk, .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, +}; + +static struct nft_set_type nft_hash_type __read_mostly = { + .ops = &nft_rhash_ops, .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_set(&nft_rhash_ops); + return nft_register_set(&nft_hash_type); } static void __exit nft_hash_module_exit(void) { - nft_unregister_set(&nft_rhash_ops); + nft_unregister_set(&nft_hash_type); } module_init(nft_hash_module_init); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fbfb3cbb3916..29d41d378339 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -295,7 +295,9 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static struct nft_set_type nft_rbtree_type; static struct nft_set_ops nft_rbtree_ops __read_mostly = { + .type = &nft_rbtree_type, .privsize = nft_rbtree_privsize, .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, @@ -309,17 +311,21 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, +}; + +static struct nft_set_type nft_rbtree_type __read_mostly = { + .ops = &nft_rbtree_ops, .owner = THIS_MODULE, }; static int __init nft_rbtree_module_init(void) { - return nft_register_set(&nft_rbtree_ops); + return nft_register_set(&nft_rbtree_type); } static void __exit nft_rbtree_module_exit(void) { - nft_unregister_set(&nft_rbtree_ops); + nft_unregister_set(&nft_rbtree_type); } module_init(nft_rbtree_module_init); -- cgit v1.2.3 From 347b408d59e7eadcd09f97eba96fa4c270eb3b23 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 22 May 2017 17:47:54 +0100 Subject: netfilter: nf_tables: pass set description to ->privsize The new non-resizable hashtable variant needs this to calculate the size of the bucket array. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- net/netfilter/nf_tables_api.c | 2 +- net/netfilter/nft_set_bitmap.c | 3 ++- net/netfilter/nft_set_hash.c | 3 ++- net/netfilter/nft_set_rbtree.c | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f27012098846..bd5be0d691d5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -351,7 +351,8 @@ struct nft_set_ops { struct nft_set *set, struct nft_set_iter *iter); - unsigned int (*privsize)(const struct nlattr * const nla[]); + unsigned int (*privsize)(const struct nlattr * const nla[], + const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c0b2b19607e1..2969016d8cad 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3052,7 +3052,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, size = 0; if (ops->privsize != NULL) - size = ops->privsize(nla); + size = ops->privsize(nla, &desc); err = -ENOMEM; set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 87d17691278f..734989c40579 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -236,7 +236,8 @@ static inline u32 nft_bitmap_total_size(u32 klen) return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); } -static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[]) +static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 4ba0717408d9..455a11ce8cd0 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -321,7 +321,8 @@ schedule: nft_set_gc_interval(set)); } -static unsigned int nft_rhash_privsize(const struct nlattr * const nla[]) +static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rhash); } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 29d41d378339..491e805d3ca2 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -251,7 +251,8 @@ cont: read_unlock_bh(&priv->lock); } -static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) +static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rbtree); } -- cgit v1.2.3 From 7866cc57b51c1e118e5d78d1a8f721f378eec5c4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 May 2017 11:38:12 +0200 Subject: netns: add and use net_ns_barrier Quoting Joe Stringer: If a user loads nf_conntrack_ftp, sends FTP traffic through a network namespace, destroys that namespace then unloads the FTP helper module, then the kernel will crash. Events that lead to the crash: 1. conntrack is created with ftp helper in netns x 2. This netns is destroyed 3. netns destruction is scheduled 4. netns destruction wq starts, removes netns from global list 5. ftp helper is unloaded, which resets all helpers of the conntracks via for_each_net() but because netns is already gone from list the for_each_net() loop doesn't include it, therefore all of these conntracks are unaffected. 6. helper module unload finishes 7. netns wq invokes destructor for rmmod'ed helper CC: "Eric W. Biederman" Reported-by: Joe Stringer Signed-off-by: Florian Westphal Acked-by: David S. Miller Acked-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 3 +++ net/core/net_namespace.c | 17 +++++++++++++++++ net/netfilter/nf_conntrack_core.c | 9 +++++++++ 3 files changed, 29 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fe80bb48ab1f..a24a57593202 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -158,6 +158,7 @@ extern struct net init_net; struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net); +void net_ns_barrier(void); #else /* CONFIG_NET_NS */ #include #include @@ -168,6 +169,8 @@ static inline struct net *copy_net_ns(unsigned long flags, return ERR_PTR(-EINVAL); return old_net; } + +static inline void net_ns_barrier(void) {} #endif /* CONFIG_NET_NS */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1934efd4a9d4..1f15abb1d733 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -482,6 +482,23 @@ static void cleanup_net(struct work_struct *work) net_drop_ns(net); } } + +/** + * net_ns_barrier - wait until concurrent net_cleanup_work is done + * + * cleanup_net runs from work queue and will first remove namespaces + * from the global list, then run net exit functions. + * + * Call this in module exit path to make sure that all netns + * ->exit ops have been invoked before the function is removed. + */ +void net_ns_barrier(void) +{ + mutex_lock(&net_mutex); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL(net_ns_barrier); + static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c3bd9b086dcc..9979f46c81dc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1720,6 +1720,8 @@ EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); * Like nf_ct_iterate_cleanup, but first marks conntracks on the * unconfirmed list as dying (so they will not be inserted into * main table). + * + * Can only be called in module exit path. */ void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) @@ -1734,6 +1736,13 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) } rtnl_unlock(); + /* Need to wait for netns cleanup worker to finish, if its + * running -- it might have deleted a net namespace from + * the global list, so our __nf_ct_unconfirmed_destroy() might + * not have affected all namespaces. + */ + net_ns_barrier(); + /* a conntrack could have been unlinked from unconfirmed list * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy(). * This makes sure its inserted into conntrack table. -- cgit v1.2.3 From b7b5fda4686874c9b9b8c27ba9d57a8534f48a70 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 14 Jun 2017 11:54:07 +0200 Subject: netfilter: conntrack: use NFPROTO_MAX to size array We don't support anything larger than NFPROTO_MAX, so we can shrink this a bit: text data dec hex filename old: 8259 1096 9355 248b net/netfilter/nf_conntrack_proto.o new: 8259 624 8883 22b3 net/netfilter/nf_conntrack_proto.o Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ++-- net/netfilter/nf_conntrack_proto.c | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e01559b4d781..6d14b36e3a49 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -71,7 +71,7 @@ struct nf_conntrack_l3proto { struct module *me; }; -extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ @@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; static inline struct nf_conntrack_l3proto * __nf_ct_l3proto_find(u_int16_t l3proto) { - if (unlikely(l3proto >= AF_MAX)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO)) return &nf_conntrack_l3proto_generic; return rcu_dereference(nf_ct_l3protos[l3proto]); } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 6a36623e897c..1dcad229c3cc 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -28,8 +28,8 @@ #include #include -static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); @@ -68,7 +68,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, struct nf_conntrack_l4proto * __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto) { - if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL)) return &nf_conntrack_l4proto_generic; return rcu_dereference(nf_ct_protos[l3proto][l4proto]); @@ -212,7 +212,7 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) int ret = 0; struct nf_conntrack_l3proto *old; - if (proto->l3proto >= AF_MAX) + if (proto->l3proto >= NFPROTO_NUMPROTO) return -EBUSY; if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size) @@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { - BUG_ON(proto->l3proto >= AF_MAX); + BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); mutex_lock(&nf_ct_proto_mutex); BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], @@ -341,7 +341,7 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) { int ret = 0; - if (l4proto->l3proto >= PF_MAX) + if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)) return -EBUSY; if ((l4proto->to_nlattr && !l4proto->nlattr_size) || @@ -423,7 +423,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) { - BUG_ON(l4proto->l3proto >= PF_MAX); + BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)); BUG_ON(rcu_dereference_protected( nf_ct_protos[l4proto->l3proto][l4proto->l4proto], @@ -556,7 +556,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net) int nf_conntrack_proto_init(void) { unsigned int i; - for (i = 0; i < AF_MAX; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) rcu_assign_pointer(nf_ct_l3protos[i], &nf_conntrack_l3proto_generic); return 0; @@ -566,6 +566,6 @@ void nf_conntrack_proto_fini(void) { unsigned int i; /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) + for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) kfree(nf_ct_protos[i]); } -- cgit v1.2.3