diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/netfilter/iptable_raw.c | 2 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 1 | ||||
-rw-r--r-- | net/netfilter/ipset/ip_set_hash_gen.h | 4 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_conn.c | 4 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 154 | ||||
-rw-r--r-- | net/netfilter/nf_nat_core.c | 17 | ||||
-rw-r--r-- | net/netfilter/nf_nat_masquerade.c | 168 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 30 | ||||
-rw-r--r-- | net/netfilter/nft_compat.c | 17 | ||||
-rw-r--r-- | net/netfilter/xt_LOG.c | 10 | ||||
-rw-r--r-- | net/netfilter/xt_NFLOG.c | 10 |
11 files changed, 270 insertions, 147 deletions
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index b88e0f36cd05..8265c6765705 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -42,7 +42,7 @@ iptable_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init iptable_raw_table_init(struct net *net) +static int iptable_raw_table_init(struct net *net) { struct ipt_replace *repl; const struct xt_table *table = &packet_raw; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index de2cf3943b91..a579ea14a69b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ + acpar.fragoff = 0; acpar.hotdrop = false; acpar.state = state; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6186358eac7c..6e391308431d 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -130,11 +130,11 @@ htable_size(u8 hbits) { size_t hsize; - /* We must fit both into u32 in jhash and size_t */ + /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */ if (hbits > 31) return 0; hsize = jhash_size(hbits); - if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *) + if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *) < hsize) return 0; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c100c6b112c8..2c467c422dc6 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void) int idx; /* Compute size and mask */ + if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) { + pr_info("conn_tab_bits not in [8, 20]. Using default value\n"); + ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; + } ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 94e18fb9690d..770a63103c7a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep; static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +/* serialize hash resizes and nf_ct_iterate_cleanup */ +static DEFINE_MUTEX(nf_conntrack_mutex); + #define GC_SCAN_INTERVAL (120u * HZ) #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) -#define MAX_CHAINLEN 64u +#define MIN_CHAINLEN 8u +#define MAX_CHAINLEN (32u - MIN_CHAINLEN) static struct conntrack_gc_work conntrack_gc_work; @@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly; static siphash_key_t nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, + unsigned int zoneid, const struct net *net) { struct { struct nf_conntrack_man src; union nf_inet_addr dst_addr; + unsigned int zone; u32 net_mix; u16 dport; u16 proto; @@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, /* The direction must be ignored, so handle usable members manually. */ combined.src = tuple->src; combined.dst_addr = tuple->dst.u3; + combined.zone = zoneid; combined.net_mix = net_hash_mix(net); combined.dport = (__force __u16)tuple->dst.u.all; combined.proto = tuple->dst.protonum; @@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash) static u32 __hash_conntrack(const struct net *net, const struct nf_conntrack_tuple *tuple, + unsigned int zoneid, unsigned int size) { - return reciprocal_scale(hash_conntrack_raw(tuple, net), size); + return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size); } static u32 hash_conntrack(const struct net *net, - const struct nf_conntrack_tuple *tuple) + const struct nf_conntrack_tuple *tuple, + unsigned int zoneid) { - return scale_hash(hash_conntrack_raw(tuple, net)); + return scale_hash(hash_conntrack_raw(tuple, zoneid, net)); } static bool nf_ct_get_tuple_ports(const struct sk_buff *skb, @@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL)); reply_hash = hash_conntrack(net, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); clean_from_lists(ct); @@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { - return __nf_conntrack_find_get(net, zone, tuple, - hash_conntrack_raw(tuple, net)); + unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL); + struct nf_conntrack_tuple_hash *thash; + + thash = __nf_conntrack_find_get(net, zone, tuple, + hash_conntrack_raw(tuple, zone_id, net)); + + if (thash) + return thash; + + rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY); + if (rid != zone_id) + return __nf_conntrack_find_get(net, zone, tuple, + hash_conntrack_raw(tuple, rid, net)); + return thash; } EXPORT_SYMBOL_GPL(nf_conntrack_find_get); @@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; + unsigned int max_chainlen; unsigned int chainlen = 0; unsigned int sequence; int err = -EEXIST; @@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL)); reply_hash = hash_conntrack(net, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); + max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN); + /* See if there's one in the list already, including reverse */ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, zone, net)) goto out; - if (chainlen++ > MAX_CHAINLEN) + if (chainlen++ > max_chainlen) goto chaintoolong; } @@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, zone, net)) goto out; - if (chainlen++ > MAX_CHAINLEN) + if (chainlen++ > max_chainlen) goto chaintoolong; } @@ -1103,8 +1131,8 @@ drop: int __nf_conntrack_confirm(struct sk_buff *skb) { + unsigned int chainlen = 0, sequence, max_chainlen; const struct nf_conntrack_zone *zone; - unsigned int chainlen = 0, sequence; unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; @@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; hash = scale_hash(hash); reply_hash = hash_conntrack(net, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* We're not in hash table, and we refuse to set up related @@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) goto dying; } + max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN); /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ @@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, zone, net)) goto out; - if (chainlen++ > MAX_CHAINLEN) + if (chainlen++ > max_chainlen) goto chaintoolong; } @@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, zone, net)) goto out; - if (chainlen++ > MAX_CHAINLEN) { + if (chainlen++ > max_chainlen) { chaintoolong: nf_ct_add_to_dying_list(ct); NF_CT_STAT_INC(net, chaintoolong); @@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, rcu_read_lock(); begin: nf_conntrack_get_ht(&ct_hash, &hsize); - hash = __hash_conntrack(net, tuple, hsize); + hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); @@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl, struct nf_conntrack_tuple_hash *h; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; + u32 hash, zone_id, rid; struct nf_conn *ct; - u32 hash; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, state->pf, protonum, state->net, @@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl, /* look for tuple match */ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); - hash = hash_conntrack_raw(&tuple, state->net); + + zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL); + hash = hash_conntrack_raw(&tuple, zone_id, state->net); h = __nf_conntrack_find_get(state->net, zone, &tuple, hash); + + if (!h) { + rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY); + if (zone_id != rid) { + u32 tmp = hash_conntrack_raw(&tuple, rid, state->net); + + h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp); + } + } + if (!h) { h = init_conntrack(state->net, tmpl, &tuple, skb, dataoff, hash); @@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), spinlock_t *lockp; for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket]; + + if (hlist_nulls_empty(hslot)) + continue; + lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; local_bh_disable(); nf_conntrack_lock(lockp); - if (*bucket < nf_conntrack_htable_size) { - hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY) - continue; - /* All nf_conn objects are added to hash table twice, one - * for original direction tuple, once for the reply tuple. - * - * Exception: In the IPS_NAT_CLASH case, only the reply - * tuple is added (the original tuple already existed for - * a different object). - * - * We only need to call the iterator once for each - * conntrack, so we just use the 'reply' direction - * tuple while iterating. - */ - ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) - goto found; - } + hlist_nulls_for_each_entry(h, n, hslot, hnnode) { + if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY) + continue; + /* All nf_conn objects are added to hash table twice, one + * for original direction tuple, once for the reply tuple. + * + * Exception: In the IPS_NAT_CLASH case, only the reply + * tuple is added (the original tuple already existed for + * a different object). + * + * We only need to call the iterator once for each + * conntrack, so we just use the 'reply' direction + * tuple while iterating. + */ + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; } spin_unlock(lockp); local_bh_enable(); @@ -2264,26 +2308,20 @@ found: static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report) { - unsigned int bucket = 0, sequence; + unsigned int bucket = 0; struct nf_conn *ct; might_sleep(); - for (;;) { - sequence = read_seqcount_begin(&nf_conntrack_generation); - - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { - /* Time to push up daises... */ + mutex_lock(&nf_conntrack_mutex); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + /* Time to push up daises... */ - nf_ct_delete(ct, portid, report); - nf_ct_put(ct); - cond_resched(); - } - - if (!read_seqcount_retry(&nf_conntrack_generation, sequence)) - break; - bucket = 0; + nf_ct_delete(ct, portid, report); + nf_ct_put(ct); + cond_resched(); } + mutex_unlock(&nf_conntrack_mutex); } struct iter_data { @@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize) if (!hash) return -ENOMEM; + mutex_lock(&nf_conntrack_mutex); old_size = nf_conntrack_htable_size; if (old_size == hashsize) { + mutex_unlock(&nf_conntrack_mutex); kvfree(hash); return 0; } @@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize) for (i = 0; i < nf_conntrack_htable_size; i++) { while (!hlist_nulls_empty(&nf_conntrack_hash[i])) { + unsigned int zone_id; + h = hlist_nulls_entry(nf_conntrack_hash[i].first, struct nf_conntrack_tuple_hash, hnnode); ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); + + zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h)); bucket = __hash_conntrack(nf_ct_net(ct), - &h->tuple, hashsize); + &h->tuple, zone_id, hashsize); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } @@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize) nf_conntrack_all_unlock(); local_bh_enable(); + mutex_unlock(&nf_conntrack_mutex); + synchronize_net(); kvfree(old_hash); return 0; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 7008961f5cb0..273117683922 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -150,13 +150,16 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) /* We keep an extra hash for each conntrack, for fast searching. */ static unsigned int -hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, + const struct nf_conntrack_zone *zone, + const struct nf_conntrack_tuple *tuple) { unsigned int hash; struct { struct nf_conntrack_man src; u32 net_mix; u32 protonum; + u32 zone; } __aligned(SIPHASH_ALIGNMENT) combined; get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); @@ -165,9 +168,13 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) /* Original src, to ensure we map it consistently if poss. */ combined.src = tuple->src; - combined.net_mix = net_hash_mix(n); + combined.net_mix = net_hash_mix(net); combined.protonum = tuple->dst.protonum; + /* Zone ID can be used provided its valid for both directions */ + if (zone->dir == NF_CT_DEFAULT_ZONE_DIR) + combined.zone = zone->id; + hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd); return reciprocal_scale(hash, nf_nat_htable_size); @@ -272,7 +279,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range2 *range) { - unsigned int h = hash_by_src(net, tuple); + unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn *ct; hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { @@ -619,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct, unsigned int srchash; spinlock_t *lock; - srchash = hash_by_src(net, + srchash = hash_by_src(net, nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS]; spin_lock_bh(lock); @@ -788,7 +795,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct) { unsigned int h; - h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]); hlist_del_rcu(&ct->nat_bysource); spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]); diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 8e8a65d46345..acd73f717a08 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -9,8 +9,19 @@ #include <net/netfilter/nf_nat_masquerade.h> +struct masq_dev_work { + struct work_struct work; + struct net *net; + union nf_inet_addr addr; + int ifindex; + int (*iter)(struct nf_conn *i, void *data); +}; + +#define MAX_MASQ_WORKER_COUNT 16 + static DEFINE_MUTEX(masq_mutex); static unsigned int masq_refcnt __read_mostly; +static atomic_t masq_worker_count __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); -static int device_cmp(struct nf_conn *i, void *ifindex) +static void iterate_cleanup_work(struct work_struct *work) +{ + struct masq_dev_work *w; + + w = container_of(work, struct masq_dev_work, work); + + nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0); + + put_net(w->net); + kfree(w); + atomic_dec(&masq_worker_count); + module_put(THIS_MODULE); +} + +/* Iterate conntrack table in the background and remove conntrack entries + * that use the device/address being removed. + * + * In case too many work items have been queued already or memory allocation + * fails iteration is skipped, conntrack entries will time out eventually. + */ +static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, + int ifindex, + int (*iter)(struct nf_conn *i, void *data), + gfp_t gfp_flags) +{ + struct masq_dev_work *w; + + if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT) + return; + + net = maybe_get_net(net); + if (!net) + return; + + if (!try_module_get(THIS_MODULE)) + goto err_module; + + w = kzalloc(sizeof(*w), gfp_flags); + if (w) { + /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */ + atomic_inc(&masq_worker_count); + + INIT_WORK(&w->work, iterate_cleanup_work); + w->ifindex = ifindex; + w->net = net; + w->iter = iter; + if (addr) + w->addr = *addr; + schedule_work(&w->work); + return; + } + + module_put(THIS_MODULE); + err_module: + put_net(net); +} + +static int device_cmp(struct nf_conn *i, void *arg) { const struct nf_conn_nat *nat = nfct_nat(i); + const struct masq_dev_work *w = arg; if (!nat) return 0; - return nat->masq_index == (int)(long)ifindex; + return nat->masq_index == w->ifindex; } static int masq_device_event(struct notifier_block *this, @@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this, * and forget them. */ - nf_ct_iterate_cleanup_net(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); + nf_nat_masq_schedule(net, NULL, dev->ifindex, + device_cmp, GFP_KERNEL); } return NOTIFY_DONE; @@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this, static int inet_cmp(struct nf_conn *ct, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct net_device *dev = ifa->ifa_dev->dev; struct nf_conntrack_tuple *tuple; + struct masq_dev_work *w = ptr; - if (!device_cmp(ct, (void *)(long)dev->ifindex)) + if (!device_cmp(ct, ptr)) return 0; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - return ifa->ifa_address == tuple->dst.u3.ip; + return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3); } static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; - struct net *net = dev_net(idev->dev); + const struct in_ifaddr *ifa = ptr; + const struct in_device *idev; + const struct net_device *dev; + union nf_inet_addr addr; + + if (event != NETDEV_DOWN) + return NOTIFY_DONE; /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have * no work to do. Otherwise this is an individual address removal * and we have to perform the flush. */ + idev = ifa->ifa_dev; if (idev->dead) return NOTIFY_DONE; - if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0); + memset(&addr, 0, sizeof(addr)); + + addr.ip = ifa->ifa_address; + + dev = idev->dev; + nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex, + inet_cmp, GFP_KERNEL); return NOTIFY_DONE; } @@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = { }; #if IS_ENABLED(CONFIG_IPV6) -static atomic_t v6_worker_count __read_mostly; - static int nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, @@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); -struct masq_dev_work { - struct work_struct work; - struct net *net; - struct in6_addr addr; - int ifindex; -}; - -static int inet6_cmp(struct nf_conn *ct, void *work) -{ - struct masq_dev_work *w = (struct masq_dev_work *)work; - struct nf_conntrack_tuple *tuple; - - if (!device_cmp(ct, (void *)(long)w->ifindex)) - return 0; - - tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - - return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); -} - -static void iterate_cleanup_work(struct work_struct *work) -{ - struct masq_dev_work *w; - - w = container_of(work, struct masq_dev_work, work); - - nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0); - - put_net(w->net); - kfree(w); - atomic_dec(&v6_worker_count); - module_put(THIS_MODULE); -} - /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). * * Defer it to the system workqueue. @@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this, { struct inet6_ifaddr *ifa = ptr; const struct net_device *dev; - struct masq_dev_work *w; - struct net *net; + union nf_inet_addr addr; - if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16) + if (event != NETDEV_DOWN) return NOTIFY_DONE; dev = ifa->idev->dev; - net = maybe_get_net(dev_net(dev)); - if (!net) - return NOTIFY_DONE; - if (!try_module_get(THIS_MODULE)) - goto err_module; + memset(&addr, 0, sizeof(addr)); - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - atomic_inc(&v6_worker_count); - - INIT_WORK(&w->work, iterate_cleanup_work); - w->ifindex = dev->ifindex; - w->net = net; - w->addr = ifa->addr; - schedule_work(&w->work); + addr.in6 = ifa->addr; - return NOTIFY_DONE; - } - - module_put(THIS_MODULE); - err_module: - put_net(net); + nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp, + GFP_ATOMIC); return NOTIFY_DONE; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 081437dd75b7..b9546defdc28 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4336,7 +4336,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (ops->privsize != NULL) size = ops->privsize(nla, &desc); alloc_size = sizeof(*set) + size + udlen; - if (alloc_size < size) + if (alloc_size < size || alloc_size > INT_MAX) return -ENOMEM; set = kvzalloc(alloc_size, GFP_KERNEL); if (!set) @@ -9599,7 +9599,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table) table->use--; nf_tables_chain_destroy(&ctx); } - list_del(&table->list); nf_tables_table_destroy(&ctx); } @@ -9612,6 +9611,8 @@ static void __nft_release_tables(struct net *net) if (nft_table_has_owner(table)) continue; + list_del(&table->list); + __nft_release_table(net, table); } } @@ -9619,31 +9620,38 @@ static void __nft_release_tables(struct net *net) static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct nft_table *table, *to_delete[8]; struct nftables_pernet *nft_net; struct netlink_notify *n = ptr; - struct nft_table *table, *nt; struct net *net = n->net; - bool release = false; + unsigned int deleted; + bool restart = false; if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; nft_net = nft_pernet(net); + deleted = 0; mutex_lock(&nft_net->commit_mutex); +again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && n->portid == table->nlpid) { __nft_release_hook(net, table); - release = true; + list_del_rcu(&table->list); + to_delete[deleted++] = table; + if (deleted >= ARRAY_SIZE(to_delete)) + break; } } - if (release) { + if (deleted) { + restart = deleted >= ARRAY_SIZE(to_delete); synchronize_rcu(); - list_for_each_entry_safe(table, nt, &nft_net->tables, list) { - if (nft_table_has_owner(table) && - n->portid == table->nlpid) - __nft_release_table(net, table); - } + while (deleted) + __nft_release_table(net, to_delete[--deleted]); + + if (restart) + goto again; } mutex_unlock(&nft_net->commit_mutex); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 272bcdb1392d..f69cc73c5813 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -19,6 +19,7 @@ #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_arp/arp_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_log.h> /* Used for matches where *info is larger than X byte */ #define NFT_MATCH_LARGE_THRESH 192 @@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_compat_wait_for_destructors(); ret = xt_check_target(&par, size, proto, inv); - if (ret < 0) + if (ret < 0) { + if (ret == -ENOENT) { + const char *modname = NULL; + + if (strcmp(target->name, "LOG") == 0) + modname = "nf_log_syslog"; + else if (strcmp(target->name, "NFLOG") == 0) + modname = "nfnetlink_log"; + + if (modname && + nft_request_module(ctx->net, "%s", modname) == -EAGAIN) + return -EAGAIN; + } + return ret; + } /* The standard target cannot be used */ if (!target->target) diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 2ff75f7637b0..f39244f9c0ed 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) static int log_tg_check(const struct xt_tgchk_param *par) { const struct xt_log_info *loginfo = par->targinfo; + int ret; if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6) return -EINVAL; @@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); + ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); + if (ret != 0 && !par->nft_compat) { + request_module("%s", "nf_log_syslog"); + + ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); + } + + return ret; } static void log_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index fb5793208059..e660c3710a10 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) static int nflog_tg_check(const struct xt_tgchk_param *par) { const struct xt_nflog_info *info = par->targinfo; + int ret; if (info->flags & ~XT_NFLOG_MASK) return -EINVAL; if (info->prefix[sizeof(info->prefix) - 1] != '\0') return -EINVAL; - return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG); + ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG); + if (ret != 0 && !par->nft_compat) { + request_module("%s", "nfnetlink_log"); + + ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG); + } + + return ret; } static void nflog_tg_destroy(const struct xt_tgdtor_param *par) |