summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-09-07 00:45:08 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-09-07 00:45:08 +0300
commit aae3dbb4776e7916b6cd442d00159bea27a695c1 (patch)
tree d074c5d783a81e7e2e084b1eba77f57459da7e37 /net/netfilter
parent ec3604c7a5aae8953545b0d05495357009a960e5 (diff)
parent 66bed8465a808400eb14562510e26c8818082cb8 (diff)
download linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Support ipv6 checksum offload in sunvnet driver, from Shannon Nelson.
 2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric Dumazet.
 3) Allow generic XDP to work on virtual devices, from John Fastabend.
 4) Add bpf device maps and XDP_REDIRECT, which can be used to build arbitrary switching frameworks using XDP. From John Fastabend.
 5) Remove UFO offloads from the tree, gave us little other than bugs.
 6) Remove the IPSEC flow cache, from Florian Westphal.
 7) Support ipv6 route offload in mlxsw driver.
 8) Support VF representors in bnxt_en, from Sathya Perla.
 9) Add support for forward error correction modes to ethtool, from Vidya Sagar Ravipati.
 10) Add time filter for packet scheduler action dumping, from Jamal Hadi Salim.
 11) Extend the zerocopy sendmsg() used by virtio and tap to regular sockets via MSG_ZEROCOPY. From Willem de Bruijn.
 12) Significantly rework value tracking in the BPF verifier, from Edward Cree.
 13) Add new jump instructions to eBPF, from Daniel Borkmann.
 14) Rework rtnetlink plumbing so that operations can be run without taking the RTNL semaphore. From Florian Westphal.
 15) Support XDP in tap driver, from Jason Wang.
 16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal.
 17) Add Huawei hinic ethernet driver.
 18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan Delalande.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits)
  i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq
  i40e: avoid NVM acquire deadlock during NVM update
  drivers: net: xgene: Remove return statement from void function
  drivers: net: xgene: Configure tx/rx delay for ACPI
  drivers: net: xgene: Read tx/rx delay for ACPI
  rocker: fix kcalloc parameter order
  rds: Fix non-atomic operation on shared flag variable
  net: sched: don't use GFP_KERNEL under spin lock
  vhost_net: correctly check tx avail during rx busy polling
  net: mdio-mux: add mdio_mux parameter to mdio_mux_init()
  rxrpc: Make service connection lookup always check for retry
  net: stmmac: Delete dead code for MDIO registration
  gianfar: Fix Tx flow control deactivation
  cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6
  cxgb4: Fix pause frame count in t4_get_port_stats
  cxgb4: fix memory leak
  tun: rename generic_xdp to skb_xdp
  tun: reserve extra headroom only when XDP is set
  net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping
  net: dsa: bcm_sf2: Advertise number of egress queues
  ...
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/Kconfig 9
-rw-r--r-- net/netfilter/Makefile 1
-rw-r--r-- net/netfilter/core.c 351
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c 11
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 46
-rw-r--r-- net/netfilter/nf_conntrack_broadcast.c 2
-rw-r--r-- net/netfilter/nf_conntrack_core.c 57
-rw-r--r-- net/netfilter/nf_conntrack_expect.c 70
-rw-r--r-- net/netfilter/nf_conntrack_extend.c 2
-rw-r--r-- net/netfilter/nf_conntrack_helper.c 34
-rw-r--r-- net/netfilter/nf_conntrack_l3proto_generic.c 7
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 106
-rw-r--r-- net/netfilter/nf_conntrack_pptp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_proto.c 90
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c 20
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c 21
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c 16
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 20
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 20
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c 18
-rw-r--r-- net/netfilter/nf_conntrack_sip.c 6
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c 103
-rw-r--r-- net/netfilter/nf_internals.h 10
-rw-r--r-- net/netfilter/nf_nat_core.c 4
-rw-r--r-- net/netfilter/nf_nat_redirect.c 6
-rw-r--r-- net/netfilter/nf_queue.c 68
-rw-r--r-- net/netfilter/nf_sockopt.c 2
-rw-r--r-- net/netfilter/nf_tables_api.c 515
-rw-r--r-- net/netfilter/nf_tables_core.c 28
-rw-r--r-- net/netfilter/nf_tables_trace.c 42
-rw-r--r-- net/netfilter/nfnetlink_cttimeout.c 22
-rw-r--r-- net/netfilter/nfnetlink_log.c 2
-rw-r--r-- net/netfilter/nfnetlink_queue.c 21
-rw-r--r-- net/netfilter/nft_counter.c 20
-rw-r--r-- net/netfilter/nft_ct.c 18
-rw-r--r-- net/netfilter/nft_exthdr.c 213
-rw-r--r-- net/netfilter/nft_fib_netdev.c 87
-rw-r--r-- net/netfilter/nft_limit.c 148
-rw-r--r-- net/netfilter/nft_objref.c 7
-rw-r--r-- net/netfilter/nft_payload.c 2
-rw-r--r-- net/netfilter/nft_quota.c 20
-rw-r--r-- net/netfilter/nft_rt.c 73
-rw-r--r-- net/netfilter/nft_set_rbtree.c 49
-rw-r--r-- net/netfilter/x_tables.c 14
-rw-r--r-- net/netfilter/xt_CT.c 2
-rw-r--r-- net/netfilter/xt_NETMAP.c 8
-rw-r--r-- net/netfilter/xt_TCPMSS.c 2
-rw-r--r-- net/netfilter/xt_TPROXY.c 10
-rw-r--r-- net/netfilter/xt_addrtype.c 3
-rw-r--r-- net/netfilter/xt_connlimit.c 26
-rw-r--r-- net/netfilter/xt_hashlimit.c 285
-rw-r--r-- net/netfilter/xt_nat.c 20
-rw-r--r-- net/netfilter/xt_osf.c 2
-rw-r--r-- net/netfilter/xt_recent.c 2
59 files changed, 1898 insertions, 880 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864cc36a..e4a13cc8a2e7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
help
This option enables packet forwarding for the "netdev" family.
+config NFT_FIB_NETDEV
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables netdev fib lookups support"
+ help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
endif # NF_TABLES_NETDEV
endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 913380919301..d3891c93edd6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a3795a..04fe25abc5f6 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT 1024
+
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+ sizeof(struct nf_hook_ops *) * num;
+
+ if (num == 0)
+ return NULL;
+
+ e = kvzalloc(alloc, GFP_KERNEL);
+ if (e)
+ e->num_hook_entries = num;
+ return e;
+}
+
+static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+ .hook = accept_all,
+ .priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+ const struct nf_hook_ops *reg)
+{
+ unsigned int i, alloc_entries, nhooks, old_entries;
+ struct nf_hook_ops **orig_ops = NULL;
+ struct nf_hook_ops **new_ops;
+ struct nf_hook_entries *new;
+ bool inserted = false;
+
+ alloc_entries = 1;
+ old_entries = old ? old->num_hook_entries : 0;
+
+ if (old) {
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+
+ for (i = 0; i < old_entries; i++) {
+ if (orig_ops[i] != &dummy_ops)
+ alloc_entries++;
+ }
+ }
+
+ if (alloc_entries > MAX_HOOK_COUNT)
+ return ERR_PTR(-E2BIG);
+
+ new = allocate_hook_entries_size(alloc_entries);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+
+ i = 0;
+ nhooks = 0;
+ while (i < old_entries) {
+ if (orig_ops[i] == &dummy_ops) {
+ ++i;
+ continue;
+ }
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+ i++;
+ } else {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ inserted = true;
+ }
+ nhooks++;
+ }
+
+ if (!inserted) {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ }
+
+ return new;
+}
+
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+ struct nf_hook_ops **orig_ops;
+ int prio = INT_MIN;
+ size_t i = 0;
+
+ orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+ for (i = 0; i < hooks->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+
+ WARN_ON(orig_ops[i]->priority < prio);
+
+ if (orig_ops[i]->priority > prio)
+ prio = orig_ops[i]->priority;
+ }
+#endif
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that will just move to next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate new blob,
+ * copies the live hooks, then replaces and discards old one.
+ *
+ * return values:
+ *
+ * Returns address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+ struct nf_hook_entries *old, *new = NULL;
+ unsigned int i, j, skip = 0, hook_entries;
+ struct nf_hook_ops **orig_ops;
+ struct nf_hook_ops **new_ops;
+
+ old = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!old))
+ return NULL;
+
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ skip++;
+ }
+
+ /* if skip == hook_entries all hooks have been removed */
+ hook_entries = old->num_hook_entries;
+ if (skip == hook_entries)
+ goto out_assign;
+
+ if (WARN_ON(skip == 0))
+ return NULL;
+
+ hook_entries -= skip;
+ new = allocate_hook_entries_size(hook_entries);
+ if (!new)
+ return NULL;
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+ for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+ new->hooks[j] = old->hooks[i];
+ new_ops[j] = (void *)orig_ops[i];
+ j++;
+ }
+ hooks_validate(new);
+out_assign:
+ rcu_assign_pointer(*pp, new);
+ return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
if (reg->pf != NFPROTO_NETDEV)
return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
return &reg->dev->nf_hooks_ingress;
}
#endif
+ WARN_ON_ONCE(1);
return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *entry, *p;
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!pp)
return -EINVAL;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- nf_hook_entry_init(entry, reg);
-
mutex_lock(&nf_hook_mutex);
- /* Find the spot in the list */
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (reg->priority < nf_hook_entry_priority(p))
- break;
- }
- rcu_assign_pointer(entry->next, p);
- rcu_assign_pointer(*pp, entry);
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+ if (!IS_ERR(new_hooks))
+ rcu_assign_pointer(*pp, new_hooks);
mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+ hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
@@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
+ synchronize_net();
+ BUG_ON(p == new_hooks);
+ kvfree(p);
return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *p;
-
- pp = nf_hook_entry_head(net, reg);
- if (WARN_ON_ONCE(!pp))
- return NULL;
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+ unsigned int i;
- mutex_lock(&nf_hook_mutex);
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (nf_hook_entry_ops(p) == reg) {
- rcu_assign_pointer(*pp, p->next);
- break;
- }
- }
- mutex_unlock(&nf_hook_mutex);
- if (!p) {
- WARN(1, "nf_unregister_net_hook: hook not found!\n");
- return NULL;
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] != unreg)
+ continue;
+ WRITE_ONCE(old->hooks[i].hook, accept_all);
+ WRITE_ONCE(orig_ops[i], &dummy_ops);
+ found = true;
+ break;
}
+
+ if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
- net_dec_ingress_queue();
+ if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
#endif
-
- return p;
+ } else {
+ WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ }
}
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
unsigned int nfq;
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+ return;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p)) {
+ mutex_unlock(&nf_hook_mutex);
+ return;
+ }
+
+ __nf_unregister_net_hook(p, reg);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
if (!p)
return;
@@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
nfq = nf_queue_nf_hook_drop(net);
if (nfq)
synchronize_net();
- kfree(p);
+ kvfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entry *to_free[16];
- unsigned int i, n, nfq;
+ struct nf_hook_entries *to_free[16], *p;
+ struct nf_hook_entries __rcu **pp;
+ unsigned int i, j, n;
+
+ mutex_lock(&nf_hook_mutex);
+ for (i = 0; i < hookcount; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p))
+ continue;
+ __nf_unregister_net_hook(p, &reg[i]);
+ }
+ mutex_unlock(&nf_hook_mutex);
do {
n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
- for (i = 0; i < n; i++)
- to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+ mutex_lock(&nf_hook_mutex);
- synchronize_net();
+ for (i = 0, j = 0; i < hookcount && j < n; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (!p)
+ continue;
+
+ to_free[j] = __nf_hook_entries_try_shrink(pp);
+ if (to_free[j])
+ ++j;
+ }
+
+ mutex_unlock(&nf_hook_mutex);
+
+ if (j) {
+ unsigned int nfq;
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
synchronize_net();
- for (i = 0; i < n; i++)
- kfree(to_free[i]);
+ /* need 2nd synchronize_net() if nfqueue is used, skb
+ * can get reinjected right before nf_queue_hook_drop()
+ */
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
+
+ for (i = 0; i < j; i++)
+ kvfree(to_free[i]);
+ }
reg += n;
hookcount -= n;
@@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry)
+ const struct nf_hook_entries *e, unsigned int s)
{
unsigned int verdict;
int ret;
- do {
- verdict = nf_hook_entry_hookfn(entry, skb, state);
+ for (; s < e->num_hook_entries; s++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
- entry = rcu_dereference(entry->next);
break;
case NF_DROP:
kfree_skb(skb);
@@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
ret = -EPERM;
return ret;
case NF_QUEUE:
- ret = nf_queue(skb, state, &entry, verdict);
- if (ret == 1 && entry)
+ ret = nf_queue(skb, state, e, s, verdict);
+ if (ret == 1)
continue;
return ret;
default:
@@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
*/
return 0;
}
- } while (entry);
+ }
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b58aba..5cb7cac9177d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
if (!pptr)
return NULL;
- rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
&iph->saddr, pptr[0]);
if (dest) {
@@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
pptr[0], pptr[1]);
}
}
- rcu_read_unlock();
return cp;
}
@@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (dest) {
struct ip_vs_dest_dst *dest_dst;
- rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
- rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
#endif
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+static const struct nf_hook_ops ip_vs_ops[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1fa3c2307b6e..4f940d7eb2f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- rcu_read_lock();
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- rcu_read_unlock();
return true;
}
}
- rcu_read_unlock();
return false;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index fb780be76d15..3e17d32b629d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
- rcu_read_lock();
mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start - data,
end - start,
buf, buf_len);
- rcu_read_unlock();
if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3ffad4adaddf..e1efa446b305 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -38,7 +38,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -53,7 +52,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,11 +65,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -526,12 +522,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -544,11 +538,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
-out:
+
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 12dc8d5bc37d..121a321b91be 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
}
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
@@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e494e9a88c7f..30e11cd6aa8a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 2eab1e0400f4..90d396814798 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
&iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
@@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
@@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
@@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
@@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
LeaveFunction(10);
kfree_skb(skb);
- rcu_read_unlock();
return NF_STOLEN;
}
#endif
@@ -1035,7 +1023,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1043,10 +1030,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
rt = skb_rtable(skb);
tdev = rt->dst.dev;
@@ -1095,7 +1080,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1104,7 +1088,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1127,7 +1110,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
@@ -1136,10 +1118,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
@@ -1185,7 +1165,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1194,7 +1173,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1213,17 +1191,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
ip_send_check(ip_hdr(skb));
@@ -1231,14 +1206,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1252,7 +1225,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
NULL, ipvsh, 0,
@@ -1261,23 +1233,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1322,7 +1290,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
@@ -1368,12 +1335,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
@@ -1414,7 +1379,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
@@ -1460,12 +1424,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4e99cca61612..ecc3ab784633 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
goto out;
- rcu_read_lock();
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
@@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
}
} endfor_ifa(in_dev);
}
- rcu_read_unlock();
if (mask == 0)
goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 51390febd5e3..01130392b7c0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include "nf_internals.h"
+
#define NF_CONNTRACK_VERSION "0.5.0"
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -254,8 +256,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
int ret;
@@ -404,22 +406,19 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
- NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+ WARN_ON(atomic_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
}
- rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
- rcu_read_unlock();
-
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
@@ -701,7 +700,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
@@ -763,12 +762,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* connections for unconfirmed conns. But packet copies and
* REJECT will give spurious warnings here.
*/
- /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
* confirmed us.
*/
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1090,7 +1088,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1167,7 +1165,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+ WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
@@ -1183,8 +1181,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
@@ -1295,8 +1293,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
@@ -1351,10 +1349,10 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
@@ -1421,7 +1419,7 @@ repeat:
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1475,7 +1473,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
struct nf_conn_help *help = nfct_help(ct);
/* Should be unconfirmed, so not in hash table yet */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Altering reply tuple of %p to ", ct);
nf_ct_dump_tuple(newreply);
@@ -1497,7 +1495,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(skb);
+ WARN_ON(!skb);
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
@@ -1695,6 +1693,18 @@ __nf_ct_unconfirmed_destroy(struct net *net)
}
}
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+ might_sleep();
+
+ if (atomic_read(&net->ct.count) > 0) {
+ __nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
+ synchronize_net();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
@@ -1706,14 +1716,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
if (atomic_read(&net->ct.count) == 0)
return;
- __nf_ct_unconfirmed_destroy(net);
-
d.iter = iter;
d.data = data;
d.net = net;
- synchronize_net();
-
nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -1739,6 +1745,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&net->ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
}
rtnl_unlock();
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 899c2c36da13..64778f9a8548 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,8 +51,8 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
- NF_CT_ASSERT(master_help);
- NF_CT_ASSERT(!timer_pending(&exp->timeout));
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
net->ct.expect_count--;
@@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
/* two references : one for hash insert, one for the timer */
refcount_add(2, &exp->use);
- hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
- master_help->expecting[exp->class]++;
-
- hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- net->ct.expect_count++;
-
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
@@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
}
add_timer(&exp->timeout);
+ hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
+
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
+ net->ct.expect_count++;
+
NF_CT_STAT_INC(net, expect_create);
}
@@ -474,6 +474,60 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data,
+ u32 portid, int report)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, portid, report);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 6c605e88ebae..9fe0ddc333fb 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -47,7 +47,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
old = ct->ext;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9129bb3b5153..551a1eddf0fa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conntrack_expect *exp;
- const struct hlist_node *next;
- unsigned int i;
+ struct nf_conn_help *help = nfct_help(exp->master);
+ const struct nf_conntrack_helper *me = data;
+ const struct nf_conntrack_helper *this;
+
+ if (exp->helper == me)
+ return true;
+ this = rcu_dereference_protected(help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
mutex_lock(&nf_ct_helper_mutex);
hlist_del_rcu(&me->hnode);
nf_ct_helper_count--;
@@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
*/
synchronize_rcu();
- /* Get rid of expectations */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i], hnode) {
- struct nf_conn_help *help = nfct_help(exp->master);
- if ((rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_expect_lock)
- ) == me || exp->helper == me))
- nf_ct_remove_expect(exp);
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
+ nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace70bece..397e6911214f 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -64,10 +59,8 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.get_l4proto = generic_get_l4proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7999e70c3bfb..de4053d84364 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -61,8 +61,8 @@ MODULE_LICENSE("GPL");
static char __initdata version[] = "0.93";
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -86,7 +86,7 @@ nla_put_failure:
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_l3proto *l3proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -109,9 +109,9 @@ nla_put_failure:
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -163,7 +163,7 @@ nla_put_failure:
static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
int ret;
@@ -535,17 +535,16 @@ nla_put_failure:
static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
- size_t len = 0;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ size_t len;
- rcu_read_lock();
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- len += l3proto->nla_size;
+ len = l3proto->nla_size;
+ len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
- rcu_read_unlock();
return len;
}
@@ -664,7 +663,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -736,8 +734,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
- rcu_read_unlock();
-
nlmsg_end(skb, nlh);
err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
@@ -747,7 +743,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -941,8 +936,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
@@ -1585,8 +1580,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
const struct nlattr * const cda[])
{
const struct nlattr *attr = cda[CTA_PROTOINFO];
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTOINFO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int err = 0;
err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
@@ -2213,7 +2208,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -2272,11 +2266,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
goto nla_put_failure;
- rcu_read_unlock();
return 0;
nla_put_failure:
- rcu_read_unlock();
return -ENOSPC;
}
@@ -2483,11 +2475,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
- int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
+ int ret;
memset(&m, 0xFF, sizeof(m));
memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
@@ -2661,17 +2653,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- rcu_read_unlock();
nlmsg_end(skb, nlh);
nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -2910,6 +2899,21 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+ const struct nf_conn_help *m_help;
+ const char *name = data;
+
+ m_help = nfct_help(exp->master);
+
+ return strcmp(m_help->helper->name, name) == 0;
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+ return true;
+}
+
static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const cda[],
@@ -2918,10 +2922,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct hlist_node *next;
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
- unsigned int i;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
@@ -2961,49 +2963,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
nf_ct_expect_put(exp);
} else if (cda[CTA_EXPECT_HELP_NAME]) {
char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
- struct nf_conn_help *m_help;
-
- /* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
- m_help = nfct_help(exp->master);
- if (!strcmp(m_help->helper->name, name) &&
- del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_name, name,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
} else {
/* This basically means we have to flush everything*/
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
-
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
}
return 0;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6959e93063d4..11562f2a08bb 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- rcu_read_lock();
nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
nf_nat_pptp_expectfn(ct, exp);
@@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("not found\n");
}
}
- rcu_read_unlock();
}
static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1dcad229c3cc..b3e489c859ec 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -65,7 +65,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
}
#endif
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
{
if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
@@ -77,7 +77,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
/* this is guaranteed to always return a valid protocol helper, since
* it falls back to generic_protocol */
-struct nf_conntrack_l3proto *
+const struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
struct nf_conntrack_l3proto *p;
@@ -95,8 +95,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
+ const struct nf_conntrack_l3proto *p;
int ret;
- struct nf_conntrack_l3proto *p;
retry: p = nf_ct_l3proto_find_get(l3proto);
if (p == &nf_conntrack_l3proto_generic) {
@@ -173,10 +173,10 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
- struct nf_conntrack_l4proto *p;
+ const struct nf_conntrack_l4proto *p;
rcu_read_lock();
p = __nf_ct_l4proto_find(l3num, l4num);
@@ -188,7 +188,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
{
module_put(p->me);
}
@@ -196,28 +196,28 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
static int kill_l3proto(struct nf_conn *i, void *data)
{
- return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
+ return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
}
static int kill_l4proto(struct nf_conn *i, void *data)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = data;
return nf_ct_protonum(i) == l4proto->l4proto &&
nf_ct_l3num(i) == l4proto->l3proto;
}
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
{
int ret = 0;
struct nf_conntrack_l3proto *old;
if (proto->l3proto >= NFPROTO_NUMPROTO)
return -EBUSY;
-
- if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (proto->tuple_to_nlattr && proto->nla_size == 0)
return -EINVAL;
-
+#endif
mutex_lock(&nf_ct_proto_mutex);
old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
lockdep_is_held(&nf_ct_proto_mutex));
@@ -226,9 +226,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
goto out_unlock;
}
- if (proto->nlattr_tuple_size)
- proto->nla_size = 3 * proto->nlattr_tuple_size();
-
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
out_unlock:
@@ -238,21 +235,7 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-#ifdef CONFIG_SYSCTL
-extern unsigned int nf_conntrack_default_on;
-
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- if (nf_conntrack_default_on == 0)
- return 0;
-
- return proto->net_ns_get ? proto->net_ns_get(net) : 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
-#endif
-
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
{
BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
@@ -266,27 +249,12 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l3proto, proto);
+ nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-void nf_ct_l3proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- /*
- * nf_conntrack_default_on *might* have registered hooks.
- * ->net_ns_put must cope with more puts() than get(), i.e.
- * if nf_conntrack_default_on was 0 at time of
- * nf_ct_l3proto_pernet_register invocation this net_ns_put()
- * should be a noop.
- */
- if (proto->net_ns_put)
- proto->net_ns_put(net);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
-
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
if (l4proto->get_net_proto) {
/* statically built-in protocols use static per-net */
@@ -301,7 +269,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int err = 0;
@@ -324,8 +292,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
- struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ struct nf_proto_net *pn,
+ const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
@@ -395,7 +363,7 @@ out_unlock:
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nf_proto_net *pn = NULL;
@@ -420,7 +388,7 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
@@ -433,7 +401,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
&nf_conntrack_l4proto_generic);
}
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
mutex_lock(&nf_ct_proto_mutex);
__nf_ct_l4proto_unregister_one(l4proto);
@@ -444,7 +412,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
@@ -469,8 +437,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
}
if (i != num_proto) {
ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
- pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
- ver, l4proto[i]->name, ver);
+ pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+ ver, l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
@@ -478,7 +446,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
+ struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL;
@@ -490,8 +458,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
- pr_err("nf_conntrack_%s%d: pernet registration failed\n",
- l4proto[i]->name,
+ pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+ l4proto[i]->l4proto,
l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
@@ -514,8 +482,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
- unsigned int num_proto)
+ struct nf_conntrack_l4proto *const l4proto[],
+ unsigned int num_proto)
{
while (num_proto-- != 0)
nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d997558a..0f5a4d79f6b8 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -469,7 +469,7 @@ static unsigned int *dccp_get_timeouts(struct net *net)
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
- u_int8_t pf, unsigned int hooknum,
+ u_int8_t pf,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -623,18 +623,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
return false;
}
-static void dccp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.dccp.port),
- ntohs(tuple->dst.u.dccp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -880,7 +874,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -888,8 +881,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
@@ -916,7 +910,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -924,8 +917,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d5868bad33a7..9cd40700842e 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
static bool nf_generic_should_process(u8 proto)
{
switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
- case IPPROTO_SCTP:
- return false;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
- case IPPROTO_DCCP:
- return false;
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
case IPPROTO_GRE:
return false;
#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
- case IPPROTO_UDPLITE:
- return false;
-#endif
default:
return true;
}
@@ -62,12 +50,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static unsigned int *generic_get_timeouts(struct net *net)
{
return &(generic_pernet(net)->timeout);
@@ -79,7 +61,6 @@ static int generic_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
@@ -187,10 +168,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.packet = generic_packet,
.get_timeouts = generic_get_timeouts,
.new = generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 87bb40a3feb5..09a90484c27d 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,15 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
return true;
}
-/* print gre part of tuple */
-static void gre_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* print private data for conntrack */
static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
@@ -240,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
(ct->proto.gre.timeout / HZ),
(ct->proto.gre.stream_timeout / HZ));
}
+#endif
static unsigned int *gre_get_timeouts(struct net *net)
{
@@ -252,7 +245,6 @@ static int gre_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is a GRE connection.
@@ -364,11 +356,11 @@ static int gre_init_net(struct net *net, u_int16_t proto)
static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
- .name = "gre",
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
+#endif
.get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6eef29d2eec4..6303a88af12b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,20 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void sctp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
}
+#endif
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
@@ -314,7 +307,6 @@ static int sctp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
enum sctp_conntrack new_state, old_state;
@@ -791,11 +783,11 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
@@ -828,11 +820,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9758a7dfd83e..cba1c6ffe51a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,20 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void tcp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
+#endif
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
@@ -810,7 +803,6 @@ static int tcp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -1556,11 +1548,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
@@ -1594,11 +1586,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f6ebce6178ca..8af734cd1a94 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void udp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
return udp_pernet(net)->timeouts;
@@ -83,7 +74,6 @@ static int udp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is some kind of UDP
@@ -313,11 +303,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -347,11 +335,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -381,11 +367,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -415,11 +399,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d38af4274335..4dbb5bad4363 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
tuple.dst.u3 = *daddr;
tuple.dst.u.udp.port = port;
- rcu_read_lock();
do {
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
@@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
goto err1;
}
- if (skip_expect) {
- rcu_read_unlock();
+ if (skip_expect)
return NF_ACCEPT;
- }
rtp_exp = nf_ct_expect_alloc(ct);
if (rtp_exp == NULL)
@@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
err2:
nf_ct_expect_put(rtp_exp);
err1:
- rcu_read_unlock();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ccb5cb9043e0..5a101caa3e12 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
- l3proto->print_tuple(s, tuple);
- l4proto->print_tuple(s, tuple);
+ switch (l3proto->l3proto) {
+ case NFPROTO_IPV4:
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ seq_printf(s, "src=%pI6 dst=%pI6 ",
+ tuple->src.u3.ip6, tuple->dst.u3.ip6);
+ break;
+ default:
+ break;
+ }
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_TCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
+ break;
+ case IPPROTO_UDPLITE: /* fallthrough */
+ case IPPROTO_UDP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.udp.port),
+ ntohs(tuple->dst.u.udp.port));
+
+ break;
+ case IPPROTO_DCCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.dccp.port),
+ ntohs(tuple->dst.u.dccp.port));
+ break;
+ case IPPROTO_SCTP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.sctp.port),
+ ntohs(tuple->dst.u.sctp.port));
+ break;
+ case IPPROTO_ICMPV6:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_GRE:
+ seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ ntohs(tuple->src.u.gre.key),
+ ntohs(tuple->dst.u.gre.key));
+ break;
+ default:
+ break;
+ }
}
EXPORT_SYMBOL_GPL(print_tuple);
@@ -198,6 +252,31 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+static const char* l3proto_name(u16 proto)
+{
+ switch (proto) {
+ case AF_INET: return "ipv4";
+ case AF_INET6: return "ipv6";
+ }
+
+ return "unknown";
+}
+
+static const char* l4proto_name(u16 proto)
+{
+ switch (proto) {
+ case IPPROTO_ICMP: return "icmp";
+ case IPPROTO_TCP: return "tcp";
+ case IPPROTO_UDP: return "udp";
+ case IPPROTO_DCCP: return "dccp";
+ case IPPROTO_GRE: return "gre";
+ case IPPROTO_SCTP: return "sctp";
+ case IPPROTO_UDPLITE: return "udplite";
+ }
+
+ return "unknown";
+}
+
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -208,7 +287,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
struct net *net = seq_file_net(s);
int ret = 0;
- NF_CT_ASSERT(ct);
+ WARN_ON(!ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
return 0;
@@ -225,14 +304,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- NF_CT_ASSERT(l3proto);
+ WARN_ON(!l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- NF_CT_ASSERT(l4proto);
+ WARN_ON(!l4proto);
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u %ld ",
- l3proto->name, nf_ct_l3num(ct),
- l4proto->name, nf_ct_protonum(ct),
+ l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
@@ -452,9 +531,6 @@ static int log_invalid_proto_max __read_mostly = 255;
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
-extern unsigned int nf_conntrack_default_on;
-unsigned int nf_conntrack_default_on __read_mostly = 1;
-
static int
nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -520,13 +596,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "nf_conntrack_default_on",
- .data = &nf_conntrack_default_on,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bfa742da83af..49f87ec093a3 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -5,17 +5,11 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict);
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
unsigned int nf_queue_nf_hook_drop(struct net *net);
-int __init netfilter_queue_init(void);
/* nf_log.c */
int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b1d3740ae36a..40573aa6c133 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -414,8 +414,8 @@ nf_nat_setup_info(struct nf_conn *ct,
if (nf_ct_is_confirmed(ct))
return NF_ACCEPT;
- NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
- maniptype == NF_NAT_MANIP_DST);
+ WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
+ maniptype != NF_NAT_MANIP_DST);
BUG_ON(nf_nat_initialized(ct, maniptype));
/* What we've got will look like inverse of reply. Normally
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 86067560a318..25b06b959118 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -38,11 +38,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
__be32 newdst;
struct nf_nat_range newrange;
- NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
- hooknum == NF_INET_LOCAL_OUT);
+ WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
+ hooknum != NF_INET_LOCAL_OUT);
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
/* Local packets: make them go to loopback */
if (hooknum == NF_INET_LOCAL_OUT) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 043850c9d154..f7e21953b1de 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -109,9 +109,11 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
return count;
}
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
- struct nf_hook_entry *hook_entry, unsigned int queuenum)
+ const struct nf_hook_entries *entries,
+ unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -139,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
.state = *state,
- .hook = hook_entry,
+ .hook_index = index,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -162,18 +164,16 @@ err:
/* Packets leaving via this function must come back through nf_reinject(). */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict)
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict)
{
- struct nf_hook_entry *entry = *entryp;
int ret;
- ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+ ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ESRCH &&
- (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
- *entryp = rcu_dereference(entry->next);
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
return 1;
- }
kfree_skb(skb);
}
@@ -182,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
static unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
- struct nf_hook_entry **entryp)
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
{
- unsigned int verdict;
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
- do {
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
repeat:
- verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
if (verdict != NF_ACCEPT) {
if (verdict != NF_REPEAT)
return verdict;
goto repeat;
}
- *entryp = rcu_dereference((*entryp)->next);
- } while (*entryp);
+ i++;
+ }
+ *index = i;
return NF_ACCEPT;
}
+/* Caller must hold rcu read-side lock */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
- struct nf_hook_entry *hook_entry = entry->hook;
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
const struct nf_afinfo *afinfo;
+ const struct net *net;
+ unsigned int i;
int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
nf_queue_entry_release_refs(entry);
+ i = entry->hook_index;
+ if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+ kfree_skb(skb);
+ kfree(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT)
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
@@ -220,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
if (verdict == NF_ACCEPT) {
- hook_entry = rcu_dereference(hook_entry->next);
- if (hook_entry)
next_hook:
- verdict = nf_iterate(skb, &entry->state, &hook_entry);
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
-okfn:
local_bh_disable();
entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, &entry->state, &hook_entry, verdict);
- if (err == 1) {
- if (hook_entry)
- goto next_hook;
- goto okfn;
- }
+ err = nf_queue(skb, &entry->state, hooks, i, verdict);
+ if (err == 1)
+ goto next_hook;
break;
case NF_STOLEN:
break;
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index c68c1e58b362..d2a9e6b5d01f 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -33,7 +33,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
reg->get_optmin, reg->get_optmax))) {
- NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa33c59..929927171426 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table == NULL)
goto err2;
- nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+ table->name = nla_strdup(name, GFP_KERNEL);
+ if (table->name == NULL)
+ goto err3;
+
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err3;
+ goto err4;
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
+err4:
+ kfree(table->name);
err3:
kfree(table);
err2:
@@ -855,6 +860,10 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
if (IS_ERR(table))
return PTR_ERR(table);
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ table->use > 0)
+ return -EBUSY;
+
ctx.afi = afi;
ctx.table = table;
@@ -865,6 +874,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
+ kfree(ctx->table->name);
kfree(ctx->table);
module_put(ctx->afi->owner);
}
@@ -1240,10 +1250,14 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
module_put(basechain->type->owner);
free_percpu(basechain->stats);
+ if (basechain->stats)
+ static_branch_dec(&nft_counters_enabled);
if (basechain->ops[0].dev != NULL)
dev_put(basechain->ops[0].dev);
+ kfree(chain->name);
kfree(basechain);
} else {
+ kfree(chain->name);
kfree(chain);
}
}
@@ -1325,155 +1339,18 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
dev_put(hook->dev);
}
-static int nf_tables_newchain(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ u8 policy, bool create)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nlattr * uninitialized_var(name);
- struct nft_af_info *afi;
- struct nft_table *table;
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats __percpu *stats;
+ struct net *net = ctx->net;
struct nft_chain *chain;
- struct nft_base_chain *basechain = NULL;
- u8 genmask = nft_genmask_next(net);
- int family = nfmsg->nfgen_family;
- u8 policy = NF_ACCEPT;
- u64 handle = 0;
unsigned int i;
- struct nft_stats __percpu *stats;
int err;
- bool create;
- struct nft_ctx ctx;
-
- create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
-
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
- if (IS_ERR(table))
- return PTR_ERR(table);
-
- chain = NULL;
- name = nla[NFTA_CHAIN_NAME];
-
- if (nla[NFTA_CHAIN_HANDLE]) {
- handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
- chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
- if (IS_ERR(chain))
- return PTR_ERR(chain);
- } else {
- chain = nf_tables_chain_lookup(table, name, genmask);
- if (IS_ERR(chain)) {
- if (PTR_ERR(chain) != -ENOENT)
- return PTR_ERR(chain);
- chain = NULL;
- }
- }
-
- if (nla[NFTA_CHAIN_POLICY]) {
- if (chain != NULL &&
- !nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- if (chain == NULL &&
- nla[NFTA_CHAIN_HOOK] == NULL)
- return -EOPNOTSUPP;
-
- policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
- switch (policy) {
- case NF_DROP:
- case NF_ACCEPT:
- break;
- default:
- return -EINVAL;
- }
- }
-
- if (chain != NULL) {
- struct nft_stats *stats = NULL;
- struct nft_trans *trans;
-
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
-
- if (nla[NFTA_CHAIN_HOOK]) {
- struct nft_base_chain *basechain;
- struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
-
- if (!nft_is_base_chain(chain))
- return -EBUSY;
-
- err = nft_chain_parse_hook(net, nla, afi, &hook,
- create);
- if (err < 0)
- return err;
-
- basechain = nft_base_chain(chain);
- if (basechain->type != hook.type) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
-
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
- }
- nft_chain_release_hook(&hook);
- }
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- struct nft_chain *chain2;
-
- chain2 = nf_tables_chain_lookup(table,
- nla[NFTA_CHAIN_NAME],
- genmask);
- if (IS_ERR(chain2))
- return PTR_ERR(chain2);
- }
-
- if (nla[NFTA_CHAIN_COUNTERS]) {
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats))
- return PTR_ERR(stats);
- }
-
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
- sizeof(struct nft_trans_chain));
- if (trans == NULL) {
- free_percpu(stats);
- return -ENOMEM;
- }
-
- nft_trans_chain_stats(trans) = stats;
- nft_trans_chain_update(trans) = true;
-
- if (nla[NFTA_CHAIN_POLICY])
- nft_trans_chain_policy(trans) = policy;
- else
- nft_trans_chain_policy(trans) = -1;
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- nla_strlcpy(nft_trans_chain_name(trans), name,
- NFT_CHAIN_MAXNAMELEN);
- }
- list_add_tail(&trans->list, &net->nft.commit_list);
- return 0;
- }
if (table->use == UINT_MAX)
return -EOVERFLOW;
@@ -1504,14 +1381,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return PTR_ERR(stats);
}
basechain->stats = stats;
- } else {
- stats = netdev_alloc_pcpu_stats(struct nft_stats);
- if (stats == NULL) {
- nft_chain_release_hook(&hook);
- kfree(basechain);
- return -ENOMEM;
- }
- rcu_assign_pointer(basechain->stats, stats);
+ static_branch_inc(&nft_counters_enabled);
}
hookfn = hook.type->hooks[hook.num];
@@ -1539,31 +1409,204 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (chain == NULL)
return -ENOMEM;
}
-
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ if (!chain->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ ctx->chain = chain;
+ err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
+
return 0;
err2:
nf_tables_unregister_hooks(net, table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
+
return err;
}
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ bool create)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_chain *chain = ctx->chain;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats *stats = NULL;
+ struct nft_chain_hook hook;
+ const struct nlattr *name;
+ struct nf_hook_ops *ops;
+ struct nft_trans *trans;
+ int err, i;
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ if (!nft_is_base_chain(chain))
+ return -EBUSY;
+
+ err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+ create);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < afi->nops; i++) {
+ ops = &basechain->ops[i];
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nft_chain *chain2;
+
+ chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
+ genmask);
+ if (IS_ERR(chain2))
+ return PTR_ERR(chain2);
+ }
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ if (!nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
+ }
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL) {
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
+
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
+
+ name = nla[NFTA_CHAIN_NAME];
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nft_trans_chain_name(trans) =
+ nla_strdup(name, GFP_KERNEL);
+ if (!nft_trans_chain_name(trans)) {
+ kfree(trans);
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+ }
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nlattr * uninitialized_var(name);
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ u8 policy = NF_ACCEPT;
+ struct nft_ctx ctx;
+ u64 handle = 0;
+ bool create;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = NULL;
+ name = nla[NFTA_CHAIN_NAME];
+
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
+ chain = nf_tables_chain_lookup(table, name, genmask);
+ if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) != -ENOENT)
+ return PTR_ERR(chain);
+ chain = NULL;
+ }
+ }
+
+ if (nla[NFTA_CHAIN_POLICY]) {
+ if (chain != NULL &&
+ !nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ if (chain == NULL &&
+ nla[NFTA_CHAIN_HOOK] == NULL)
+ return -EOPNOTSUPP;
+
+ policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
+ switch (policy) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+
+ if (chain != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ return nf_tables_updchain(&ctx, genmask, policy, create);
+ }
+
+ return nf_tables_addchain(&ctx, family, genmask, policy, create);
+}
+
static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -1574,8 +1617,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
+ struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ u32 use;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1588,11 +1634,30 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->use > 0)
+
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ chain->use > 0)
return -EBUSY;
nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ use = chain->use;
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(net, rule))
+ continue;
+ use--;
+
+ err = nft_delrule(&ctx, rule);
+ if (err < 0)
+ return err;
+ }
+
+ /* There are rules and elements that are still holding references to us,
+ * we cannot do a recursive removal in this case.
+ */
+ if (use > 0)
+ return -EBUSY;
+
return nft_delchain(&ctx);
}
@@ -1977,8 +2042,8 @@ err:
}
struct nft_rule_dump_ctx {
- char table[NFT_TABLE_MAXNAMELEN];
- char chain[NFT_CHAIN_MAXNAMELEN];
+ char *table;
+ char *chain;
};
static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2002,7 +2067,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx && ctx->table[0] &&
+ if (ctx && ctx->table &&
strcmp(ctx->table, table->name) != 0)
continue;
@@ -2042,7 +2107,13 @@ done:
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_rule_dump_ctx *ctx = cb->data;
+
+ if (ctx) {
+ kfree(ctx->table);
+ kfree(ctx->chain);
+ kfree(ctx);
+ }
return 0;
}
@@ -2074,12 +2145,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (!ctx)
return -ENOMEM;
- if (nla[NFTA_RULE_TABLE])
- nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
- sizeof(ctx->table));
- if (nla[NFTA_RULE_CHAIN])
- nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
- sizeof(ctx->chain));
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_KERNEL);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_KERNEL);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
c.data = ctx;
}
@@ -2621,7 +2703,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
unsigned long *inuse;
unsigned int n = 0, min = 0;
- p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+ p = strchr(name, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
@@ -2652,7 +2734,10 @@ cont:
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, min + n);
+ set->name = kasprintf(GFP_KERNEL, name, min + n);
+ if (!set->name)
+ return -ENOMEM;
+
list_for_each_entry(i, &ctx->table->sets, list) {
if (!nft_is_active_next(ctx->net, i))
continue;
@@ -2929,7 +3014,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- char name[NFT_SET_MAXNAMELEN];
+ char *name;
unsigned int size;
bool create;
u64 timeout;
@@ -3075,8 +3160,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
goto err1;
}
- nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+ if (!name) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
err = nf_tables_set_alloc_name(&ctx, set, name);
+ kfree(name);
if (err < 0)
goto err2;
@@ -3126,6 +3217,7 @@ static void nft_set_destroy(struct nft_set *set)
{
set->ops->destroy(set);
module_put(set->ops->type->owner);
+ kfree(set->name);
kvfree(set);
}
@@ -3159,7 +3251,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings))
+
+ if (!list_empty(&set->bindings) ||
+ (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0))
return -EBUSY;
return nft_delset(&ctx, set);
@@ -4209,7 +4303,7 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
list_for_each_entry(obj, &table->objects, list) {
if (!nla_strcmp(nla, obj->name) &&
- objtype == obj->type->type &&
+ objtype == obj->ops->type->type &&
nft_active_genmask(obj, genmask))
return obj;
}
@@ -4231,6 +4325,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
+ const struct nft_object_ops *ops;
struct nft_object *obj;
int err;
@@ -4243,16 +4338,27 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
}
+ if (type->select_ops) {
+ ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err1;
+ }
+ } else {
+ ops = type->ops;
+ }
+
err = -ENOMEM;
- obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
+ obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
if (obj == NULL)
goto err1;
- err = type->init(ctx, (const struct nlattr * const *)tb, obj);
+ err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
- obj->type = type;
+ obj->ops = ops;
+
return obj;
err2:
kfree(obj);
@@ -4268,7 +4374,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
nest = nla_nest_start(skb, attr);
if (!nest)
goto nla_put_failure;
- if (obj->type->dump(skb, obj, reset) < 0)
+ if (obj->ops->dump(skb, obj, reset) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
@@ -4363,18 +4469,24 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err1;
}
obj->table = table;
- nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+ obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+ if (!obj->name) {
+ err = -ENOMEM;
+ goto err2;
+ }
err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&obj->list, &table->objects);
table->use++;
return 0;
+err3:
+ kfree(obj->name);
err2:
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
kfree(obj);
err1:
module_put(type->owner);
@@ -4401,7 +4513,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
goto nla_put_failure;
@@ -4415,7 +4527,7 @@ nla_put_failure:
}
struct nft_obj_filter {
- char table[NFT_OBJ_MAXNAMELEN];
+ char *table;
u32 type;
};
@@ -4455,7 +4567,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
goto cont;
if (filter &&
filter->type != NFT_OBJECT_UNSPEC &&
- obj->type->type != filter->type)
+ obj->ops->type->type != filter->type)
goto cont;
if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
@@ -4480,7 +4592,10 @@ done:
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+	struct nft_obj_filter *filter = cb->data;
+
+	/* Dump teardown: release the optional filter and its duplicated
+	 * table name.  cb->data may be NULL (the dump loop itself tests
+	 * "filter &&" before using it), so guard the dereference; the
+	 * kfree(cb->data) this replaces tolerated NULL on its own.
+	 */
+	if (filter) {
+		kfree(filter->table);
+		kfree(filter);
+	}
return 0;
}
@@ -4494,9 +4609,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
if (!filter)
return ERR_PTR(-ENOMEM);
- if (nla[NFTA_OBJ_TABLE])
- nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
- NFT_TABLE_MAXNAMELEN);
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+ if (!filter->table) {
+ kfree(filter);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
if (nla[NFTA_OBJ_TYPE])
filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
@@ -4576,10 +4695,11 @@ err:
static void nft_obj_destroy(struct nft_object *obj)
{
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
- module_put(obj->type->owner);
+ module_put(obj->ops->type->owner);
+ kfree(obj->name);
kfree(obj);
}
@@ -4662,6 +4782,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
+ char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
@@ -4673,7 +4794,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+ nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+ nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4842,7 +4965,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans)[0])
+ if (nft_trans_chain_name(trans))
strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
if (!nft_is_base_chain(trans->ctx.chain))
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbeadcb118..dfd0bf3810d2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_RULE] = "rule",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
return true;
}
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+/*
+ * Add one packet and its length to this CPU's counters of the base chain
+ * behind @chain.  Bottom halves are disabled around the update and the
+ * counters are modified inside a u64_stats update section.
+ */
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+					    const struct nft_pktinfo *pkt)
+{
+	struct nft_stats *cpu_stats;
+
+	local_bh_disable();
+
+	cpu_stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+
+	u64_stats_update_begin(&cpu_stats->syncp);
+	cpu_stats->bytes += pkt->skb->len;
+	cpu_stats->pkts += 1;
+	u64_stats_update_end(&cpu_stats->syncp);
+
+	local_bh_enable();
+}
+
struct nft_jumpstack {
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
struct nft_traceinfo info;
@@ -220,13 +235,8 @@ next_rule:
nft_trace_packet(&info, basechain, NULL, -1,
NFT_TRACETYPE_POLICY);
- rcu_read_lock_bh();
- stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
- u64_stats_update_begin(&stats->syncp);
- stats->pkts++;
- stats->bytes += pkt->skb->len;
- u64_stats_update_end(&stats->syncp);
- rcu_read_unlock_bh();
+ if (static_branch_unlikely(&nft_counters_enabled))
+ nft_update_chain_stats(basechain, pkt);
return nft_base_chain(basechain)->policy;
}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1b15e7a5793..e1dc527a493b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
NFTA_TRACE_PAD);
}
+/*
+ * Tell whether the trace event carries a verdict that names a target chain:
+ * true only for RETURN/RULE trace types whose verdict code is NFT_JUMP or
+ * NFT_GOTO.  info->verdict is not touched for any other trace type.
+ */
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+	if (info->type != NFT_TRACETYPE_RETURN &&
+	    info->type != NFT_TRACETYPE_RULE)
+		return false;
+
+	return info->verdict->code == NFT_JUMP ||
+	       info->verdict->code == NFT_GOTO;
+}
+
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
@@ -175,13 +196,12 @@ void nft_trace_notify(struct nft_traceinfo *info)
return;
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
- nla_total_size(NFT_TABLE_MAXNAMELEN) +
- nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(strlen(info->chain->table->name)) +
+ nla_total_size(strlen(info->chain->name)) +
nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
nla_total_size(sizeof(u32)) + /* verdict code */
- nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
nla_total_size(sizeof(u32)) + /* id */
nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
@@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info)
nla_total_size(sizeof(u32)) + /* nfproto */
nla_total_size(sizeof(u32)); /* policy */
+ if (nft_trace_have_verdict_chain(info))
+ size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb)
return;
@@ -217,14 +240,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (trace_fill_id(skb, pkt->skb))
goto nla_put_failure;
- if (info->chain) {
- if (nla_put_string(skb, NFTA_TRACE_CHAIN,
- info->chain->name))
- goto nla_put_failure;
- if (nla_put_string(skb, NFTA_TRACE_TABLE,
- info->chain->table->name))
- goto nla_put_failure;
- }
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+ goto nla_put_failure;
if (nf_trace_fill_rule_info(skb, info))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 400e9ae97153..32b1c0b44e79 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
-ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ctnl_timeout_parse_policy(void *timeouts,
+ const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
int ret = 0;
@@ -74,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
{
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout, *matching = NULL;
char *name;
int ret;
@@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+ const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -363,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
+ const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts;
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
- unsigned int *timeouts;
int ret;
if (!cda[CTA_TIMEOUT_L3PROTO] ||
@@ -401,7 +402,7 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -453,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
- __u16 l3num;
- __u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct sk_buff *skb2;
int ret, err;
+ __u16 l3num;
+ __u8 l4num;
if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
return -EINVAL;
@@ -505,7 +506,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
- rcu_read_lock();
list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -521,7 +521,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
break;
}
err:
- rcu_read_unlock();
return matching;
}
@@ -572,6 +571,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
{
struct ctnl_timeout *cur, *tmp;
+ nf_ct_unconfirmed_destroy(net);
ctnl_untimeout(net, NULL);
list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c684ba95dbb4..cad6498f10b0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -606,7 +606,7 @@ nla_put_failure:
return -1;
}
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
.ulog = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 16fa04086880..c9796629858f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -41,6 +41,10 @@
#include "../bridge/br_private.h"
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#define NFQNL_QMAX_DEFAULT 1024
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
@@ -612,6 +616,18 @@ nlmsg_failure:
return NULL;
}
+/*
+ * Return true when the queued skb is attached to a conntrack entry that has
+ * IPS_DYING set but IPS_CONFIRMED clear.  Always false when conntrack is not
+ * built or the skb has no conntrack attached.
+ */
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	const unsigned long mask = IPS_CONFIRMED | IPS_DYING;
+	const struct nf_conn *conn = (void *)skb_nfct(entry->skb);
+
+	if (!conn)
+		return false;
+
+	return (conn->status & mask) == IPS_DYING;
+#else
+	return false;
+#endif
+}
+
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
@@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
}
spin_lock_bh(&queue->lock);
+ if (nf_ct_drop_unconfirmed(entry))
+ goto err_out_free_nskb;
+
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
@@ -928,7 +947,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
unsigned int instances = 0;
int i;
- rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
@@ -938,7 +956,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
instances++;
}
}
- rcu_read_unlock();
return instances;
}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 67a710ebde09..eefe3b409925 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -175,15 +175,21 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static struct nft_object_type nft_counter_obj __read_mostly = {
- .type = NFT_OBJECT_COUNTER,
+static struct nft_object_type nft_counter_obj_type;
+static const struct nft_object_ops nft_counter_obj_ops = {
+ .type = &nft_counter_obj_type,
.size = sizeof(struct nft_counter_percpu_priv),
- .maxattr = NFTA_COUNTER_MAX,
- .policy = nft_counter_policy,
.eval = nft_counter_obj_eval,
.init = nft_counter_obj_init,
.destroy = nft_counter_obj_destroy,
.dump = nft_counter_obj_dump,
+};
+
+static struct nft_object_type nft_counter_obj_type __read_mostly = {
+ .type = NFT_OBJECT_COUNTER,
+ .ops = &nft_counter_obj_ops,
+ .maxattr = NFTA_COUNTER_MAX,
+ .policy = nft_counter_policy,
.owner = THIS_MODULE,
};
@@ -271,7 +277,7 @@ static int __init nft_counter_module_init(void)
for_each_possible_cpu(cpu)
seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
- err = nft_register_obj(&nft_counter_obj);
+ err = nft_register_obj(&nft_counter_obj_type);
if (err < 0)
return err;
@@ -281,14 +287,14 @@ static int __init nft_counter_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
return err;
}
static void __exit nft_counter_module_exit(void)
{
nft_unregister_expr(&nft_counter_type);
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
}
module_init(nft_counter_module_init);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1678e9e75e8e..bd0975d7dd6f 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -904,15 +904,21 @@ static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
[NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
};
-static struct nft_object_type nft_ct_helper_obj __read_mostly = {
- .type = NFT_OBJECT_CT_HELPER,
+static struct nft_object_type nft_ct_helper_obj_type;
+static const struct nft_object_ops nft_ct_helper_obj_ops = {
+ .type = &nft_ct_helper_obj_type,
.size = sizeof(struct nft_ct_helper_obj),
- .maxattr = NFTA_CT_HELPER_MAX,
- .policy = nft_ct_helper_policy,
.eval = nft_ct_helper_obj_eval,
.init = nft_ct_helper_obj_init,
.destroy = nft_ct_helper_obj_destroy,
.dump = nft_ct_helper_obj_dump,
+};
+
+static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .ops = &nft_ct_helper_obj_ops,
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
.owner = THIS_MODULE,
};
@@ -930,7 +936,7 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
- err = nft_register_obj(&nft_ct_helper_obj);
+ err = nft_register_obj(&nft_ct_helper_obj_type);
if (err < 0)
goto err2;
@@ -945,7 +951,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
- nft_unregister_obj(&nft_ct_helper_obj);
+ nft_unregister_obj(&nft_ct_helper_obj_type);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1ec49fe5845f..a0a93d987a3b 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -23,6 +24,7 @@ struct nft_exthdr {
u8 len;
u8 op;
enum nft_registers dreg:8;
+ enum nft_registers sreg:8;
u8 flags;
};
@@ -61,6 +63,26 @@ err:
regs->verdict.code = NFT_BREAK;
}
+/*
+ * Fetch the complete TCP header (fixed part plus options) of @pkt.
+ *
+ * @len is the capacity of @buffer, which skb_header_pointer() may use as a
+ * bounce buffer.  *@tcphdr_len receives the header length announced by the
+ * packet once the fixed header could be read, even if that length is then
+ * rejected.  Returns NULL when the packet is not TCP, the header cannot be
+ * read, or the announced length is shorter than struct tcphdr or larger
+ * than @len.
+ */
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+		       unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+	struct tcphdr *th;
+	unsigned int hdrlen;
+
+	if (!(pkt->tprot_set && pkt->tprot == IPPROTO_TCP))
+		return NULL;
+
+	th = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*th), buffer);
+	if (th == NULL)
+		return NULL;
+
+	hdrlen = __tcp_hdrlen(th);
+	*tcphdr_len = hdrlen;
+
+	if (hdrlen > len || hdrlen < sizeof(*th))
+		return NULL;
+
+	return skb_header_pointer(pkt->skb, pkt->xt.thoff, hdrlen, buffer);
+}
+
static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -72,18 +94,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct tcphdr *tcph;
u8 *opt;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
- if (!tcph)
- goto err;
-
- tcphdr_len = __tcp_hdrlen(tcph);
- if (tcphdr_len < sizeof(*tcph))
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
goto err;
@@ -115,6 +126,88 @@ err:
regs->verdict.code = NFT_BREAK;
}
+/*
+ * Overwrite priv->len bytes (2 or 4) at priv->offset inside the first TCP
+ * option of type priv->type with the value loaded from register priv->sreg,
+ * and patch the TCP checksum accordingly.
+ *
+ * The packet is left untouched when it is not TCP, the option is absent or
+ * too short, the header cannot be made writable, the value is unchanged, or
+ * (for TCPOPT_MSS) the new value would not lower the current one.
+ */
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+				    struct nft_regs *regs,
+				    const struct nft_pktinfo *pkt)
+{
+	u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	unsigned int i, optl, tcphdr_len, offset;
+	struct tcphdr *tcph;
+	u8 *opt;
+	u32 src;
+
+	tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+	if (!tcph)
+		return;
+
+	opt = (u8 *)tcph;
+	for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+		union {
+			u8 octet;
+			__be16 v16;
+			__be32 v32;
+		} old, new;
+
+		optl = optlen(opt, i);
+
+		if (priv->type != opt[i])
+			continue;
+
+		if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+			return;
+
+		/* Make the whole TCP header writable, not just the bytes up
+		 * to the option start plus priv->len: the store below lands
+		 * at i + priv->offset, and a fully linear header means the
+		 * re-fetch is expected to point at packet data, not at buff.
+		 */
+		if (!skb_make_writable(pkt->skb, pkt->xt.thoff + tcphdr_len))
+			return;
+
+		tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+					      &tcphdr_len);
+		if (!tcph)
+			return;
+
+		/* skb_make_writable() may have moved the header; the old
+		 * opt pointer (or the stack bounce buffer) must not be used
+		 * for the read-modify-write any more.
+		 */
+		opt = (u8 *)tcph;
+
+		/* NOTE(review): src is a u32 register load that is narrowed
+		 * for the 16-bit case; confirm this picks the intended bytes
+		 * on big-endian hosts.
+		 */
+		src = regs->data[priv->sreg];
+		offset = i + priv->offset;
+
+		switch (priv->len) {
+		case 2:
+			old.v16 = get_unaligned((u16 *)(opt + offset));
+			new.v16 = src;
+
+			switch (priv->type) {
+			case TCPOPT_MSS:
+				/* increase can cause connection to stall */
+				if (ntohs(old.v16) <= ntohs(new.v16))
+					return;
+				break;
+			}
+
+			if (old.v16 == new.v16)
+				return;
+
+			put_unaligned(new.v16, (u16*)(opt + offset));
+			inet_proto_csum_replace2(&tcph->check, pkt->skb,
+						 old.v16, new.v16, false);
+			break;
+		case 4:
+			new.v32 = src;
+			old.v32 = get_unaligned((u32 *)(opt + offset));
+
+			if (old.v32 == new.v32)
+				return;
+
+			put_unaligned(new.v32, (u32*)(opt + offset));
+			inet_proto_csum_replace4(&tcph->check, pkt->skb,
+						 old.v32, new.v32, false);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		/* Only the first matching option is rewritten. */
+		return;
+	}
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -171,12 +264,57 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, priv->len);
}
-static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- const struct nft_exthdr *priv = nft_expr_priv(expr);
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+ int err;
- if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
- goto nla_put_failure;
+ if (!tb[NFTA_EXTHDR_SREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
+ return -EINVAL;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ if (offset < 2)
+ return -EOPNOTSUPP;
+
+ switch (len) {
+ case 2: break;
+ case 4: break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = offset;
+ priv->len = len;
+ priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
+ priv->flags = flags;
+ priv->op = op;
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
+{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
@@ -193,6 +331,26 @@ nla_put_failure:
return -1;
}
+/*
+ * Dump an exthdr "get" expression: the destination register followed by the
+ * attributes written by nft_exthdr_dump_common().  Returns -1 if the
+ * register attribute cannot be added.
+ */
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *exthdr = nft_expr_priv(expr);
+
+	return nft_dump_register(skb, NFTA_EXTHDR_DREG, exthdr->dreg) ?
+	       -1 : nft_exthdr_dump_common(skb, exthdr);
+}
+
+/*
+ * Dump an exthdr "set" expression: the source register followed by the
+ * attributes written by nft_exthdr_dump_common().  Returns -1 if the
+ * register attribute cannot be added.
+ */
+static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *exthdr = nft_expr_priv(expr);
+
+	return nft_dump_register(skb, NFTA_EXTHDR_SREG, exthdr->sreg) ?
+	       -1 : nft_exthdr_dump_common(skb, exthdr);
+}
+
static struct nft_expr_type nft_exthdr_type;
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
@@ -210,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.dump = nft_exthdr_dump,
};
+/* Expression ops for writing a TCP option field from a source register. */
+static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
+	.type		= &nft_exthdr_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.init		= nft_exthdr_tcp_set_init,
+	.eval		= nft_exthdr_tcp_set_eval,
+	.dump		= nft_exthdr_dump_set,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -219,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_EXTHDR_OP])
return &nft_exthdr_ipv6_ops;
- op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+ if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
+ return ERR_PTR(-EOPNOTSUPP);
+
+ op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
switch (op) {
case NFT_EXTHDR_OP_TCPOPT:
- return &nft_exthdr_tcp_ops;
+ if (tb[NFTA_EXTHDR_SREG])
+ return &nft_exthdr_tcp_set_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_tcp_ops;
+ break;
case NFT_EXTHDR_OP_IPV6:
- return &nft_exthdr_ipv6_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv6_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index 000000000000..3997ee36cfbd
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal <fw@strlen.de>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+/*
+ * Dispatch a fib lookup to the IPv4 or IPv6 implementation according to the
+ * skb's protocol and the requested result kind.  Any other protocol or
+ * result kind breaks rule evaluation with NFT_BREAK.
+ */
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	const bool want_oif = priv->result == NFT_FIB_RESULT_OIF ||
+			      priv->result == NFT_FIB_RESULT_OIFNAME;
+	const bool want_type = priv->result == NFT_FIB_RESULT_ADDRTYPE;
+
+	switch (ntohs(pkt->skb->protocol)) {
+	case ETH_P_IP:
+		if (want_oif) {
+			nft_fib4_eval(expr, regs, pkt);
+			return;
+		}
+		if (want_type) {
+			nft_fib4_eval_type(expr, regs, pkt);
+			return;
+		}
+		break;
+	case ETH_P_IPV6:
+		if (want_oif) {
+			nft_fib6_eval(expr, regs, pkt);
+			return;
+		}
+		if (want_type) {
+			nft_fib6_eval_type(expr, regs, pkt);
+			return;
+		}
+		break;
+	}
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+/* Forward declaration: the ops below point back at the expression type. */
+static struct nft_expr_type nft_fib_netdev_type;
+
+/* fib expression ops for the netdev family; only eval is family specific. */
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+	.type		= &nft_fib_netdev_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.init		= nft_fib_init,
+	.validate	= nft_fib_validate,
+	.eval		= nft_fib_netdev_eval,
+	.dump		= nft_fib_dump,
+};
+
+/* "fib" expression type as registered for NFPROTO_NETDEV tables. */
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+	.name		= "fib",
+	.family		= NFPROTO_NETDEV,
+	.owner		= THIS_MODULE,
+	.ops		= &nft_fib_netdev_ops,
+	.maxattr	= NFTA_FIB_MAX,
+	.policy		= nft_fib_policy,
+};
+
+/* Module entry point: register the netdev fib expression type. */
+static int __init nft_fib_netdev_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_fib_netdev_type);
+
+	return err;
+}
+
+/* Module exit point: unregister the netdev fib expression type. */
+static void __exit nft_fib_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
+/* NOTE(review): 5 is presumably the numeric value of NFPROTO_NETDEV, the
+ * family this expression type declares above - confirm against the uapi
+ * header.
+ */
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 14538b1d4d11..a9fc298ef4c3 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -168,9 +168,9 @@ static const struct nft_expr_ops nft_limit_pkts_ops = {
.dump = nft_limit_pkts_dump,
};
-static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_limit_bytes_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
@@ -179,29 +179,29 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
-static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_limit_bytes_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_init(priv, tb);
}
-static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_limit_bytes_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
-static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
+static const struct nft_expr_ops nft_limit_bytes_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
- .eval = nft_limit_pkt_bytes_eval,
- .init = nft_limit_pkt_bytes_init,
- .dump = nft_limit_pkt_bytes_dump,
+ .eval = nft_limit_bytes_eval,
+ .init = nft_limit_bytes_init,
+ .dump = nft_limit_bytes_dump,
};
static const struct nft_expr_ops *
@@ -215,7 +215,7 @@ nft_limit_select_ops(const struct nft_ctx *ctx,
case NFT_LIMIT_PKTS:
return &nft_limit_pkts_ops;
case NFT_LIMIT_PKT_BYTES:
- return &nft_limit_pkt_bytes_ops;
+ return &nft_limit_bytes_ops;
}
return ERR_PTR(-EOPNOTSUPP);
}
@@ -229,14 +229,133 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
.owner = THIS_MODULE,
};
+/*
+ * Evaluate a packet-count limit object: charge the precomputed per-packet
+ * cost and set the verdict to NFT_BREAK when nft_limit_eval() returns
+ * non-zero. @pkt is not consulted.
+ */
+static void nft_limit_obj_pkts_eval(struct nft_object *obj,
+				    struct nft_regs *regs,
+				    const struct nft_pktinfo *pkt)
+{
+	struct nft_limit_pkts *lp = nft_obj_data(obj);
+
+	if (!nft_limit_eval(&lp->limit, lp->cost))
+		return;
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+/*
+ * Initialise a packet-count limit object from netlink attributes.
+ *
+ * Returns the negative error from nft_limit_init() on failure. On success
+ * the per-packet cost (nsecs / rate) is cached in the object and 0 is
+ * returned.
+ */
+static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
+				   const struct nlattr * const tb[],
+				   struct nft_object *obj)
+{
+	struct nft_limit_pkts *lp = nft_obj_data(obj);
+	int ret = nft_limit_init(&lp->limit, tb);
+
+	if (ret < 0)
+		return ret;
+
+	lp->cost = div64_u64(lp->limit.nsecs, lp->limit.rate);
+
+	return 0;
+}
+
+/*
+ * Dump a packet-count limit object into @skb via nft_limit_dump(), tagged
+ * as NFT_LIMIT_PKTS. @reset is accepted but not used here.
+ */
+static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
+				   struct nft_object *obj,
+				   bool reset)
+{
+	const struct nft_limit_pkts *lp = nft_obj_data(obj);
+	const struct nft_limit *limit = &lp->limit;
+
+	return nft_limit_dump(skb, limit, NFT_LIMIT_PKTS);
+}
+
+/* Forward declaration: the ops tables point at the object type descriptor. */
+static struct nft_object_type nft_limit_obj_type;
+
+/*
+ * Object callbacks for packet-count limit objects.
+ *
+ * NOTE(review): .size goes through NFT_EXPR_SIZE() here, whereas the
+ * byte-limit object ops in this file use a plain sizeof() -- confirm the
+ * expression-size macro is intended for an object.
+ */
+static const struct nft_object_ops nft_limit_obj_pkts_ops = {
+	.type	= &nft_limit_obj_type,
+	.size	= NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+	.eval	= nft_limit_obj_pkts_eval,
+	.init	= nft_limit_obj_pkts_init,
+	.dump	= nft_limit_obj_pkts_dump,
+};
+
+/*
+ * Evaluate a byte-rate limit object: the cost of this packet is
+ * nsecs * skb->len / rate. The verdict becomes NFT_BREAK when
+ * nft_limit_eval() returns non-zero for that cost.
+ */
+static void nft_limit_obj_bytes_eval(struct nft_object *obj,
+				     struct nft_regs *regs,
+				     const struct nft_pktinfo *pkt)
+{
+	struct nft_limit *limit = nft_obj_data(obj);
+	u64 cost;
+
+	cost = div64_u64(limit->nsecs * pkt->skb->len, limit->rate);
+	if (!nft_limit_eval(limit, cost))
+		return;
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+/*
+ * Initialise a byte-rate limit object from netlink attributes; the result
+ * of nft_limit_init() is returned unchanged. @ctx is not used.
+ */
+static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
+				    const struct nlattr * const tb[],
+				    struct nft_object *obj)
+{
+	struct nft_limit *limit = nft_obj_data(obj);
+
+	return nft_limit_init(limit, tb);
+}
+
+/*
+ * Dump a byte-rate limit object into @skb via nft_limit_dump(), tagged as
+ * NFT_LIMIT_PKT_BYTES. @reset is accepted but not used here.
+ */
+static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
+				    struct nft_object *obj,
+				    bool reset)
+{
+	const struct nft_limit *limit = nft_obj_data(obj);
+
+	return nft_limit_dump(skb, limit, NFT_LIMIT_PKT_BYTES);
+}
+
+/*
+ * Object callbacks for byte-rate limit objects.
+ *
+ * nft_limit_obj_type is already forward-declared ahead of
+ * nft_limit_obj_pkts_ops, so it is not declared a second time here.
+ */
+static const struct nft_object_ops nft_limit_obj_bytes_ops = {
+	.type	= &nft_limit_obj_type,
+	.size	= sizeof(struct nft_limit),
+	.init	= nft_limit_obj_bytes_init,
+	.eval	= nft_limit_obj_bytes_eval,
+	.dump	= nft_limit_obj_bytes_dump,
+};
+
+/*
+ * Pick the ops table for a limit object based on NFTA_LIMIT_TYPE.
+ *
+ * A missing attribute selects the packet-count ops. An unrecognised type
+ * yields ERR_PTR(-EOPNOTSUPP). @ctx is not used.
+ */
+static const struct nft_object_ops *
+nft_limit_obj_select_ops(const struct nft_ctx *ctx,
+			 const struct nlattr * const tb[])
+{
+	u32 type;
+
+	if (!tb[NFTA_LIMIT_TYPE])
+		return &nft_limit_obj_pkts_ops;
+
+	type = ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]));
+	if (type == NFT_LIMIT_PKTS)
+		return &nft_limit_obj_pkts_ops;
+	if (type == NFT_LIMIT_PKT_BYTES)
+		return &nft_limit_obj_bytes_ops;
+
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+/*
+ * Object type descriptor for NFT_OBJECT_LIMIT; the concrete ops table is
+ * chosen per object by nft_limit_obj_select_ops().
+ */
+static struct nft_object_type nft_limit_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_LIMIT,
+	.select_ops	= nft_limit_obj_select_ops,
+	.policy		= nft_limit_policy,
+	.maxattr	= NFTA_LIMIT_MAX,
+	.owner		= THIS_MODULE,
+};
+
static int __init nft_limit_module_init(void)
{
- return nft_register_expr(&nft_limit_type);
+ int err;
+
+ err = nft_register_obj(&nft_limit_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_limit_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_limit_obj_type);
+ return err;
}
static void __exit nft_limit_module_exit(void)
{
nft_unregister_expr(&nft_limit_type);
+ nft_unregister_obj(&nft_limit_obj_type);
}
module_init(nft_limit_module_init);
@@ -245,3 +364,4 @@ module_exit(nft_limit_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("limit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1dd428fbaaa3..7bcdc48f3d73 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -22,7 +22,7 @@ static void nft_objref_eval(const struct nft_expr *expr,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_init(const struct nft_ctx *ctx,
@@ -54,7 +54,8 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_object *obj = nft_objref_priv(expr);
if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+ nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
+ htonl(obj->ops->type->type)))
goto nla_put_failure;
return 0;
@@ -104,7 +105,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
return;
}
obj = *nft_set_ext_obj(ext);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_map_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7d699bbd45b0..e110b0ebbf58 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
if (!uh)
return false;
- return uh->check;
+ return (__force bool)uh->check;
}
static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 25e33159be57..0ed124a93fcf 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -151,14 +151,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
return nft_quota_do_dump(skb, priv, reset);
}
-static struct nft_object_type nft_quota_obj __read_mostly = {
- .type = NFT_OBJECT_QUOTA,
+static struct nft_object_type nft_quota_obj_type;
+static const struct nft_object_ops nft_quota_obj_ops = {
+ .type = &nft_quota_obj_type,
.size = sizeof(struct nft_quota),
- .maxattr = NFTA_QUOTA_MAX,
- .policy = nft_quota_policy,
.init = nft_quota_obj_init,
.eval = nft_quota_obj_eval,
.dump = nft_quota_obj_dump,
+};
+
+static struct nft_object_type nft_quota_obj_type __read_mostly = {
+ .type = NFT_OBJECT_QUOTA,
+ .ops = &nft_quota_obj_ops,
+ .maxattr = NFTA_QUOTA_MAX,
+ .policy = nft_quota_policy,
.owner = THIS_MODULE,
};
@@ -209,7 +215,7 @@ static int __init nft_quota_module_init(void)
{
int err;
- err = nft_register_obj(&nft_quota_obj);
+ err = nft_register_obj(&nft_quota_obj_type);
if (err < 0)
return err;
@@ -219,14 +225,14 @@ static int __init nft_quota_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
return err;
}
static void __exit nft_quota_module_exit(void)
{
nft_unregister_expr(&nft_quota_type);
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
}
module_init(nft_quota_module_init);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index c7383d8f88d0..a6b7d05aeacf 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,6 +23,43 @@ struct nft_rt {
enum nft_registers dreg:8;
};
+/*
+ * Derive a TCP MSS for @pkt from path MTUs.
+ *
+ * Starts from the MTU of @skbdst, then routes back towards the packet's
+ * source address through the family's nf_afinfo route hook and takes the
+ * smaller MTU of the two. The IP + TCP header length for the family is
+ * subtracted from the result.
+ *
+ * Returns TCP_MSS_DEFAULT when the MTU does not exceed the header length
+ * or is larger than 0xffff; otherwise mtu minus the header length.
+ */
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
+{
+	const struct sk_buff *skb = pkt->skb;
+	const struct nf_afinfo *afinfo;
+	struct dst_entry *rev = NULL;
+	u32 hdrlen = sizeof(struct ipv6hdr);
+	u32 mtu = dst_mtu(skbdst);
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+
+	/* The reverse route targets the sender, hence daddr = saddr. */
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		fl.u.ip4.daddr = ip_hdr(skb)->saddr;
+		hdrlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
+		break;
+	case NFPROTO_IPV6:
+		fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+		hdrlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+		break;
+	}
+
+	afinfo = nf_get_afinfo(nft_pf(pkt));
+	if (afinfo) {
+		afinfo->route(nft_net(pkt), &rev, &fl, false);
+		if (rev) {
+			mtu = min(mtu, dst_mtu(rev));
+			dst_release(rev);
+		}
+	}
+
+	if (mtu > hdrlen && mtu <= 0xffff)
+		return mtu - hdrlen;
+
+	return TCP_MSS_DEFAULT;
+}
+
static void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -46,8 +83,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = rt_nexthop((const struct rtable *)dst,
- ip_hdr(skb)->daddr);
+ *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
@@ -57,6 +94,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
+ case NFT_RT_TCPMSS:
+ nft_reg_store16(dest, get_tcpmss(pkt, dst));
+ break;
default:
WARN_ON(1);
goto err;
@@ -67,7 +107,7 @@ err:
regs->verdict.code = NFT_BREAK;
}
-const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_DREG] = { .type = NLA_U32 },
[NFTA_RT_KEY] = { .type = NLA_U32 },
};
@@ -94,6 +134,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_NEXTHOP6:
len = sizeof(struct in6_addr);
break;
+ case NFT_RT_TCPMSS:
+ len = sizeof(u16);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -118,6 +161,29 @@ nla_put_failure:
return -1;
}
+/*
+ * Validate where an rt expression may be used.
+ *
+ * The nexthop and classid keys are accepted unconditionally. NFT_RT_TCPMSS
+ * is checked against the forward, local-out and post-routing hooks through
+ * nft_chain_validate_hooks(). Any other key is rejected with -EINVAL.
+ * @data is not used.
+ */
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			   const struct nft_data **data)
+{
+	const struct nft_rt *rt = nft_expr_priv(expr);
+
+	switch (rt->key) {
+	case NFT_RT_TCPMSS:
+		return nft_chain_validate_hooks(ctx->chain,
+						(1 << NF_INET_FORWARD) |
+						(1 << NF_INET_LOCAL_OUT) |
+						(1 << NF_INET_POST_ROUTING));
+	case NFT_RT_NEXTHOP4:
+	case NFT_RT_NEXTHOP6:
+	case NFT_RT_CLASSID:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
static struct nft_expr_type nft_rt_type;
static const struct nft_expr_ops nft_rt_get_ops = {
.type = &nft_rt_type,
@@ -125,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.eval = nft_rt_get_eval,
.init = nft_rt_get_init,
.dump = nft_rt_get_dump,
+ .validate = nft_rt_validate,
};
static struct nft_expr_type nft_rt_type __read_mostly = {
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index bce5382f1d49..d83a4ec5900d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,8 +19,9 @@
#include <net/netfilter/nf_tables.h>
struct nft_rbtree {
- rwlock_t lock;
struct rb_root root;
+ rwlock_t lock;
+ seqcount_t count;
};
struct nft_rbtree_elem {
@@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
}
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext,
+ unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const void *this;
int d;
- read_lock_bh(&priv->lock);
- parent = priv->root.rb_node;
+ parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
this = nft_set_ext_key(&rbe->ext);
d = memcmp(this, key, set->klen);
if (d < 0) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(this) &&
@@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
continue;
interval = rbe;
} else if (d > 0)
- parent = parent->rb_right;
+ parent = rcu_dereference_raw(parent->rb_right);
else {
if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
continue;
}
if (nft_rbtree_interval_end(rbe))
goto out;
- read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -84,15 +87,32 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- read_unlock_bh(&priv->lock);
return false;
}
+/*
+ * Look up @key in the rbtree set, trying without priv->lock first.
+ *
+ * A sequence-count snapshot is taken and __nft_rbtree_lookup() is run
+ * without holding priv->lock. A hit is returned immediately, as is a miss
+ * when read_seqcount_retry() reports no change since the snapshot.
+ * Otherwise the lookup is repeated under read_lock_bh(&priv->lock) and
+ * that result is returned.
+ *
+ * On a hit, *ext is filled in by __nft_rbtree_lookup().
+ */
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ bool ret;
+
+ /* first pass: no lock held, validated by the sequence count */
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return ret;
+
+ /* the sequence count moved during a miss: redo the lookup under the read lock */
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ read_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
@@ -130,7 +150,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
- rb_link_node(&new->node, parent, p);
+ rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
}
@@ -144,7 +164,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
int err;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
return err;
@@ -158,7 +180,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
}
@@ -264,6 +288,7 @@ static int nft_rbtree_init(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
rwlock_init(&priv->lock);
+ seqcount_init(&priv->count);
priv->root = RB_ROOT;
return 0;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e1648238a9c9..c83a3b5e1c6c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1192,16 +1192,10 @@ xt_replace_table(struct xt_table *table,
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- table->name, table->af,
- private->number);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ table->name, table->af, private->number);
}
#endif
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 623ef37de886..5a152e2acfd5 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
u8 proto;
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index e45a01255e70..58aa9dd3c5b7 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -77,10 +77,10 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
struct nf_nat_range newrange;
- NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
- xt_hooknum(par) == NF_INET_POST_ROUTING ||
- xt_hooknum(par) == NF_INET_LOCAL_OUT ||
- xt_hooknum(par) == NF_INET_LOCAL_IN);
+ WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
+ xt_hooknum(par) != NF_INET_POST_ROUTING &&
+ xt_hooknum(par) != NF_INET_LOCAL_OUT &&
+ xt_hooknum(par) != NF_INET_LOCAL_IN);
ct = nf_ct_get(skb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index c64aca611ac5..9dae4d665965 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- rcu_read_lock();
ai = nf_get_afinfo(family);
if (ai != NULL)
ai->route(net, (struct dst_entry **)&rt, &fl, false);
- rcu_read_unlock();
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d767e35fff6b..17d7705e3bd4 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return user_laddr;
laddr = 0;
- rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
@@ -125,7 +123,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -195,7 +193,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -208,7 +206,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
break;
default:
BUG();
@@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
return user_laddr;
laddr = NULL;
- rcu_read_lock();
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
@@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
}
read_unlock_bh(&indev->lock);
}
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index e329dabde35f..3b2be2ae6987 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- rcu_read_lock();
-
afinfo = nf_get_afinfo(NFPROTO_IPV6);
if (afinfo != NULL) {
const struct nf_ipv6_ops *v6ops;
@@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
} else {
route_err = 1;
}
- rcu_read_unlock();
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b8fd4ab762ed..ffa8eec980e9 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -58,8 +58,7 @@ struct xt_connlimit_rb {
static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
struct xt_connlimit_data {
- struct rb_root climit_root4[CONNLIMIT_SLOTS];
- struct rb_root climit_root6[CONNLIMIT_SLOTS];
+ struct rb_root climit_root[CONNLIMIT_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
@@ -144,7 +143,6 @@ static unsigned int check_hlist(struct net *net,
unsigned int length = 0;
*addit = true;
- rcu_read_lock();
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
@@ -179,8 +177,6 @@ static unsigned int check_hlist(struct net *net,
length++;
}
- rcu_read_unlock();
-
return length;
}
@@ -297,13 +293,11 @@ static int count_them(struct net *net,
int count;
u32 hash;
- if (family == NFPROTO_IPV6) {
+ if (family == NFPROTO_IPV6)
hash = connlimit_iphash6(addr, mask);
- root = &data->climit_root6[hash];
- } else {
+ else
hash = connlimit_iphash(addr->ip & mask->ip);
- root = &data->climit_root4[hash];
- }
+ root = &data->climit_root[hash];
spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
@@ -382,10 +376,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
return -ENOMEM;
}
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- info->data->climit_root4[i] = RB_ROOT;
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- info->data->climit_root6[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ info->data->climit_root[i] = RB_ROOT;
return 0;
}
@@ -416,10 +408,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
nf_ct_netns_put(par->net, par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- destroy_tree(&info->data->climit_root4[i]);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- destroy_tree(&info->data->climit_root6[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ destroy_tree(&info->data->climit_root[i]);
kfree(info->data);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 762e1874f28b..10d48234f5f4 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -56,6 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
}
/* need to declare this at the top */
+static const struct file_operations dl_file_ops_v2;
static const struct file_operations dl_file_ops_v1;
static const struct file_operations dl_file_ops;
@@ -87,8 +88,19 @@ struct dsthash_ent {
unsigned long expires; /* precalculated expiry time */
struct {
unsigned long prev; /* last modification */
- u_int64_t credit;
- u_int64_t credit_cap, cost;
+ union {
+ struct {
+ u_int64_t credit;
+ u_int64_t credit_cap;
+ u_int64_t cost;
+ };
+ struct {
+ u_int32_t interval, prev_window;
+ u_int64_t current_rate;
+ u_int64_t rate;
+ int64_t burst;
+ };
+ };
} rateinfo;
struct rcu_head rcu;
};
@@ -99,7 +111,7 @@ struct xt_hashlimit_htable {
u_int8_t family;
bool rnd_initialized;
- struct hashlimit_cfg2 cfg; /* config */
+ struct hashlimit_cfg3 cfg; /* config */
/* used internally */
spinlock_t lock; /* lock for list_head */
@@ -116,10 +128,10 @@ struct xt_hashlimit_htable {
};
static int
-cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
{
if (revision == 1) {
- struct hashlimit_cfg1 *cfg = from;
+ struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
to->mode = cfg->mode;
to->avg = cfg->avg;
@@ -131,7 +143,19 @@ cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
to->srcmask = cfg->srcmask;
to->dstmask = cfg->dstmask;
} else if (revision == 2) {
- memcpy(to, from, sizeof(struct hashlimit_cfg2));
+ struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+ to->mode = cfg->mode;
+ to->avg = cfg->avg;
+ to->burst = cfg->burst;
+ to->size = cfg->size;
+ to->max = cfg->max;
+ to->gc_interval = cfg->gc_interval;
+ to->expire = cfg->expire;
+ to->srcmask = cfg->srcmask;
+ to->dstmask = cfg->dstmask;
+ } else if (revision == 3) {
+ memcpy(to, from, sizeof(struct hashlimit_cfg3));
} else {
return -EINVAL;
}
@@ -240,13 +264,14 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
}
static void htable_gc(struct work_struct *work);
-static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
const char *name, u_int8_t family,
struct xt_hashlimit_htable **out_hinfo,
int revision)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
+ const struct file_operations *fops;
unsigned int size, i;
int ret;
@@ -268,7 +293,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
*out_hinfo = hinfo;
/* copy match config into hashtable config */
- ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
if (ret)
return ret;
@@ -293,11 +318,21 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
}
spin_lock_init(&hinfo->lock);
+ switch (revision) {
+ case 1:
+ fops = &dl_file_ops_v1;
+ break;
+ case 2:
+ fops = &dl_file_ops_v2;
+ break;
+ default:
+ fops = &dl_file_ops;
+ }
+
hinfo->pde = proc_create_data(name, 0,
(family == NFPROTO_IPV4) ?
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
- (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
- hinfo);
+ fops, hinfo);
if (hinfo->pde == NULL) {
kfree(hinfo->name);
vfree(hinfo);
@@ -482,6 +517,25 @@ static u32 user2credits_byte(u32 user)
return (u32) (us >> 32);
}
+/*
+ * Convert a userspace packet rate into the internal rate value:
+ * XT_HASHLIMIT_SCALE_v2 / @user. A zero @user is logged as invalid and
+ * yields 0 instead of dividing.
+ */
+static u64 user2rate(u64 user)
+{
+	if (user == 0) {
+		pr_warn("invalid rate from userspace: %llu\n", user);
+		return 0;
+	}
+
+	return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
+}
+
+/*
+ * Convert a userspace byte rate into the internal rate value used by
+ * rate-match mode.
+ *
+ * @user: rate supplied by userspace; 0 produces the same result as 1.
+ *
+ * Returns ((0xFFFFFFFF / @user) - 1) << 4.
+ *
+ * NOTE(review): for @user > 0xFFFFFFFF the quotient is 0 and "r - 1" wraps
+ * around -- confirm callers never pass such a value.
+ */
+static u64 user2rate_bytes(u64 user)
+{
+	u64 r;
+
+	/*
+	 * Use div64_u64() rather than '/': a plain 64-by-64 bit division
+	 * fails to link on 32-bit builds, and user2rate() already divides
+	 * this way.
+	 */
+	r = user ? div64_u64(0xFFFFFFFFULL, user) : 0xFFFFFFFFULL;
+	r = (r - 1) << 4;
+	return r;
+}
+
static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
u32 mode, int revision)
{
@@ -491,6 +545,21 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
if (delta == 0)
return;
+ if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) {
+ u64 interval = dh->rateinfo.interval * HZ;
+
+ if (delta < interval)
+ return;
+
+ dh->rateinfo.prev = now;
+ dh->rateinfo.prev_window =
+ ((dh->rateinfo.current_rate * interval) >
+ (delta * dh->rateinfo.rate));
+ dh->rateinfo.current_rate = 0;
+
+ return;
+ }
+
dh->rateinfo.prev = now;
if (mode & XT_HASHLIMIT_BYTES) {
@@ -515,7 +584,23 @@ static void rateinfo_init(struct dsthash_ent *dh,
struct xt_hashlimit_htable *hinfo, int revision)
{
dh->rateinfo.prev = jiffies;
- if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) {
+ dh->rateinfo.prev_window = 0;
+ dh->rateinfo.current_rate = 0;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ dh->rateinfo.rate = user2rate_bytes(hinfo->cfg.avg);
+ if (hinfo->cfg.burst)
+ dh->rateinfo.burst =
+ hinfo->cfg.burst * dh->rateinfo.rate;
+ else
+ dh->rateinfo.burst = dh->rateinfo.rate;
+ } else {
+ dh->rateinfo.rate = user2rate(hinfo->cfg.avg);
+ dh->rateinfo.burst =
+ hinfo->cfg.burst + dh->rateinfo.rate;
+ }
+ dh->rateinfo.interval = hinfo->cfg.interval;
+ } else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
dh->rateinfo.credit_cap = hinfo->cfg.burst;
@@ -648,7 +733,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
static bool
hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
struct xt_hashlimit_htable *hinfo,
- const struct hashlimit_cfg2 *cfg, int revision)
+ const struct hashlimit_cfg3 *cfg, int revision)
{
unsigned long now = jiffies;
struct dsthash_ent *dh;
@@ -659,12 +744,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
- rcu_read_lock_bh();
+ local_bh_disable();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
- rcu_read_unlock_bh();
+ local_bh_enable();
goto hotdrop;
} else if (race) {
/* Already got an entry, update expiration timeout */
@@ -680,6 +765,20 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
}
+	if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+		/*
+		 * Rate-match mode: account this packet (its length in byte
+		 * mode, otherwise 1) against the current window.
+		 */
+		cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1;
+		dh->rateinfo.current_rate += cost;
+
+		if (!dh->rateinfo.prev_window &&
+		    (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
+			spin_unlock(&dh->lock);
+			/*
+			 * This function enters with local_bh_disable(), not
+			 * rcu_read_lock_bh(), so it must leave with
+			 * local_bh_enable() like every other exit path.
+			 */
+			local_bh_enable();
+			return !(cfg->mode & XT_HASHLIMIT_INVERT);
+		} else {
+			goto overlimit;
+		}
+	}
+ }
+
if (cfg->mode & XT_HASHLIMIT_BYTES)
cost = hashlimit_byte_cost(skb->len, dh);
else
@@ -689,12 +788,13 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
/* below the limit */
dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
}
+overlimit:
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
/* default match is underlimit - so over the limit, we need to invert */
return cfg->mode & XT_HASHLIMIT_INVERT;
@@ -708,7 +808,7 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -720,17 +820,33 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
}
static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_common(skb, par, hinfo, &cfg, 2);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
- return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+ return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
- struct hashlimit_cfg2 *cfg,
+ struct hashlimit_cfg3 *cfg,
const char *name, int revision)
{
struct net *net = par->net;
@@ -753,7 +869,17 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
}
/* Check for overflow. */
- if (cfg->mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ if (cfg->avg == 0) {
+ pr_info("hashlimit invalid rate\n");
+ return -ERANGE;
+ }
+
+ if (cfg->interval == 0) {
+ pr_info("hashlimit invalid interval\n");
+ return -EINVAL;
+ }
+ } else if (cfg->mode & XT_HASHLIMIT_BYTES) {
if (user2credits_byte(cfg->avg) == 0) {
pr_info("overflow, rate too high: %llu\n", cfg->avg);
return -EINVAL;
@@ -784,7 +910,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
if (info->name[sizeof(info->name) - 1] != '\0')
@@ -799,15 +925,40 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
&cfg, info->name, 1);
}
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ if (info->name[sizeof(info->name) - 1] != '\0')
+ return -EINVAL;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo,
+ &cfg, info->name, 2);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
if (info->name[sizeof(info->name) - 1] != '\0')
return -EINVAL;
return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
- info->name, 2);
+ info->name, 3);
+}
+
+static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
}
static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
@@ -819,7 +970,7 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
- const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
htable_put(info->hinfo);
}
@@ -840,9 +991,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV4,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -863,9 +1025,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV6,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -947,6 +1120,21 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
}
}
+static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ const struct xt_hashlimit_htable *ht = s->private;
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
@@ -969,7 +1157,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
- rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3);
dl_seq_print(ent, family, s);
@@ -977,6 +1165,20 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
return seq_has_overflowed(s);
}
+static int dl_seq_show_v2(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = s->private;
+ unsigned int *bucket = (unsigned int *)v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show_v2(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
struct xt_hashlimit_htable *htable = s->private;
@@ -1012,6 +1214,13 @@ static const struct seq_operations dl_seq_ops_v1 = {
.show = dl_seq_show_v1
};
+static const struct seq_operations dl_seq_ops_v2 = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show_v2
+};
+
static const struct seq_operations dl_seq_ops = {
.start = dl_seq_start,
.next = dl_seq_next,
@@ -1019,6 +1228,18 @@ static const struct seq_operations dl_seq_ops = {
.show = dl_seq_show
};
+static int dl_proc_open_v2(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &dl_seq_ops_v2);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+
+ sf->private = PDE_DATA(inode);
+ }
+ return ret;
+}
+
static int dl_proc_open_v1(struct inode *inode, struct file *file)
{
int ret = seq_open(file, &dl_seq_ops_v1);
@@ -1042,6 +1263,14 @@ static int dl_proc_open(struct inode *inode, struct file *file)
return ret;
}
+static const struct file_operations dl_file_ops_v2 = {
+ .owner = THIS_MODULE,
+ .open = dl_proc_open_v2,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
static const struct file_operations dl_file_ops_v1 = {
.owner = THIS_MODULE,
.open = dl_proc_open_v1,
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 8107b3eb865f..0fd14d1eb09d 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -58,9 +58,9 @@ xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
@@ -75,8 +75,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
@@ -90,9 +90,9 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
}
@@ -105,8 +105,8 @@ xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 71cfa9551d08..36e14b1f061d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
sizeof(struct tcphdr), optsize, opts);
}
- rcu_read_lock();
list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
int foptsize, optnum;
@@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
info->loglevel == XT_OSF_LOGLEVEL_FIRST)
break;
}
- rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 3f6c4fa78bdb..245fa350a7a8 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock);
static DEFINE_MUTEX(recent_mutex);
#ifdef CONFIG_PROC_FS
-static const struct file_operations recent_old_fops, recent_mt_fops;
+static const struct file_operations recent_mt_fops;
#endif
static u_int32_t hash_rnd __read_mostly;