summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_fo.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_mh.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ovf.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c9
-rw-r--r--net/netfilter/ipvs/ip_vs_twos.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c3
-rw-r--r--net/netfilter/nf_conncount.c211
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_standalone.c4
-rw-r--r--net/netfilter/nf_flow_table_core.c5
-rw-r--r--net/netfilter/nf_flow_table_ip.c293
-rw-r--r--net/netfilter/nf_flow_table_offload.c2
-rw-r--r--net/netfilter/nf_flow_table_path.c330
-rw-r--r--net/netfilter/nf_tables_api.c34
-rw-r--r--net/netfilter/nft_connlimit.c54
-rw-r--r--net/netfilter/nft_flow_offload.c252
-rw-r--r--net/netfilter/nft_lookup.c13
-rw-r--r--net/netfilter/xt_connlimit.c14
43 files changed, 819 insertions, 492 deletions
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e43e20f529f8..6bfc250e474f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -141,6 +141,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
+ nf_flow_table_path.o \
nf_flow_table_offload.o nf_flow_table_xdp.o
nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
ifeq ($(CONFIG_NF_FLOW_TABLE),m)
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index fdacbc3c15be..d54d7da58334 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -13,8 +13,7 @@
* Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 37ebb0cb62b8..50cc492c7553 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -17,8 +17,7 @@
* Changes:
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/interrupt.h>
#include <linux/in.h>
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5ea7ab8bf4dc..90d56f92c0f6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -19,8 +19,7 @@
* Harald Welte don't use nfcache
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 4c8fa22be88a..068702894377 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -13,8 +13,7 @@
* Changes:
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/init.h>
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 75f4c231f4a0..bb7aca4601ff 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -30,8 +30,7 @@
*
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/ip.h>
#include <linux/slab.h>
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 93a925f1ed9b..77f4f637ff67 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -12,8 +12,7 @@
* get_stats()) do the per cpu summing.
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/kernel.h>
#include <linux/jiffies.h>
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
index ab117e5bc34e..d657b47c6511 100644
--- a/net/netfilter/ipvs/ip_vs_fo.c
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -8,8 +8,7 @@
* Kenny Mathis : added initial functionality based on weight
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 206c6700e200..b315c608fda4 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -16,8 +16,7 @@
* Author: Wouter Gadeyne
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/moduleparam.h>
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 156181a3bacd..e6c8ed0c92f6 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -34,8 +34,7 @@
* me to write this module.
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/ip.h>
#include <linux/slab.h>
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index a021e6aba3d7..a25cf7bb6185 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -32,8 +32,7 @@
*
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/ip.h>
#include <linux/module.h>
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index c2764505e380..38cc38c5d8bb 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -9,8 +9,7 @@
* Wensong Zhang : added any dest with weight=0 is quiesced
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
index e3d7f5c879ce..f61f54004c9e 100644
--- a/net/netfilter/ipvs/ip_vs_mh.c
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -17,8 +17,7 @@ https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
*
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/ip.h>
#include <linux/slab.h>
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 08adcb222986..81974f69e5bb 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -30,8 +30,7 @@
* PASV response can not be NAT-ed) but Active FTP should work
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/types.h>
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index ed7f5c889b41..ada158c610ce 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -26,8 +26,7 @@
*
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index c7708b809700..c5c67df80a0b 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -12,8 +12,7 @@
* active connections
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 166c669f0763..3035079ebd99 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/spinlock.h>
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index e4ce1d9a63f9..85f31d71e29a 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index a9fd1d3fc2cb..fd9dbca24c85 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -8,8 +8,7 @@
* Changes:
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 89602c16f6b6..44e14acc187e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -6,8 +6,7 @@
* Wensong Zhang <wensong@linuxvirtualserver.org>
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/in.h>
#include <linux/ip.h>
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 7da51390cea6..f68a1533ee45 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -13,8 +13,7 @@
* protocol ip_vs_proto_data and is handled by netns
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/kernel.h>
#include <linux/ip.h>
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 68260d91c988..0f0107c80dd2 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,8 +9,7 @@
* Network name space (netns) aware.
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/in.h>
#include <linux/ip.h>
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 6baa34dff9f0..4125ee561cdc 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -14,8 +14,7 @@
* Wensong Zhang : added any dest with weight=0 is quiesced
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index d4903723be7e..c6e421c4e299 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -12,8 +12,7 @@
* Changes:
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/spinlock.h>
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index a46f99a56618..245a323c84cd 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -30,8 +30,7 @@
*
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 92e77d7a6b50..0e85e07e23b9 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -32,8 +32,7 @@
*
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/ip.h>
#include <linux/slab.h>
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3402675bf521..54dd1514ac45 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -32,8 +32,7 @@
* Persistence support, fwmark and time-out.
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/slab.h>
@@ -1435,7 +1434,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
sin.sin_addr.s_addr = addr;
sin.sin_port = 0;
- return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+ return kernel_bind(sock, (struct sockaddr_unsized *)&sin, sizeof(sin));
}
static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
@@ -1501,7 +1500,7 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id,
}
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
- result = kernel_connect(sock, (struct sockaddr *)&mcast_addr,
+ result = kernel_connect(sock, (struct sockaddr_unsized *)&mcast_addr,
salen, 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
@@ -1542,7 +1541,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id,
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
sock->sk->sk_bound_dev_if = dev->ifindex;
- result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen);
+ result = kernel_bind(sock, (struct sockaddr_unsized *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
goto error;
diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c
index 8d5419edde50..dbb7f5fd4688 100644
--- a/net/netfilter/ipvs/ip_vs_twos.c
+++ b/net/netfilter/ipvs/ip_vs_twos.c
@@ -4,8 +4,7 @@
* Authors: Darby Payne <darby.payne@applovin.com>
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 9fa500927c0a..9da445ca09a1 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -14,8 +14,7 @@
* Wensong Zhang : added any dest with weight=0 is quiesced
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 85ce0d04afac..99f09cbf2d9b 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -13,8 +13,7 @@
* with weight 0 when all weights are zero
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 95af252b2939..3162ce3c2640 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -21,8 +21,7 @@
* - the only place where we can see skb->sk != NULL
*/
-#define KMSG_COMPONENT "IPVS"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 913ede2f57f9..f1be4dd5cf85 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -122,15 +122,65 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
return ERR_PTR(-EAGAIN);
}
+static bool get_ct_or_tuple_from_skb(struct net *net,
+ const struct sk_buff *skb,
+ u16 l3num,
+ struct nf_conn **ct,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone **zone,
+ bool *refcounted)
+{
+ const struct nf_conntrack_tuple_hash *h;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *found_ct;
+
+ found_ct = nf_ct_get(skb, &ctinfo);
+ if (found_ct && !nf_ct_is_template(found_ct)) {
+ *tuple = found_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ *zone = nf_ct_zone(found_ct);
+ *ct = found_ct;
+ return true;
+ }
+
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, net, tuple))
+ return false;
+
+ if (found_ct)
+ *zone = nf_ct_zone(found_ct);
+
+ h = nf_conntrack_find_get(net, *zone, tuple);
+ if (!h)
+ return true;
+
+ found_ct = nf_ct_tuplehash_to_ctrack(h);
+ *refcounted = true;
+ *ct = found_ct;
+
+ return true;
+}
+
static int __nf_conncount_add(struct net *net,
- struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+ const struct sk_buff *skb,
+ u16 l3num,
+ struct nf_conncount_list *list)
{
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conn *ct = NULL;
struct nf_conn *found_ct;
unsigned int collect = 0;
+ bool refcounted = false;
+
+ if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
+ return -ENOENT;
+
+ if (ct && nf_ct_is_confirmed(ct)) {
+ if (refcounted)
+ nf_ct_put(ct);
+ return -EEXIST;
+ }
if ((u32)jiffies == list->last_gc)
goto add_new_node;
@@ -144,10 +194,10 @@ static int __nf_conncount_add(struct net *net,
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
- if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
- return 0; /* already exists */
+ goto out_put; /* already exists */
} else {
collect++;
}
@@ -156,7 +206,7 @@ static int __nf_conncount_add(struct net *net,
found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/*
* We should not see tuples twice unless someone hooks
@@ -165,7 +215,7 @@ static int __nf_conncount_add(struct net *net,
* Attempt to avoid a re-add in this case.
*/
nf_ct_put(found_ct);
- return 0;
+ goto out_put;
} else if (already_closed(found_ct)) {
/*
* we do not care about connections which are
@@ -188,31 +238,35 @@ add_new_node:
if (conn == NULL)
return -ENOMEM;
- conn->tuple = *tuple;
+ conn->tuple = tuple;
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
list_add_tail(&conn->node, &list->head);
list->count++;
list->last_gc = (u32)jiffies;
+
+out_put:
+ if (refcounted)
+ nf_ct_put(ct);
return 0;
}
-int nf_conncount_add(struct net *net,
- struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+int nf_conncount_add_skb(struct net *net,
+ const struct sk_buff *skb,
+ u16 l3num,
+ struct nf_conncount_list *list)
{
int ret;
/* check the saved connections */
spin_lock_bh(&list->list_lock);
- ret = __nf_conncount_add(net, list, tuple, zone);
+ ret = __nf_conncount_add(net, skb, l3num, list);
spin_unlock_bh(&list->list_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(nf_conncount_add);
+EXPORT_SYMBOL_GPL(nf_conncount_add_skb);
void nf_conncount_list_init(struct nf_conncount_list *list)
{
@@ -224,8 +278,8 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
/* Return true if the list is empty. Must be called with BH disabled. */
-bool nf_conncount_gc_list(struct net *net,
- struct nf_conncount_list *list)
+static bool __nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n;
@@ -237,10 +291,6 @@ bool nf_conncount_gc_list(struct net *net,
if ((u32)jiffies == READ_ONCE(list->last_gc))
return false;
- /* don't bother if other cpu is already doing GC */
- if (!spin_trylock(&list->list_lock))
- return false;
-
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
@@ -269,7 +319,21 @@ bool nf_conncount_gc_list(struct net *net,
if (!list->count)
ret = true;
list->last_gc = (u32)jiffies;
- spin_unlock(&list->list_lock);
+
+ return ret;
+}
+
+bool nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list)
+{
+ bool ret;
+
+ /* don't bother if other cpu is already doing GC */
+ if (!spin_trylock_bh(&list->list_lock))
+ return false;
+
+ ret = __nf_conncount_gc_list(net, list);
+ spin_unlock_bh(&list->list_lock);
return ret;
}
@@ -309,19 +373,22 @@ static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
static unsigned int
insert_tree(struct net *net,
+ const struct sk_buff *skb,
+ u16 l3num,
struct nf_conncount_data *data,
struct rb_root *root,
unsigned int hash,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+ const u32 *key)
{
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+ bool do_gc = true, refcounted = false;
+ unsigned int count = 0, gc_count = 0;
struct rb_node **rbnode, *parent;
- struct nf_conncount_rb *rbconn;
+ struct nf_conntrack_tuple tuple;
struct nf_conncount_tuple *conn;
- unsigned int count = 0, gc_count = 0;
- bool do_gc = true;
+ struct nf_conncount_rb *rbconn;
+ struct nf_conn *ct = NULL;
spin_lock_bh(&nf_conncount_locks[hash]);
restart:
@@ -340,8 +407,8 @@ restart:
} else {
int ret;
- ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
- if (ret)
+ ret = nf_conncount_add_skb(net, skb, l3num, &rbconn->list);
+ if (ret && ret != -EEXIST)
count = 0; /* hotdrop */
else
count = rbconn->list.count;
@@ -364,30 +431,35 @@ restart:
goto restart;
}
- /* expected case: match, insert new node */
- rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
- if (rbconn == NULL)
- goto out_unlock;
+ if (get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) {
+ /* expected case: match, insert new node */
+ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+ goto out_unlock;
- conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
- if (conn == NULL) {
- kmem_cache_free(conncount_rb_cachep, rbconn);
- goto out_unlock;
- }
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL) {
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ goto out_unlock;
+ }
- conn->tuple = *tuple;
- conn->zone = *zone;
- conn->cpu = raw_smp_processor_id();
- conn->jiffies32 = (u32)jiffies;
- memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
+ conn->tuple = tuple;
+ conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
+ memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
- nf_conncount_list_init(&rbconn->list);
- list_add(&conn->node, &rbconn->list.head);
- count = 1;
- rbconn->list.count = count;
+ nf_conncount_list_init(&rbconn->list);
+ list_add(&conn->node, &rbconn->list.head);
+ count = 1;
+ rbconn->list.count = count;
- rb_link_node_rcu(&rbconn->node, parent, rbnode);
- rb_insert_color(&rbconn->node, root);
+ rb_link_node_rcu(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+
+ if (refcounted)
+ nf_ct_put(ct);
+ }
out_unlock:
spin_unlock_bh(&nf_conncount_locks[hash]);
return count;
@@ -395,10 +467,10 @@ out_unlock:
static unsigned int
count_tree(struct net *net,
+ const struct sk_buff *skb,
+ u16 l3num,
struct nf_conncount_data *data,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+ const u32 *key)
{
struct rb_root *root;
struct rb_node *parent;
@@ -422,7 +494,7 @@ count_tree(struct net *net,
} else {
int ret;
- if (!tuple) {
+ if (!skb) {
nf_conncount_gc_list(net, &rbconn->list);
return rbconn->list.count;
}
@@ -437,19 +509,23 @@ count_tree(struct net *net,
}
/* same source network -> be counted! */
- ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+ ret = __nf_conncount_add(net, skb, l3num, &rbconn->list);
spin_unlock_bh(&rbconn->list.list_lock);
- if (ret)
+ if (ret && ret != -EEXIST) {
return 0; /* hotdrop */
- else
+ } else {
+ /* -EEXIST means add was skipped, update the list */
+ if (ret == -EEXIST)
+ nf_conncount_gc_list(net, &rbconn->list);
return rbconn->list.count;
+ }
}
}
- if (!tuple)
+ if (!skb)
return 0;
- return insert_tree(net, data, root, hash, key, tuple, zone);
+ return insert_tree(net, skb, l3num, data, root, hash, key);
}
static void tree_gc_worker(struct work_struct *work)
@@ -511,18 +587,19 @@ next:
}
/* Count and return number of conntrack entries in 'net' with particular 'key'.
- * If 'tuple' is not null, insert it into the accounting data structure.
- * Call with RCU read lock.
+ * If 'skb' is not null, insert the corresponding tuple into the accounting
+ * data structure. Call with RCU read lock.
*/
-unsigned int nf_conncount_count(struct net *net,
- struct nf_conncount_data *data,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+unsigned int nf_conncount_count_skb(struct net *net,
+ const struct sk_buff *skb,
+ u16 l3num,
+ struct nf_conncount_data *data,
+ const u32 *key)
{
- return count_tree(net, data, key, tuple, zone);
+ return count_tree(net, skb, l3num, data, key);
+
}
-EXPORT_SYMBOL_GPL(nf_conncount_count);
+EXPORT_SYMBOL_GPL(nf_conncount_count_skb);
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
{
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 344f88295976..0b95f226f211 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1668,7 +1668,7 @@ __nf_conntrack_alloc(struct net *net,
/* We don't want any race condition at early drop stage */
ct_count = atomic_inc_return(&cnet->count);
- if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) {
+ if (unlikely(ct_count > nf_conntrack_max)) {
if (!early_drop(net, hash)) {
if (!conntrack_gc_work.early_drop)
conntrack_gc_work.early_drop = true;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 708b79380f04..207b240b14e5 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -648,7 +648,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_INT_MAX,
},
[NF_SYSCTL_CT_COUNT] = {
@@ -929,7 +929,7 @@ static struct ctl_table nf_ct_netfilter_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_INT_MAX,
},
};
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 9441ac3d8c1a..06e8251a6644 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -118,7 +118,10 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->in_vlan_ingress |= BIT(j);
j++;
}
+
+ flow_tuple->tun = route->tuple[dir].in.tun;
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
+ flow_tuple->tun_num = route->tuple[dir].in.num_tuns;
switch (route->tuple[dir].xmit_type) {
case FLOW_OFFLOAD_XMIT_DIRECT:
@@ -127,11 +130,11 @@ static int flow_offload_fill_route(struct flow_offload *flow,
memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
ETH_ALEN);
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
- flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
dst_release(dst);
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
+ flow_tuple->ifidx = route->tuple[dir].out.ifindex;
flow_tuple->dst_cache = dst;
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
break;
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 8cd4cf7ae211..78883343e5d6 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -145,8 +145,11 @@ static bool ip_has_options(unsigned int thoff)
static void nf_flow_tuple_encap(struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
+ __be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
struct pppoe_hdr *phdr;
+ struct iphdr *iph;
+ u16 offset = 0;
int i = 0;
if (skb_vlan_tag_present(skb)) {
@@ -159,13 +162,26 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
tuple->encap[i].proto = skb->protocol;
+ inner_proto = veth->h_vlan_encapsulated_proto;
+ offset += VLAN_HLEN;
break;
case htons(ETH_P_PPP_SES):
phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
+ inner_proto = *((__be16 *)(phdr + 1));
+ offset += PPPOE_SES_HLEN;
break;
}
+
+ if (inner_proto == htons(ETH_P_IP)) {
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ if (iph->protocol == IPPROTO_IPIP) {
+ tuple->tun.dst_v4.s_addr = iph->daddr;
+ tuple->tun.src_v4.s_addr = iph->saddr;
+ tuple->tun.l3_proto = IPPROTO_IPIP;
+ }
+ }
}
struct nf_flowtable_ctx {
@@ -277,11 +293,46 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
+static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
+{
+ struct iphdr *iph;
+ u16 size;
+
+ if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
+ return false;
+
+ iph = (struct iphdr *)(skb_network_header(skb) + *psize);
+ size = iph->ihl << 2;
+
+ if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
+ return false;
+
+ if (iph->ttl <= 1)
+ return false;
+
+ if (iph->protocol == IPPROTO_IPIP)
+ *psize += size;
+
+ return true;
+}
+
+static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
+{
+ struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
+
+ if (iph->protocol != IPPROTO_IPIP)
+ return;
+
+ skb_pull(skb, iph->ihl << 2);
+ skb_reset_network_header(skb);
+}
+
static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
+ __be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
- __be16 inner_proto;
+ bool ret = false;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
@@ -291,19 +342,23 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto) {
*offset += VLAN_HLEN;
- return true;
+ inner_proto = proto;
+ ret = true;
}
break;
case htons(ETH_P_PPP_SES):
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
- return true;
+ ret = true;
}
break;
}
- return false;
+ if (inner_proto == htons(ETH_P_IP))
+ ret = nf_flow_ip4_tunnel_proto(skb, offset);
+
+ return ret;
}
static void nf_flow_encap_pop(struct sk_buff *skb,
@@ -331,21 +386,23 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
break;
}
}
+
+ if (skb->protocol == htons(ETH_P_IP))
+ nf_flow_ip4_tunnel_pop(skb);
}
+struct nf_flow_xmit {
+ const void *dest;
+ const void *source;
+ struct net_device *outdev;
+};
+
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
- const struct flow_offload_tuple_rhash *tuplehash,
- unsigned short type)
+ struct nf_flow_xmit *xmit)
{
- struct net_device *outdev;
-
- outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
- if (!outdev)
- return NF_DROP;
-
- skb->dev = outdev;
- dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
- tuplehash->tuple.out.h_source, skb->len);
+ skb->dev = xmit->outdev;
+ dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+ xmit->dest, xmit->source, skb->len);
dev_queue_xmit(skb);
return NF_STOLEN;
@@ -357,8 +414,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
{
struct flow_offload_tuple tuple = {};
- if (skb->protocol != htons(ETH_P_IP) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+ if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
return NULL;
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -381,6 +437,9 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
+ if (flow->tuplehash[!dir].tuple.tun_num)
+ mtu -= sizeof(*iph);
+
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return 0;
@@ -414,20 +473,139 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
return 1;
}
+static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
+{
+ int data_len = skb->len + sizeof(__be16);
+ struct ppp_hdr {
+ struct pppoe_hdr hdr;
+ __be16 proto;
+ } *ph;
+ __be16 proto;
+
+ if (skb_cow_head(skb, PPPOE_SES_HLEN))
+ return -1;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ proto = htons(PPP_IP);
+ break;
+ case htons(ETH_P_IPV6):
+ proto = htons(PPP_IPV6);
+ break;
+ default:
+ return -1;
+ }
+
+ __skb_push(skb, PPPOE_SES_HLEN);
+ skb_reset_network_header(skb);
+
+ ph = (struct ppp_hdr *)(skb->data);
+ ph->hdr.ver = 1;
+ ph->hdr.type = 1;
+ ph->hdr.code = 0;
+ ph->hdr.sid = htons(id);
+ ph->hdr.length = htons(data_len);
+ ph->proto = proto;
+ skb->protocol = htons(ETH_P_PPP_SES);
+
+ return 0;
+}
+
+static int nf_flow_tunnel_ipip_push(struct net *net, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple,
+ __be32 *ip_daddr)
+{
+ struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
+ struct rtable *rt = dst_rtable(tuple->dst_cache);
+ u8 tos = iph->tos, ttl = iph->ttl;
+ __be16 frag_off = iph->frag_off;
+ u32 headroom = sizeof(*iph);
+ int err;
+
+ err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4);
+ if (err)
+ return err;
+
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+ err = skb_cow_head(skb, headroom);
+ if (err)
+ return err;
+
+ skb_scrub_packet(skb, true);
+ skb_clear_hash_if_not_l4(skb);
+
+ /* Push down and install the IP header. */
+ skb_push(skb, sizeof(*iph));
+ skb_reset_network_header(skb);
+
+ iph = ip_hdr(skb);
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) >> 2;
+ iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : frag_off;
+ iph->protocol = tuple->tun.l3_proto;
+ iph->tos = tos;
+ iph->daddr = tuple->tun.src_v4.s_addr;
+ iph->saddr = tuple->tun.dst_v4.s_addr;
+ iph->ttl = ttl;
+ iph->tot_len = htons(skb->len);
+ __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
+ ip_send_check(iph);
+
+ *ip_daddr = tuple->tun.src_v4.s_addr;
+
+ return 0;
+}
+
+static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple,
+ __be32 *ip_daddr)
+{
+ if (tuple->tun_num)
+ return nf_flow_tunnel_ipip_push(net, skb, tuple, ip_daddr);
+
+ return 0;
+}
+
+static int nf_flow_encap_push(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
+{
+ int i;
+
+ for (i = 0; i < tuple->encap_num; i++) {
+ switch (tuple->encap[i].proto) {
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
+ if (skb_vlan_push(skb, tuple->encap[i].proto,
+ tuple->encap[i].id) < 0)
+ return -1;
+ break;
+ case htons(ETH_P_PPP_SES):
+ if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0)
+ return -1;
+ break;
+ }
+ }
+
+ return 0;
+}
+
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple *other_tuple;
enum flow_offload_tuple_dir dir;
struct nf_flowtable_ctx ctx = {
.in = state->in,
};
+ struct nf_flow_xmit xmit = {};
struct flow_offload *flow;
- struct net_device *outdev;
+ struct neighbour *neigh;
struct rtable *rt;
- __be32 nexthop;
+ __be32 ip_daddr;
int ret;
tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
@@ -450,29 +628,46 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ ip_daddr = other_tuple->src_v4.s_addr;
+
+ if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
+ return NF_DROP;
+
+ if (nf_flow_encap_push(skb, other_tuple) < 0)
+ return NF_DROP;
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rtable(tuplehash->tuple.dst_cache);
- outdev = rt->dst.dev;
- skb->dev = outdev;
- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
+ if (!xmit.outdev) {
+ flow_offload_teardown(flow);
+ return NF_DROP;
+ }
+ neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, ip_daddr));
+ if (IS_ERR(neigh)) {
+ flow_offload_teardown(flow);
+ return NF_DROP;
+ }
+ xmit.dest = neigh->ha;
skb_dst_set_noref(skb, &rt->dst);
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
- ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
- if (ret == NF_DROP)
+ xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
+ if (!xmit.outdev) {
flow_offload_teardown(flow);
+ return NF_DROP;
+ }
+ xmit.dest = tuplehash->tuple.out.h_dest;
+ xmit.source = tuplehash->tuple.out.h_source;
break;
default:
WARN_ON_ONCE(1);
- ret = NF_DROP;
- break;
+ return NF_DROP;
}
- return ret;
+ return nf_flow_queue_xmit(state->net, skb, &xmit);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
@@ -715,13 +910,15 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple *other_tuple;
enum flow_offload_tuple_dir dir;
struct nf_flowtable_ctx ctx = {
.in = state->in,
};
- const struct in6_addr *nexthop;
+ struct nf_flow_xmit xmit = {};
+ struct in6_addr *ip6_daddr;
struct flow_offload *flow;
- struct net_device *outdev;
+ struct neighbour *neigh;
struct rt6_info *rt;
int ret;
@@ -745,28 +942,42 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ other_tuple = &flow->tuplehash[!dir].tuple;
+ ip6_daddr = &other_tuple->src_v6;
+
+ if (nf_flow_encap_push(skb, other_tuple) < 0)
+ return NF_DROP;
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rt6_info(tuplehash->tuple.dst_cache);
- outdev = rt->dst.dev;
- skb->dev = outdev;
- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
+ if (!xmit.outdev) {
+ flow_offload_teardown(flow);
+ return NF_DROP;
+ }
+ neigh = ip_neigh_gw6(rt->dst.dev, rt6_nexthop(rt, ip6_daddr));
+ if (IS_ERR(neigh)) {
+ flow_offload_teardown(flow);
+ return NF_DROP;
+ }
+ xmit.dest = neigh->ha;
skb_dst_set_noref(skb, &rt->dst);
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
- ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
- if (ret == NF_DROP)
+ xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
+ if (!xmit.outdev) {
flow_offload_teardown(flow);
+ return NF_DROP;
+ }
+ xmit.dest = tuplehash->tuple.out.h_dest;
+ xmit.source = tuplehash->tuple.out.h_source;
break;
default:
WARN_ON_ONCE(1);
- ret = NF_DROP;
- break;
+ return NF_DROP;
}
- return ret;
+ return nf_flow_queue_xmit(state->net, skb, &xmit);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index e06bc36f49fe..d8f7bfd60ac6 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -555,7 +555,7 @@ static void flow_offload_redirect(struct net *net,
switch (this_tuple->xmit_type) {
case FLOW_OFFLOAD_XMIT_DIRECT:
this_tuple = &flow->tuplehash[dir].tuple;
- ifindex = this_tuple->out.hw_ifidx;
+ ifindex = this_tuple->out.ifidx;
break;
case FLOW_OFFLOAD_XMIT_NEIGH:
other_tuple = &flow->tuplehash[!dir].tuple;
diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
new file mode 100644
index 000000000000..f0984cf69a09
--- /dev/null
+++ b/net/netfilter/nf_flow_table_path.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/ip.h>
+#include <net/inet_dscp.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_flow_table.h>
+
+static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
+{
+ if (dst_xfrm(dst))
+ return FLOW_OFFLOAD_XMIT_XFRM;
+
+ return FLOW_OFFLOAD_XMIT_NEIGH;
+}
+
+static void nft_default_forward_path(struct nf_flow_route *route,
+ struct dst_entry *dst_cache,
+ enum ip_conntrack_dir dir)
+{
+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
+ route->tuple[dir].dst = dst_cache;
+ route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
+}
+
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+ return false;
+
+ return true;
+}
+
+static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
+ const struct dst_entry *dst_cache,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir, u8 *ha,
+ struct net_device_path_stack *stack)
+{
+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
+ struct net_device *dev = dst_cache->dev;
+ struct neighbour *n;
+ u8 nud_state;
+
+ if (!nft_is_valid_ether_device(dev))
+ goto out;
+
+ n = dst_neigh_lookup(dst_cache, daddr);
+ if (!n)
+ return -1;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(ha, n->ha);
+ read_unlock_bh(&n->lock);
+ neigh_release(n);
+
+ if (!(nud_state & NUD_VALID))
+ return -1;
+
+out:
+ return dev_fill_forward_path(dev, ha, stack);
+}
+
+struct nft_forward_info {
+ const struct net_device *indev;
+ const struct net_device *outdev;
+ struct id {
+ __u16 id;
+ __be16 proto;
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
+ u8 num_encaps;
+ struct flow_offload_tunnel tun;
+ u8 num_tuns;
+ u8 ingress_vlans;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ enum flow_offload_xmit_type xmit_type;
+};
+
+static void nft_dev_path_info(const struct net_device_path_stack *stack,
+ struct nft_forward_info *info,
+ unsigned char *ha, struct nf_flowtable *flowtable)
+{
+ const struct net_device_path *path;
+ int i;
+
+ memcpy(info->h_dest, ha, ETH_ALEN);
+
+ for (i = 0; i < stack->num_paths; i++) {
+ path = &stack->path[i];
+ switch (path->type) {
+ case DEV_PATH_ETHERNET:
+ case DEV_PATH_DSA:
+ case DEV_PATH_VLAN:
+ case DEV_PATH_PPPOE:
+ case DEV_PATH_TUN:
+ info->indev = path->dev;
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ if (path->type == DEV_PATH_ETHERNET)
+ break;
+ if (path->type == DEV_PATH_DSA) {
+ i = stack->num_paths;
+ break;
+ }
+
+ /* DEV_PATH_VLAN, DEV_PATH_PPPOE and DEV_PATH_TUN */
+ if (path->type == DEV_PATH_TUN) {
+ if (info->num_tuns) {
+ info->indev = NULL;
+ break;
+ }
+ info->tun.src_v6 = path->tun.src_v6;
+ info->tun.dst_v6 = path->tun.dst_v6;
+ info->tun.l3_proto = path->tun.l3_proto;
+ info->num_tuns++;
+ } else {
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ info->encap[info->num_encaps].id =
+ path->encap.id;
+ info->encap[info->num_encaps].proto =
+ path->encap.proto;
+ info->num_encaps++;
+ }
+ if (path->type == DEV_PATH_PPPOE)
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
+ break;
+ case DEV_PATH_BRIDGE:
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ switch (path->bridge.vlan_mode) {
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
+ info->ingress_vlans |= BIT(info->num_encaps - 1);
+ break;
+ case DEV_PATH_BR_VLAN_TAG:
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
+ info->num_encaps++;
+ break;
+ case DEV_PATH_BR_VLAN_UNTAG:
+ if (WARN_ON_ONCE(info->num_encaps-- == 0)) {
+ info->indev = NULL;
+ break;
+ }
+ break;
+ case DEV_PATH_BR_VLAN_KEEP:
+ break;
+ }
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+ break;
+ default:
+ info->indev = NULL;
+ break;
+ }
+ }
+ info->outdev = info->indev;
+
+ if (nf_flowtable_hw_offload(flowtable) &&
+ nft_is_valid_ether_device(info->indev))
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+}
+
+static bool nft_flowtable_find_dev(const struct net_device *dev,
+ struct nft_flowtable *ft)
+{
+ struct nft_hook *hook;
+ bool found = false;
+
+ list_for_each_entry_rcu(hook, &ft->hook_list, list) {
+ if (!nft_hook_find_ops_rcu(hook, dev))
+ continue;
+
+ found = true;
+ break;
+ }
+
+ return found;
+}
+
+static int nft_flow_tunnel_update_route(const struct nft_pktinfo *pkt,
+ struct flow_offload_tunnel *tun,
+ struct nf_flow_route *route,
+ enum ip_conntrack_dir dir)
+{
+ struct dst_entry *cur_dst = route->tuple[dir].dst;
+ struct dst_entry *tun_dst = NULL;
+ struct flowi fl = {};
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = tun->dst_v4.s_addr;
+ fl.u.ip4.saddr = tun->src_v4.s_addr;
+ fl.u.ip4.flowi4_iif = nft_in(pkt)->ifindex;
+ fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
+ fl.u.ip4.flowi4_mark = pkt->skb->mark;
+ fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = tun->dst_v6;
+ fl.u.ip6.saddr = tun->src_v6;
+ fl.u.ip6.flowi6_iif = nft_in(pkt)->ifindex;
+ fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
+ fl.u.ip6.flowi6_mark = pkt->skb->mark;
+ fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
+ break;
+ }
+
+ nf_route(nft_net(pkt), &tun_dst, &fl, false, nft_pf(pkt));
+ if (!tun_dst)
+ return -ENOENT;
+
+ route->tuple[dir].dst = tun_dst;
+ dst_release(cur_dst);
+
+ return 0;
+}
+
+static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
+ struct nf_flow_route *route,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ struct nft_flowtable *ft)
+{
+ const struct dst_entry *dst = route->tuple[dir].dst;
+ struct net_device_path_stack stack;
+ struct nft_forward_info info = {};
+ unsigned char ha[ETH_ALEN];
+ int i;
+
+ if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+ nft_dev_path_info(&stack, &info, ha, &ft->data);
+
+ if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
+ return;
+
+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
+ for (i = 0; i < info.num_encaps; i++) {
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+ }
+
+ if (info.num_tuns &&
+ !nft_flow_tunnel_update_route(pkt, &info.tun, route, dir)) {
+ route->tuple[!dir].in.tun.src_v6 = info.tun.dst_v6;
+ route->tuple[!dir].in.tun.dst_v6 = info.tun.src_v6;
+ route->tuple[!dir].in.tun.l3_proto = info.tun.l3_proto;
+ route->tuple[!dir].in.num_tuns = info.num_tuns;
+ }
+
+ route->tuple[!dir].in.num_encaps = info.num_encaps;
+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
+
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+ route->tuple[dir].xmit_type = info.xmit_type;
+ }
+}
+
+int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
+ struct nft_flowtable *ft)
+{
+ struct dst_entry *this_dst = skb_dst(pkt->skb);
+ struct dst_entry *other_dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
+ fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
+ fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
+ fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
+ fl.u.ip4.flowi4_mark = pkt->skb->mark;
+ fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6;
+ fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
+ fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
+ fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
+ fl.u.ip6.flowi6_mark = pkt->skb->mark;
+ fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
+ break;
+ }
+
+ if (!dst_hold_safe(this_dst))
+ return -ENOENT;
+
+ nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
+ if (!other_dst) {
+ dst_release(this_dst);
+ return -ENOENT;
+ }
+
+ nft_default_forward_path(route, this_dst, dir);
+ nft_default_forward_path(route, other_dst, !dir);
+
+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+ nft_dev_forward_path(pkt, route, ct, dir, ft);
+ if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+ nft_dev_forward_path(pkt, route, ct, !dir, ft);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_flow_route);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index eed434e0a970..f3de2f9bbebf 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5770,7 +5770,11 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
struct nft_set_binding *i;
- struct nft_set_iter iter;
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
+ .fn = nf_tables_bind_check_setelem,
+ };
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -5785,13 +5789,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
goto bind;
}
- iter.genmask = nft_genmask_next(ctx->net);
- iter.type = NFT_ITER_UPDATE;
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nf_tables_bind_check_setelem;
-
set->ops->walk(ctx, set, &iter);
if (!iter.err)
iter.err = nft_set_catchall_bind_check(ctx, set);
@@ -6195,7 +6192,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
struct nftables_pernet *nft_net;
struct nft_table *table;
struct nft_set *set;
- struct nft_set_dump_args args;
+ struct nft_set_dump_args args = {
+ .cb = cb,
+ .skb = skb,
+ .reset = dump_ctx->reset,
+ .iter = {
+ .genmask = nft_genmask_cur(net),
+ .type = NFT_ITER_READ,
+ .skip = cb->args[0],
+ .fn = nf_tables_dump_setelem,
+ },
+ };
bool set_found = false;
struct nlmsghdr *nlh;
struct nlattr *nest;
@@ -6246,15 +6253,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto nla_put_failure;
- args.cb = cb;
- args.skb = skb;
- args.reset = dump_ctx->reset;
- args.iter.genmask = nft_genmask_cur(net);
- args.iter.type = NFT_ITER_READ;
- args.iter.skip = cb->args[0];
- args.iter.count = 0;
- args.iter.err = 0;
- args.iter.fn = nf_tables_dump_setelem;
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
if (!args.iter.err && args.iter.count == cb->args[0])
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index fc35a11cdca2..657764774a2d 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -24,33 +24,27 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
const struct nft_pktinfo *pkt,
const struct nft_set_ext *ext)
{
- const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
- const struct nf_conntrack_tuple *tuple_ptr;
- struct nf_conntrack_tuple tuple;
- enum ip_conntrack_info ctinfo;
- const struct nf_conn *ct;
unsigned int count;
+ int err;
- tuple_ptr = &tuple;
-
- ct = nf_ct_get(pkt->skb, &ctinfo);
- if (ct != NULL) {
- tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- zone = nf_ct_zone(ct);
- } else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
- nft_pf(pkt), nft_net(pkt), &tuple)) {
- regs->verdict.code = NF_DROP;
- return;
- }
-
- if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
- regs->verdict.code = NF_DROP;
- return;
+ err = nf_conncount_add_skb(nft_net(pkt), pkt->skb, nft_pf(pkt), priv->list);
+ if (err) {
+ if (err == -EEXIST) {
+ /* Call gc to update the list count if any connection has
+ * been closed already. This is useful for softlimit
+ * connections like limiting bandwidth based on a number
+ * of open connections.
+ */
+ nf_conncount_gc_list(nft_net(pkt), priv->list);
+ } else {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
}
count = READ_ONCE(priv->list->count);
- if ((count > priv->limit) ^ priv->invert) {
+ if ((count > READ_ONCE(priv->limit)) ^ READ_ONCE(priv->invert)) {
regs->verdict.code = NFT_BREAK;
return;
}
@@ -137,6 +131,16 @@ static int nft_connlimit_obj_init(const struct nft_ctx *ctx,
return nft_connlimit_do_init(ctx, tb, priv);
}
+static void nft_connlimit_obj_update(struct nft_object *obj,
+ struct nft_object *newobj)
+{
+ struct nft_connlimit *newpriv = nft_obj_data(newobj);
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ WRITE_ONCE(priv->limit, newpriv->limit);
+ WRITE_ONCE(priv->invert, newpriv->invert);
+}
+
static void nft_connlimit_obj_destroy(const struct nft_ctx *ctx,
struct nft_object *obj)
{
@@ -166,6 +170,7 @@ static const struct nft_object_ops nft_connlimit_obj_ops = {
.init = nft_connlimit_obj_init,
.destroy = nft_connlimit_obj_destroy,
.dump = nft_connlimit_obj_dump,
+ .update = nft_connlimit_obj_update,
};
static struct nft_object_type nft_connlimit_obj_type __read_mostly = {
@@ -238,13 +243,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
- bool ret;
-
- local_bh_disable();
- ret = nf_conncount_gc_list(net, priv->list);
- local_bh_enable();
- return ret;
+ return nf_conncount_gc_list(net, priv->list);
}
static struct nft_expr_type nft_connlimit_type;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 14dd1c0698c3..b8f76c9057fd 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -20,258 +20,6 @@ struct nft_flow_offload {
struct nft_flowtable *flowtable;
};
-static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
-{
- if (dst_xfrm(dst))
- return FLOW_OFFLOAD_XMIT_XFRM;
-
- return FLOW_OFFLOAD_XMIT_NEIGH;
-}
-
-static void nft_default_forward_path(struct nf_flow_route *route,
- struct dst_entry *dst_cache,
- enum ip_conntrack_dir dir)
-{
- route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
- route->tuple[dir].dst = dst_cache;
- route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
-}
-
-static bool nft_is_valid_ether_device(const struct net_device *dev)
-{
- if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
- dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
- return false;
-
- return true;
-}
-
-static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
- const struct dst_entry *dst_cache,
- const struct nf_conn *ct,
- enum ip_conntrack_dir dir, u8 *ha,
- struct net_device_path_stack *stack)
-{
- const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
- struct net_device *dev = dst_cache->dev;
- struct neighbour *n;
- u8 nud_state;
-
- if (!nft_is_valid_ether_device(dev))
- goto out;
-
- n = dst_neigh_lookup(dst_cache, daddr);
- if (!n)
- return -1;
-
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(ha, n->ha);
- read_unlock_bh(&n->lock);
- neigh_release(n);
-
- if (!(nud_state & NUD_VALID))
- return -1;
-
-out:
- return dev_fill_forward_path(dev, ha, stack);
-}
-
-struct nft_forward_info {
- const struct net_device *indev;
- const struct net_device *outdev;
- const struct net_device *hw_outdev;
- struct id {
- __u16 id;
- __be16 proto;
- } encap[NF_FLOW_TABLE_ENCAP_MAX];
- u8 num_encaps;
- u8 ingress_vlans;
- u8 h_source[ETH_ALEN];
- u8 h_dest[ETH_ALEN];
- enum flow_offload_xmit_type xmit_type;
-};
-
-static void nft_dev_path_info(const struct net_device_path_stack *stack,
- struct nft_forward_info *info,
- unsigned char *ha, struct nf_flowtable *flowtable)
-{
- const struct net_device_path *path;
- int i;
-
- memcpy(info->h_dest, ha, ETH_ALEN);
-
- for (i = 0; i < stack->num_paths; i++) {
- path = &stack->path[i];
- switch (path->type) {
- case DEV_PATH_ETHERNET:
- case DEV_PATH_DSA:
- case DEV_PATH_VLAN:
- case DEV_PATH_PPPOE:
- info->indev = path->dev;
- if (is_zero_ether_addr(info->h_source))
- memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
-
- if (path->type == DEV_PATH_ETHERNET)
- break;
- if (path->type == DEV_PATH_DSA) {
- i = stack->num_paths;
- break;
- }
-
- /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
- if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
- info->indev = NULL;
- break;
- }
- if (!info->outdev)
- info->outdev = path->dev;
- info->encap[info->num_encaps].id = path->encap.id;
- info->encap[info->num_encaps].proto = path->encap.proto;
- info->num_encaps++;
- if (path->type == DEV_PATH_PPPOE)
- memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
- break;
- case DEV_PATH_BRIDGE:
- if (is_zero_ether_addr(info->h_source))
- memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
-
- switch (path->bridge.vlan_mode) {
- case DEV_PATH_BR_VLAN_UNTAG_HW:
- info->ingress_vlans |= BIT(info->num_encaps - 1);
- break;
- case DEV_PATH_BR_VLAN_TAG:
- info->encap[info->num_encaps].id = path->bridge.vlan_id;
- info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
- info->num_encaps++;
- break;
- case DEV_PATH_BR_VLAN_UNTAG:
- info->num_encaps--;
- break;
- case DEV_PATH_BR_VLAN_KEEP:
- break;
- }
- info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
- break;
- default:
- info->indev = NULL;
- break;
- }
- }
- if (!info->outdev)
- info->outdev = info->indev;
-
- info->hw_outdev = info->indev;
-
- if (nf_flowtable_hw_offload(flowtable) &&
- nft_is_valid_ether_device(info->indev))
- info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
-}
-
-static bool nft_flowtable_find_dev(const struct net_device *dev,
- struct nft_flowtable *ft)
-{
- struct nft_hook *hook;
- bool found = false;
-
- list_for_each_entry_rcu(hook, &ft->hook_list, list) {
- if (!nft_hook_find_ops_rcu(hook, dev))
- continue;
-
- found = true;
- break;
- }
-
- return found;
-}
-
-static void nft_dev_forward_path(struct nf_flow_route *route,
- const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- struct nft_flowtable *ft)
-{
- const struct dst_entry *dst = route->tuple[dir].dst;
- struct net_device_path_stack stack;
- struct nft_forward_info info = {};
- unsigned char ha[ETH_ALEN];
- int i;
-
- if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
- nft_dev_path_info(&stack, &info, ha, &ft->data);
-
- if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
- return;
-
- route->tuple[!dir].in.ifindex = info.indev->ifindex;
- for (i = 0; i < info.num_encaps; i++) {
- route->tuple[!dir].in.encap[i].id = info.encap[i].id;
- route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
- }
- route->tuple[!dir].in.num_encaps = info.num_encaps;
- route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
-
- if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
- memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
- memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
- route->tuple[dir].out.ifindex = info.outdev->ifindex;
- route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
- route->tuple[dir].xmit_type = info.xmit_type;
- }
-}
-
-static int nft_flow_route(const struct nft_pktinfo *pkt,
- const struct nf_conn *ct,
- struct nf_flow_route *route,
- enum ip_conntrack_dir dir,
- struct nft_flowtable *ft)
-{
- struct dst_entry *this_dst = skb_dst(pkt->skb);
- struct dst_entry *other_dst = NULL;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
- switch (nft_pf(pkt)) {
- case NFPROTO_IPV4:
- fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
- fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
- fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
- fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
- fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
- fl.u.ip4.flowi4_mark = pkt->skb->mark;
- fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
- break;
- case NFPROTO_IPV6:
- fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
- fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6;
- fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
- fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
- fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
- fl.u.ip6.flowi6_mark = pkt->skb->mark;
- fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
- break;
- }
-
- if (!dst_hold_safe(this_dst))
- return -ENOENT;
-
- nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
- if (!other_dst) {
- dst_release(this_dst);
- return -ENOENT;
- }
-
- nft_default_forward_path(route, this_dst, dir);
- nft_default_forward_path(route, other_dst, !dir);
-
- if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
- route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
- nft_dev_forward_path(route, ct, dir, ft);
- nft_dev_forward_path(route, ct, !dir, ft);
- }
-
- return 0;
-}
-
static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
{
if (skb_sec_path(skb))
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 58c5b14889c4..fc2d7c5d83c8 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -246,19 +246,16 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
- struct nft_set_iter iter;
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
+ .fn = nft_setelem_validate,
+ };
if (!(priv->set->flags & NFT_SET_MAP) ||
priv->set->dtype != NFT_DATA_VERDICT)
return 0;
- iter.genmask = nft_genmask_next(ctx->net);
- iter.type = NFT_ITER_UPDATE;
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nft_setelem_validate;
-
priv->set->ops->walk(ctx, priv->set, &iter);
if (!iter.err)
iter.err = nft_set_catchall_validate(ctx, priv->set);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 0189f8b6b0bd..848287ab79cf 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -31,8 +31,6 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct net *net = xt_net(par);
const struct xt_connlimit_info *info = par->matchinfo;
- struct nf_conntrack_tuple tuple;
- const struct nf_conntrack_tuple *tuple_ptr = &tuple;
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
@@ -40,13 +38,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
u32 key[5];
ct = nf_ct_get(skb, &ctinfo);
- if (ct != NULL) {
- tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ if (ct)
zone = nf_ct_zone(ct);
- } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- xt_family(par), net, &tuple)) {
- goto hotdrop;
- }
if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -69,10 +62,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
key[1] = zone->id;
}
- connections = nf_conncount_count(net, info->data, key, tuple_ptr,
- zone);
+ connections = nf_conncount_count_skb(net, skb, xt_family(par), info->data, key);
if (connections == 0)
- /* kmalloc failed, drop it entirely */
+ /* kmalloc failed or tuple couldn't be found, drop it entirely */
goto hotdrop;
return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);