diff options
-rw-r--r-- | include/net/netfilter/nft_fib.h | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_fib_ipv4.c | 23 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_fib_ipv6.c | 16 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_flow_table_ip.c | 3 | ||||
-rw-r--r-- | net/netfilter/nf_nat_helper.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_queue.c | 1 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 20 | ||||
-rw-r--r-- | net/netfilter/nft_fib.c | 6 | ||||
-rw-r--r-- | net/netfilter/nft_flow_offload.c | 31 | ||||
-rw-r--r-- | tools/testing/selftests/netfilter/Makefile | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/nft_flowtable.sh | 324 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/nft_nat.sh | 6 |
13 files changed, 375 insertions, 63 deletions
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index a88f92737308..e4c4d8eaca8c 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -34,5 +34,5 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_fib_store_result(void *reg, const struct nft_fib *priv, - const struct nft_pktinfo *pkt, int index); + const struct net_device *dev); #endif diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 94eb25bc8d7e..c8888e52591f 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -58,11 +58,6 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, } EXPORT_SYMBOL_GPL(nft_fib4_eval_type); -static int get_ifindex(const struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} - void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { @@ -94,8 +89,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, pkt, - nft_in(pkt)->ifindex); + nft_fib_store_result(dest, priv, nft_in(pkt)); return; } @@ -108,8 +102,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { - nft_fib_store_result(dest, priv, pkt, - get_ifindex(pkt->skb->dev)); + nft_fib_store_result(dest, priv, pkt->skb->dev); return; } } @@ -150,17 +143,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, found = oif; } - switch (priv->result) { - case NFT_FIB_RESULT_OIF: - *dest = found->ifindex; - break; - case NFT_FIB_RESULT_OIFNAME: - strncpy((char *)dest, found->name, IFNAMSIZ); - break; - default: - WARN_ON_ONCE(1); - break; - } + nft_fib_store_result(dest, priv, found); } EXPORT_SYMBOL_GPL(nft_fib4_eval); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 73cdc0bc63f7..ec068b0cffca 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -169,8 +169,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, pkt, - nft_in(pkt)->ifindex); + nft_fib_store_result(dest, priv, nft_in(pkt)); return; } @@ -187,18 +186,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (oif && oif != rt->rt6i_idev->dev) goto put_rt_err; - switch (priv->result) { - case NFT_FIB_RESULT_OIF: - *dest = rt->rt6i_idev->dev->ifindex; - break; - case NFT_FIB_RESULT_OIFNAME: - strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); - break; - default: - WARN_ON_ONCE(1); - break; - } - + nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); put_rt_err: ip6_rt_put(rt); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 14457551bcb4..8ebf21149ec3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2312,7 +2312,6 @@ static void __net_exit __ip_vs_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ ip_vs_conn_net_cleanup(ipvs); ip_vs_app_net_cleanup(ipvs); @@ -2327,6 +2326,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); EnterFunction(2); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); ip_vs_sync_net_cleanup(ipvs); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 96825e20368f..241317473114 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -244,8 +244,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; outdev = rt->dst.dev; - if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && - (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) + if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) return NF_ACCEPT; if (skb_try_make_writable(skb, sizeof(*iph))) diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index ccc06f7539d7..53aeb12b70fb 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -170,7 +170,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) return true; - nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP, + nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP, udph, &udph->check, datalen, oldlen); return true; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 9dc1d6e04946..b5b2be55ca82 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -255,6 +255,7 @@ static unsigned int nf_iterate(struct sk_buff *skb, repeat: verdict = nf_hook_entry_hookfn(hook, skb, state); if (verdict != NF_ACCEPT) { + *index = i; if (verdict != NF_REPEAT) return verdict; goto repeat; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 28241e82fd15..4b5159936034 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2270,13 +2270,13 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, - const struct nft_rule *rule) + const struct nft_rule *rule, + const struct nft_rule *prule) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; const struct nft_expr *expr, *next; struct nlattr *list; - const struct nft_rule *prule; u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); @@ -2296,8 +2296,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, NFTA_RULE_PAD)) goto nla_put_failure; - if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { - prule = list_prev_entry(rule, list); + if (event != NFT_MSG_DELRULE && prule) { if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(prule->handle), NFTA_RULE_PAD)) @@ -2344,7 +2343,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, 0, ctx->family, ctx->table, - ctx->chain, rule); + ctx->chain, rule, NULL); if (err < 0) { kfree_skb(skb); goto err; @@ -2369,12 +2368,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, const struct nft_chain *chain) { struct net *net = sock_net(skb->sk); + const struct nft_rule *rule, *prule; unsigned int s_idx = cb->args[0]; - const struct nft_rule *rule; + prule = NULL; list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) - goto cont; + goto cont_skip; if (*idx < s_idx) goto cont; if (*idx > s_idx) { @@ -2386,11 +2386,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule) < 0) + table, chain, rule, prule) < 0) return 1; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: + prule = rule; +cont_skip: (*idx)++; } return 0; @@ -2546,7 +2548,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, - family, table, chain, rule); + family, table, chain, rule, NULL); if (err < 0) goto err; diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 21df8cccea65..77f00a99dfab 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -135,17 +135,17 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) EXPORT_SYMBOL_GPL(nft_fib_dump); void nft_fib_store_result(void *reg, const struct nft_fib *priv, - const struct nft_pktinfo *pkt, int index) + const struct net_device *dev) { - struct net_device *dev; u32 *dreg = reg; + int index; switch (priv->result) { case NFT_FIB_RESULT_OIF: + index = dev ? dev->ifindex : 0; *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; break; case NFT_FIB_RESULT_OIFNAME: - dev = dev_get_by_index_rcu(nft_net(pkt), index); if (priv->flags & NFTA_FIB_F_PRESENT) *dreg = !!dev; else diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ffb25d5e8dbe..aa5f571d4361 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -13,7 +13,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <linux/netfilter/nf_conntrack_common.h> #include <net/netfilter/nf_flow_table.h> -#include <net/netfilter/nf_conntrack_helper.h> struct nft_flow_offload { struct nft_flowtable *flowtable; @@ -50,15 +49,20 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, return 0; } -static bool nft_flow_offload_skip(struct sk_buff *skb) +static bool nft_flow_offload_skip(struct sk_buff *skb, int family) { - struct ip_options *opt = &(IPCB(skb)->opt); - - if (unlikely(opt->optlen)) - return true; if (skb_sec_path(skb)) return true; + if (family == NFPROTO_IPV4) { + const struct ip_options *opt; + + opt = &(IPCB(skb)->opt); + + if (unlikely(opt->optlen)) + return true; + } + return false; } @@ -68,15 +72,15 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; - const struct nf_conn_help *help; enum ip_conntrack_info ctinfo; struct nf_flow_route route; struct flow_offload *flow; enum ip_conntrack_dir dir; + bool is_tcp = false; struct nf_conn *ct; int ret; - if (nft_flow_offload_skip(pkt->skb)) + if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt))) goto out; ct = nf_ct_get(pkt->skb, &ctinfo); @@ -85,14 +89,16 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { case IPPROTO_TCP: + is_tcp = true; + break; case IPPROTO_UDP: break; default: goto out; } - help = nfct_help(ct); - if (help) + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || + ct->status & IPS_SEQ_ADJUST) goto out; if (!nf_ct_is_confirmed(ct)) @@ -109,6 +115,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; + if (is_tcp) { + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + } + ret = flow_offload_add(flowtable, flow); if (ret < 0) goto err_flow_add; diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3e6d1bcc2894..4144984ebee5 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,6 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh + conntrack_icmp_related.sh nft_flowtable.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh new file mode 100755 index 000000000000..fe52488a6f72 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -0,0 +1,324 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This tests basic flowtable functionality. +# Creates following topology: +# +# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000) +# Router1 is the one doing flow offloading, Router2 has no special +# purpose other than having a link that is smaller than either Originator +# and responder, i.e. TCPMSS announced values are too large and will still +# result in fragmentation and/or PMTU discovery. + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +ns1in="" +ns2in="" +ns1out="" +ns2out="" + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +which nc > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nc (netcat)" + exit $ksft_skip +fi + +ip netns add nsr1 +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +ip netns add ns1 +ip netns add ns2 + +ip netns add nsr2 + +cleanup() { + for i in 1 2; do + ip netns del ns$i + ip netns del nsr$i + done + + rm -f "$ns1in" "$ns1out" + rm -f "$ns2in" "$ns2out" + + [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns +} + +trap cleanup EXIT + +sysctl -q net.netfilter.nf_log_all_netns=1 + +ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 +ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 + +ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 + +for dev in lo veth0 veth1; do + for i in 1 2; do + ip -net nsr$i link set $dev up + done +done + +ip -net nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net nsr1 addr add dead:1::1/64 dev veth0 + +ip -net nsr2 addr add 10.0.2.1/24 dev veth1 +ip -net nsr2 addr add dead:2::1/64 dev veth1 + +# set different MTUs so we need to push packets coming from ns1 (large MTU) +# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), +# or to do PTMU discovery (send ICMP error back to originator). +# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers +# is NOT the lowest link mtu. + +ip -net nsr1 link set veth0 mtu 9000 +ip -net ns1 link set eth0 mtu 9000 + +ip -net nsr2 link set veth1 mtu 2000 +ip -net ns2 link set eth0 mtu 2000 + +# transfer-net between nsr1 and nsr2. +# these addresses are not used for connections. +ip -net nsr1 addr add 192.168.10.1/24 dev veth1 +ip -net nsr1 addr add fee1:2::1/64 dev veth1 + +ip -net nsr2 addr add 192.168.10.2/24 dev veth0 +ip -net nsr2 addr add fee1:2::2/64 dev veth0 + +for i in 1 2; do + ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 + ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null + + # don't set ip DF bit for first two tests + ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null +done + +ip -net nsr1 route add default via 192.168.10.2 +ip -net nsr2 route add default via 192.168.10.1 + +ip netns exec nsr1 nft -f - <<EOF +table inet filter { + flowtable f1 { + hook ingress priority 0 + devices = { veth0, veth1 } + } + + chain forward { + type filter hook forward priority 0; policy drop; + + # flow offloaded? Tag ct with mark 1, so we can detect when it fails. + meta oif "veth1" tcp dport 12345 flow offload @f1 counter + + # use packet size to trigger 'should be offloaded by now'. + # otherwise, if 'flow offload' expression never offloads, the + # test will pass. + tcp dport 12345 meta length gt 200 ct mark set 1 counter + + # this turns off flow offloading internally, so expect packets again + tcp flags fin,rst ct mark set 0 accept + + # this allows large packets from responder, we need this as long + # as PMTUd is off. + # This rule is deleted for the last test, when we expect PMTUd + # to kick in and ensure all packets meet mtu requirements. + meta length gt 1500 accept comment something-to-grep-for + + # next line blocks connection w.o. working offload. + # we only do this for reverse dir, because we expect packets to + # enter slow path due to MTU mismatch of veth0 and veth1. + tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop + + ct state established,related accept + + # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed) + meta length lt 200 oif "veth1" tcp dport 12345 counter accept + + meta nfproto ipv4 meta l4proto icmp accept + meta nfproto ipv6 meta l4proto icmpv6 accept + } +} +EOF + +if [ $? -ne 0 ]; then + echo "SKIP: Could not load nft ruleset" + exit $ksft_skip +fi + +# test basic connectivity +ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null +if [ $? -ne 0 ];then + echo "ERROR: ns1 cannot reach ns2" 1>&2 + bash + exit 1 +fi + +ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null +if [ $? -ne 0 ];then + echo "ERROR: ns2 cannot reach ns1" 1>&2 + exit 1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns1 can reach ns2" +fi + +ns1in=$(mktemp) +ns1out=$(mktemp) +ns2in=$(mktemp) +ns2out=$(mktemp) + +make_file() +{ + name=$1 + who=$2 + + SIZE=$((RANDOM % (1024 * 8))) + TSIZE=$((SIZE * 1024)) + + dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + + SIZE=$((RANDOM % 1024)) + SIZE=$((SIZE + 128)) + TSIZE=$((TSIZE + SIZE)) + dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null +} + +check_transfer() +{ + in=$1 + out=$2 + what=$3 + + cmp "$in" "$out" > /dev/null 2>&1 + if [ $? -ne 0 ] ;then + echo "FAIL: file mismatch for $what" 1>&2 + ls -l "$in" + ls -l "$out" + return 1 + fi + + return 0 +} + +test_tcp_forwarding() +{ + local nsa=$1 + local nsb=$2 + local lret=0 + + ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" & + lpid=$! + + sleep 1 + ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" & + cpid=$! + + sleep 3 + + kill $lpid + kill $cpid + wait + + check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" + if [ $? -ne 0 ];then + lret=1 + fi + + check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" + if [ $? -ne 0 ];then + lret=1 + fi + + return $lret +} + +make_file "$ns1in" "ns1" +make_file "$ns2in" "ns2" + +# First test: +# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. +test_tcp_forwarding ns1 ns2 +if [ $? -eq 0 ] ;then + echo "PASS: flow offloaded for ns1/ns2" +else + echo "FAIL: flow offload for ns1/ns2:" 1>&2 + ip netns exec nsr1 nft list ruleset + ret=1 +fi + +# delete default route, i.e. ns2 won't be able to reach ns1 and +# will depend on ns1 being masqueraded in nsr1. +# expect ns1 has nsr1 address. +ip -net ns2 route del default via 10.0.2.1 +ip -net ns2 route del default via dead:2::1 +ip -net ns2 route add 192.168.10.1 via 10.0.2.1 + +# Second test: +# Same, but with NAT enabled. +ip netns exec nsr1 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oifname "veth1" masquerade + } +} +EOF + +test_tcp_forwarding ns1 ns2 + +if [ $? -eq 0 ] ;then + echo "PASS: flow offloaded for ns1/ns2 with NAT" +else + echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 + ip netns exec nsr1 nft list ruleset + ret=1 +fi + +# Third test: +# Same as second test, but with PMTU discovery enabled. +handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) + +ip netns exec nsr1 nft delete rule inet filter forward $handle +if [ $? -ne 0 ] ;then + echo "FAIL: Could not delete large-packet accept rule" + exit 1 +fi + +ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null + +test_tcp_forwarding ns1 ns2 +if [ $? -eq 0 ] ;then + echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" +else + echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 + ip netns exec nsr1 nft list ruleset +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 14fcf3104c77..1be55e705780 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -36,7 +36,11 @@ trap cleanup EXIT ip netns add ns1 ip netns add ns2 -ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 ip -net ns0 link set lo up |