diff options
author | Jakub Kicinski <kuba@kernel.org> | 2021-12-09 04:02:33 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2021-12-09 04:02:35 +0300 |
commit | fd31cb0c6a34499364259fc902a48a016e3760cf (patch) | |
tree | 9c1f6138ecdc7bcdedcf49787594922ec8b16f95 | |
parent | b5b6b6baf2bfa738a9a1220f33684c34e87cc77f (diff) | |
parent | 802a7dc5cf1bef06f7b290ce76d478138408d6b1 (diff) | |
download | linux-fd31cb0c6a34499364259fc902a48a016e3760cf.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says:
====================
Netfilter fixes for net
1) Fix bogus compilter warning in nfnetlink_queue, from Florian Westphal.
2) Don't run conntrack on vrf with !dflt qdisc, from Nicolas Dichtel.
3) Fix nft_pipapo bucket load in AVX2 lookup routine for six 8-bit
groups, from Stefano Brivio.
4) Break rule evaluation on malformed TCP options.
5) Use socat instead of nc in selftests/netfilter/nft_zones_many.sh,
also from Florian
6) Fix KCSAN data-race in conntrack timeout updates, from Eric Dumazet.
* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf:
netfilter: conntrack: annotate data-races around ct->timeout
selftests: netfilter: switch zone stress to socat
netfilter: nft_exthdr: break evaluation if setting TCP option fails
selftests: netfilter: Add correctness test for mac,net set type
nft_set_pipapo: Fix bucket load in AVX2 lookup routine for six 8-bit groups
vrf: don't run conntrack on vrf with !dflt qdisc
netfilter: nfnetlink_queue: silence bogus compiler warning
====================
Link: https://lore.kernel.org/r/20211209000847.102598-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | drivers/net/vrf.c | 8 | ||||
-rw-r--r-- | include/net/netfilter/nf_conntrack.h | 6 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 6 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_flow_table_core.c | 4 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_queue.c | 2 | ||||
-rw-r--r-- | net/netfilter/nft_exthdr.c | 11 | ||||
-rw-r--r-- | net/netfilter/nft_set_pipapo_avx2.c | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/conntrack_vrf.sh | 30 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/nft_concat_range.sh | 24 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/nft_zones_many.sh | 19 |
11 files changed, 82 insertions, 32 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 131c745dc701..b2242a082431 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -770,8 +770,6 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; - vrf_nf_set_untracked(skb); - err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); @@ -792,6 +790,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); @@ -1000,8 +1000,6 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; - vrf_nf_set_untracked(skb); - err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip_out_direct_finish); @@ -1023,6 +1021,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cc663c68ddc4..d24b0a34c8f0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; return timeout > 0 ? timeout : 0; } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { - return (__s32)(ct->timeout - nfct_time_stamp) <= 0; + return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ @@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) - ct->timeout = nfct_time_stamp + NF_CT_DAY; + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 770a63103c7a..4712a90a1820 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -684,7 +684,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) tstamp = nf_conn_tstamp_find(ct); if (tstamp) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; tstamp->stop = ktime_get_real_ns(); if (timeout < 0) @@ -1036,7 +1036,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) } /* We want the clashing entry to go away real soon: 1 second timeout. */ - loser_ct->timeout = nfct_time_stamp + HZ; + WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ); /* IPS_NAT_CLASH removes the entry automatically on the first * reply. Also prevents UDP tracker from moving the entry to @@ -1560,7 +1560,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - ct->timeout = 0; + WRITE_ONCE(ct->timeout, 0); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset, 0, offsetof(struct nf_conn, proto) - diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c7708bde057c..81d03acf68d4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1998,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, if (timeout > INT_MAX) timeout = INT_MAX; - ct->timeout = nfct_time_stamp + (u32)timeout; + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 87a7388b6c89..ed37bb9b4e58 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -201,8 +201,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (timeout < 0) timeout = 0; - if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) - ct->timeout = nfct_time_stamp + timeout; + if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) + WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } static void flow_offload_fixup_ct_state(struct nf_conn *ct) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 4acc4b8e9fe5..5837e8efc9c2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *indev; struct net_device *outdev; struct nf_conn *ct = NULL; - enum ip_conntrack_info ctinfo; + enum ip_conntrack_info ctinfo = 0; struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index af4ee874a067..dbe1f2e7dd9e 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -236,7 +236,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) - return; + goto err; opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { @@ -251,16 +251,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, continue; if (i + optl > tcphdr_len || priv->len + priv->offset > optl) - return; + goto err; if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + i + priv->len)) - return; + goto err; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) - return; + goto err; offset = i + priv->offset; @@ -303,6 +303,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, return; } + return; +err: + regs->verdict.code = NFT_BREAK; } static void nft_exthdr_sctp_eval(const struct nft_expr *expr, diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index e517663e0cd1..6f4116e72958 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize); NFT_PIPAPO_AVX2_AND(5, 0, 1); - NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize); NFT_PIPAPO_AVX2_AND(7, 2, 3); /* Stall */ diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh index 91f3ef0f1192..8b5ea9234588 100755 --- a/tools/testing/selftests/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -150,11 +150,27 @@ EOF # oifname is the vrf device. test_masquerade_vrf() { + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc add dev tvrf root $qdisc + fi + ip netns exec $ns0 conntrack -F 2>/dev/null ip netns exec $ns0 nft -f - <<EOF flush ruleset table ip nat { + chain rawout { + type filter hook output priority raw; + + oif tvrf ct state untracked counter + } + chain postrouting2 { + type filter hook postrouting priority mangle; + + oif tvrf ct state untracked counter + } chain postrouting { type nat hook postrouting priority 0; # NB: masquerade should always be combined with 'oif(name) bla', @@ -171,13 +187,18 @@ EOF fi # must also check that nat table was evaluated on second (lower device) iteration. - ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' && + ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]' if [ $? -eq 0 ]; then - echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device" + echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" else - echo "FAIL: vrf masq rule has unexpected counter value" + echo "FAIL: vrf rules have unexpected counter value" ret=1 fi + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc del dev tvrf root + fi } # add masq rule that gets evaluated w. outif set to veth device. @@ -213,7 +234,8 @@ EOF } test_ct_zone_in -test_masquerade_vrf +test_masquerade_vrf "default" +test_masquerade_vrf "pfifo" test_masquerade_veth exit $ret diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index 5a4938d6dcf2..ed61f6cab60f 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout" # Set types, defined by TYPE_ variables below TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto - net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port - net_port_mac_proto_net" + net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp + net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below BUGS="flush_remove_add" @@ -277,6 +277,23 @@ perf_entries 1000 perf_proto ipv4 " +TYPE_mac_net=" +display mac,net +type_spec ether_addr . ipv4_addr +chain_spec ether saddr . ip saddr +dst +src mac addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + TYPE_net_mac_icmp=" display net,mac - ICMP type_spec ipv4_addr . ether_addr @@ -984,7 +1001,8 @@ format() { fi done for f in ${src}; do - __expr="${__expr} . " + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + __start="$(eval format_"${f}" "${srcstart}")" __end="$(eval format_"${f}" "${srcend}")" diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh index ac646376eb01..04633119b29a 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -18,11 +18,17 @@ cleanup() ip netns del $ns } -ip netns add $ns -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $gw" - exit $ksft_skip -fi +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "socat -V" "run test without socat tool" +checktool "ip netns add $ns" "create net namespace" trap cleanup EXIT @@ -71,7 +77,8 @@ EOF local start=$(date +%s%3N) i=$((i + 10000)) j=$((j + 1)) - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null + # nft rule in output places each packet in a different zone. + dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break |