From d265929930e2ffafc744c0ae05fb70acd53be1ee Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 4 May 2022 08:35:59 -0700 Subject: netfilter: nf_conncount: reduce unnecessary GC Currently nf_conncount can trigger garbage collection (GC) at multiple places. Each GC process takes a spin_lock_bh to traverse the nf_conncount_list. We found that when testing port scanning use two parallel nmap, because the number of connection increase fast, the nf_conncount_count and its subsequent call to __nf_conncount_add take too much time, causing several CPU lockup. This happens when user set the conntrack limit to +20,000, because the larger the limit, the longer the list that GC has to traverse. The patch mitigate the performance issue by avoiding unnecessary GC with a timestamp. Whenever nf_conncount has done a GC, a timestamp is updated, and beforce the next time GC is triggered, we make sure it's more than a jiffies. By doin this we can greatly reduce the CPU cycles and avoid the softirq lockup. To reproduce it in OVS, $ ovs-appctl dpctl/ct-set-limits zone=1,limit=20000 $ ovs-appctl dpctl/ct-get-limits At another machine, runs two nmap $ nmap -p1- $ nmap -p1- Signed-off-by: William Tu Co-authored-by: Yifeng Sun Reported-by: Greg Rose Suggested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 82f36beb2e76..5d8ed6c90b7e 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -132,6 +132,9 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; + if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) + goto add_new_node; + /* check the saved connections */ list_for_each_entry_safe(conn, conn_n, &list->head, node) { if (collect > CONNCOUNT_GC_MAX_NODES) @@ -177,6 +180,7 @@ static int __nf_conncount_add(struct net *net, nf_ct_put(found_ct); } +add_new_node: if (WARN_ON_ONCE(list->count > INT_MAX)) return -EOVERFLOW; @@ -190,6 +194,7 @@ static int __nf_conncount_add(struct net *net, conn->jiffies32 = (u32)jiffies; list_add_tail(&conn->node, &list->head); list->count++; + list->last_gc = (u32)jiffies; return 0; } @@ -214,6 +219,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list) spin_lock_init(&list->list_lock); INIT_LIST_HEAD(&list->head); list->count = 0; + list->last_gc = (u32)jiffies; } EXPORT_SYMBOL_GPL(nf_conncount_list_init); @@ -227,6 +233,10 @@ bool nf_conncount_gc_list(struct net *net, unsigned int collected = 0; bool ret = false; + /* don't bother if we just did GC */ + if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) + return false; + /* don't bother if other cpu is already doing GC */ if (!spin_trylock(&list->list_lock)) return false; @@ -258,6 +268,7 @@ bool nf_conncount_gc_list(struct net *net, if (!list->count) ret = true; + list->last_gc = (u32)jiffies; spin_unlock(&list->list_lock); return ret; -- cgit v1.2.3 From f74360d3440ccf3bb10178f2805498c835c3ed5d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 May 2022 17:08:47 +0200 Subject: netfilter: conntrack: remove pr_debug callsites from tcp tracker They are either obsolete or useless. Those in the normal processing path cannot be enabled on a production system; they generate too much noise. One pr_debug call resides in an error path and does provide useful info, merge it with the existing nf_log_invalid(). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 52 +++------------------------------- 1 file changed, 4 insertions(+), 48 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 204a5cdff5b1..a63b51dceaf2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -485,7 +485,6 @@ static bool tcp_in_window(struct nf_conn *ct, struct nf_tcp_net *tn = nf_tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; - const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; u16 win_raw; s32 receiver_offset; @@ -508,18 +507,6 @@ static bool tcp_in_window(struct nf_conn *ct, ack -= receiver_offset; sack -= receiver_offset; - pr_debug("tcp_in_window: START\n"); - pr_debug("tcp_in_window: "); - nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", - seq, ack, receiver_offset, sack, receiver_offset, win, end); - pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - if (sender->td_maxwin == 0) { /* * Initialize sender data. @@ -597,27 +584,10 @@ static bool tcp_in_window(struct nf_conn *ct, */ seq = end = sender->td_end; - pr_debug("tcp_in_window: "); - nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", - seq, ack, receiver_offset, sack, receiver_offset, win, end); - pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - /* Is the ending sequence in the receive window (if available)? */ in_recv_win = !receiver->td_maxwin || after(end, sender->td_end - receiver->td_maxwin - 1); - pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", - before(seq, sender->td_maxend + 1), - (in_recv_win ? 1 : 0), - before(sack, receiver->td_end + 1), - after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); - if (before(seq, sender->td_maxend + 1) && in_recv_win && before(sack, receiver->td_end + 1) && @@ -698,11 +668,6 @@ static bool tcp_in_window(struct nf_conn *ct, } } - pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " - "receiver end=%u maxend=%u maxwin=%u\n", - res, sender->td_end, sender->td_maxend, sender->td_maxwin, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin); - return res; } @@ -772,8 +737,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); const struct nf_tcp_net *tn = nf_tcp_pernet(net); - const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; - const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; /* Don't need lock here: this conntrack not in circulation yet */ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; @@ -826,14 +789,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, /* tcp_packet will set them */ ct->proto.tcp.last_index = TCP_NONE_SET; - - pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - __func__, - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); return true; } @@ -1032,10 +987,11 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, } /* Invalid packet */ - pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", - dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state"); + nf_ct_l4proto_log_invalid(skb, ct, state, + "packet (index %d) in dir %d invalid, state %s", + index, dir, + tcp_conntrack_names[old_state]); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" -- cgit v1.2.3 From 58a94a62a53ff76085f8b3face7d9b929b6a34ee Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 17 May 2022 18:58:43 +1000 Subject: netfilter: ctnetlink: fix up for "netfilter: conntrack: remove unconfirmed list" After merging the net-next tree, today's linux-next build (powerpc ppc64_defconfig) produced this warning: nf_conntrack_netlink.c:1717 warning: 'ctnetlink_dump_one_entry' defined but not used Fixes: 8a75a2c17410 ("netfilter: conntrack: remove unconfirmed list") Signed-off-by: Stephen Rothwell Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_netlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e768f59741a6..722af5e309ba 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1714,6 +1714,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb) return 0; } +#ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_dump_one_entry(struct sk_buff *skb, struct netlink_callback *cb, struct nf_conn *ct, @@ -1754,6 +1755,7 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, return res; } +#endif static int ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) -- cgit v1.2.3