diff options
author | Kuniyuki Iwashima <kuniyu@amazon.com> | 2022-09-08 04:10:18 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-09-20 20:21:49 +0300 |
commit | e9bd0cca09d13ac2f08d25e195203e42d4ad1ce8 (patch) | |
tree | c098713403c2ed7e70d310348766a1acf9d5e374 /net/ipv4/tcp_ipv4.c | |
parent | 08eaef90403110e51861d93e8008a355af467bbe (diff) | |
download | linux-e9bd0cca09d13ac2f08d25e195203e42d4ad1ce8.tar.xz |
tcp: Don't allocate tcp_death_row outside of struct netns_ipv4.
We will soon introduce an optional per-netns ehash and access hash
tables via net->ipv4.tcp_death_row->hashinfo instead of &tcp_hashinfo
in most places.
It could harm the fast path because dereferences of two fields in net
and tcp_death_row might incur two extra cache line misses. To save one
dereference, let's place tcp_death_row back in netns_ipv4 and fetch
hashinfo via net->ipv4.tcp_death_row"."hashinfo.
Note tcp_death_row was initially placed in netns_ipv4, and commit
fbb8295248e1 ("tcp: allocate tcp_death_row outside of struct netns_ipv4")
changed it to a pointer so that we can fire TIME_WAIT timers after freeing
net. However, we don't do so after commit 04c494e68a13 ("Revert "tcp/dccp:
get rid of inet_twsk_purge()""), so we need not define tcp_death_row as a
pointer.
Also, we move refcount_dec_and_test(&tw_refcount) from tcp_sk_exit() to
tcp_sk_exit_batch() as a debug check.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 19 |
1 files changed, 7 insertions, 12 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a07243f66d4c..3930b6a1e0d6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -292,7 +292,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. */ tcp_set_state(sk, TCP_SYN_SENT); - tcp_death_row = net->ipv4.tcp_death_row; + tcp_death_row = &net->ipv4.tcp_death_row; err = inet_hash_connect(tcp_death_row, sk); if (err) goto failure; @@ -3091,13 +3091,9 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { - struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row; - if (net->ipv4.tcp_congestion_control) bpf_module_put(net->ipv4.tcp_congestion_control, net->ipv4.tcp_congestion_control->owner); - if (refcount_dec_and_test(&tcp_death_row->tw_refcount)) - kfree(tcp_death_row); } static int __net_init tcp_sk_init(struct net *net) @@ -3129,13 +3125,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 2; net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; - net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL); - if (!net->ipv4.tcp_death_row) - return -ENOMEM; - refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1); + refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1); cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2; - net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo; + net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2; + net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128); net->ipv4.sysctl_tcp_sack = 1; @@ -3201,8 +3194,10 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) inet_twsk_purge(&tcp_hashinfo, AF_INET); - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { + WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); + } } static struct pernet_operations __net_initdata tcp_sk_ops = { |