summaryrefslogtreecommitdiff
path: root/net/ipv4/inet_hashtables.c
diff options
context:
space:
mode:
authorMartin KaFai Lau <kafai@fb.com>2022-05-12 03:06:05 +0300
committerJakub Kicinski <kuba@kernel.org>2022-05-13 02:52:18 +0300
commitcae3873c5b3a4fcd9706fb461ff4e91bdf1f0120 (patch)
tree54650bd157a612c462c1fb7856c1800babd1790b /net/ipv4/inet_hashtables.c
parente8d0059000b20c4745c5b6a713f6adb269cff8ff (diff)
downloadlinux-cae3873c5b3a4fcd9706fb461ff4e91bdf1f0120.tar.xz
net: inet: Retire port only listening_hash
The listen sk is currently stored in two hash tables, listening_hash (hashed by port) and lhash2 (hashed by port and address). After commit 0ee58dad5b06 ("net: tcp6: prefer listeners bound to an address") and commit d9fbc7f6431f ("net: tcp: prefer listeners bound to an address"), the TCP-SYN lookup fast path does not use listening_hash. The commit 05c0b35709c5 ("tcp: seq_file: Replace listening_hash with lhash2") also moved the seq_file (/proc/net/tcp) iteration usage from listening_hash to lhash2. There are still a few listening_hash usages left. One of them is inet_reuseport_add_sock() which uses the listening_hash to search a listen sk during the listen() system call. This turns out to be very slow on use cases that listen on many different VIPs at a popular port (e.g. 443). [ On top of the slowness in adding to the tail in the IPv6 case ]. The latter patch has a selftest to demonstrate this case. This patch takes this chance to move all remaining listening_hash usages to lhash2 and then retire listening_hash. Since most changes need to be done together, it is hard to cut the listening_hash to lhash2 switch into small patches. The changes in this patch is highlighted here for the review purpose. 1. Because of the listening_hash removal, lhash2 can use the sk->sk_nulls_node instead of the icsk->icsk_listen_portaddr_node. This will also keep the sk_unhashed() check to work as is after stop adding sk to listening_hash. The union is removed from inet_listen_hashbucket because only nulls_head is needed. 2. icsk->icsk_listen_portaddr_node and its helpers are removed. 3. The current lhash2 users needs to iterate with sk_nulls_node instead of icsk_listen_portaddr_node. One case is in the inet[6]_lhash2_lookup(). Another case is the seq_file iterator in tcp_ipv4.c. One thing to note is sk_nulls_next() is needed because the old inet_lhash2_for_each_icsk_continue() does a "next" first before iterating. 4. Move the remaining listening_hash usage to lhash2 inet_reuseport_add_sock() which this series is trying to improve. inet_diag.c and mptcp_diag.c are the final two remaining use cases and is moved to lhash2 now also. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r--net/ipv4/inet_hashtables.c47
1 files changed, 9 insertions, 38 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 4b00fcb9088d..0b8235fbd440 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -246,12 +246,11 @@ static struct sock *inet_lhash2_lookup(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
+ struct hlist_nulls_node *node;
int score, hiscore = 0;
- inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
@@ -598,7 +597,6 @@ int __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb2;
- struct inet_listen_hashbucket *ilb;
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
@@ -608,31 +606,23 @@ int __inet_hash(struct sock *sk, struct sock *osk)
return 0;
}
WARN_ON(!sk_unhashed(sk));
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
- spin_lock(&ilb->lock);
spin_lock(&ilb2->lock);
if (sk->sk_reuseport) {
- err = inet_reuseport_add_sock(sk, ilb);
+ err = inet_reuseport_add_sock(sk, ilb2);
if (err)
goto unlock;
}
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
- sk->sk_family == AF_INET6) {
- hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
- } else {
- hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
- }
+ sk->sk_family == AF_INET6)
+ __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
+ else
+ __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
spin_unlock(&ilb2->lock);
- spin_unlock(&ilb->lock);
return err;
}
@@ -658,29 +648,23 @@ void inet_unhash(struct sock *sk)
if (sk->sk_state == TCP_LISTEN) {
struct inet_listen_hashbucket *ilb2;
- struct inet_listen_hashbucket *ilb;
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
/* Don't disable bottom halves while acquiring the lock to
* avoid circular locking dependency on PREEMPT_RT.
*/
- spin_lock(&ilb->lock);
spin_lock(&ilb2->lock);
if (sk_unhashed(sk)) {
spin_unlock(&ilb2->lock);
- spin_unlock(&ilb->lock);
return;
}
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_stop_listen_sock(sk);
- hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
__sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_unlock(&ilb2->lock);
- spin_unlock(&ilb->lock);
} else {
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
@@ -848,27 +832,14 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
-void inet_hashinfo_init(struct inet_hashinfo *h)
-{
- int i;
-
- for (i = 0; i < INET_LHTABLE_SIZE; i++) {
- spin_lock_init(&h->listening_hash[i].lock);
- INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
- i + LISTENING_NULLS_BASE);
- }
-
- h->lhash2 = NULL;
-}
-EXPORT_SYMBOL_GPL(inet_hashinfo_init);
-
static void init_hashinfo_lhash2(struct inet_hashinfo *h)
{
int i;
for (i = 0; i <= h->lhash2_mask; i++) {
spin_lock_init(&h->lhash2[i].lock);
- INIT_HLIST_HEAD(&h->lhash2[i].head);
+ INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head,
+ i + LISTENING_NULLS_BASE);
}
}