diff options
| author | Jakub Sitnicki <jakub@cloudflare.com> | 2025-09-17 16:22:04 +0300 |
|---|---|---|
| committer | Paolo Abeni <pabeni@redhat.com> | 2025-09-23 11:12:15 +0300 |
| commit | d57f4b874946e997be52f5ebb5e0e1dad368c16f (patch) | |
| tree | 2a7751bc386282c4f36b9a06bbd634457f644c86 /include | |
| parent | 3afb106f3f9aa81c512ec5c7e2f7e1c01a2a6e6b (diff) | |
| download | linux-d57f4b874946e997be52f5ebb5e0e1dad368c16f.tar.xz | |
tcp: Update bind bucket state on port release
Today, once an inet_bind_bucket enters a state where fastreuse >= 0 or
fastreuseport >= 0 after a socket is explicitly bound to a port, it remains
in that state until all sockets are removed and the bucket is destroyed.
In this state, the bucket is skipped during ephemeral port selection in
connect(). For applications using a reduced ephemeral port
range (IP_LOCAL_PORT_RANGE socket option), this can cause faster port
exhaustion since blocked buckets are excluded from reuse.
The reason the bucket state isn't updated on port release is unclear.
Possibly a performance trade-off to avoid scanning bucket owners, or just
an oversight.
Fix it by recalculating the bucket state when a socket releases a port. To
limit overhead, each inet_bind2_bucket stores its own (fastreuse,
fastreuseport) state. On port release, only the relevant port-addr bucket
is scanned, and the overall state is derived from these.
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250917-update-bind-bucket-state-on-unhash-v5-1-57168b661b47@cloudflare.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/inet_connection_sock.h | 5 | ||||
| -rw-r--r-- | include/net/inet_hashtables.h | 2 | ||||
| -rw-r--r-- | include/net/inet_timewait_sock.h | 3 | ||||
| -rw-r--r-- | include/net/sock.h | 4 |
4 files changed, 11 insertions, 3 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 0737d8e178dd..b4b886647607 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -316,8 +316,9 @@ int inet_csk_listen_start(struct sock *sk); void inet_csk_listen_stop(struct sock *sk); /* update the fast reuse flag when adding a socket */ -void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, - struct sock *sk); +void inet_csk_update_fastreuse(const struct sock *sk, + struct inet_bind_bucket *tb, + struct inet_bind2_bucket *tb2); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b787be651ce7..ac05a52d9e13 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,6 +108,8 @@ struct inet_bind2_bucket { struct hlist_node bhash_node; /* List of sockets hashed to this bucket */ struct hlist_head owners; + signed char fastreuse; + signed char fastreuseport; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 3a31c74c9e15..63a644ff30de 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -70,7 +70,8 @@ struct inet_timewait_sock { unsigned int tw_transparent : 1, tw_flowlabel : 20, tw_usec_ts : 1, - tw_pad : 2, /* 2 bits hole */ + tw_connect_bind : 1, + tw_pad : 1, /* 1 bit hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; diff --git a/include/net/sock.h b/include/net/sock.h index b4fefeea0213..8c5b64f41ab7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1494,6 +1494,10 @@ static inline int __sk_prot_rehash(struct sock *sk) #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 +/** + * define SOCK_CONNECT_BIND - &sock->sk_userlocks flag for auto-bind at connect() time + */ +#define SOCK_CONNECT_BIND 16 struct socket_alloc { struct socket socket; |
