diff options
author | Kuniyuki Iwashima <kuniyu@amazon.co.jp> | 2020-07-21 09:15:31 +0300 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-07-31 19:39:31 +0300 |
commit | df89c1ee034ce11fa14dbbfe53d5b91ce70de5a6 (patch) | |
tree | 816c4ee155408c250bb3ef143554b451b30958a1 | |
parent | 6735c126d27200db85c338410ffd34649dd572ff (diff) | |
download | linux-df89c1ee034ce11fa14dbbfe53d5b91ce70de5a6.tar.xz |
udp: Improve load balancing for SO_REUSEPORT.
[ Upstream commit efc6b6f6c3113e8b203b9debfb72d81e0f3dcace ]
Currently, SO_REUSEPORT does not work well if connected sockets are in a
UDP reuseport group.
In that case, reuseport_has_conns() returns true and the result of
reuseport_select_sock() is discarded. Also, unconnected sockets have the
same score, hence the first unconnected socket in udp_hslot always
receives all packets sent to unconnected sockets.
So, the result of reuseport_select_sock() should be used for load
balancing.
The noteworthy point is that the unconnected sockets placed after
connected sockets in sock_reuseport.socks will receive more packets than
others because of the algorithm in reuseport_select_sock().
index | connected | reciprocal_scale | result
---------------------------------------------
0 | no | 20% | 40%
1 | no | 20% | 20%
2 | yes | 20% | 0%
3 | no | 20% | 40%
4 | yes | 20% | 0%
If most of the sockets are connected, this can be a problem, but it still
works better than the current behavior.
Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
CC: Willem de Bruijn <willemb@google.com>
Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | net/ipv4/udp.c | 15 | ||||
-rw-r--r-- | net/ipv6/udp.c | 15 |
2 files changed, 18 insertions, 12 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b3320e42cbcb..5d016bbdf16e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -413,7 +413,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -423,17 +423,20 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } + + result = reuseport_result ? 
: sk; badness = score; - result = sk; } } return result; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ede3bed354a8..6762430280f5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } - result = sk; + + result = reuseport_result ? : sk; badness = score; } } |