diff options
author | Xin Long <lucien.xin@gmail.com> | 2023-01-28 18:58:39 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-02-02 07:54:27 +0300 |
commit | b1a78b9b98862cda167b643690e43662ea060625 (patch) | |
tree | 6a0ceb380c64d02b978f47debe65aef168d76d9b /net/core | |
parent | 9eefedd58ae1daece2ba907849a44db2941fb4b0 (diff) | |
download | linux-b1a78b9b98862cda167b643690e43662ea060625.tar.xz |
net: add support for ipv4 big tcp
Similar to Eric's IPv6 BIG TCP, this patch is to enable IPv4 BIG TCP.
Firstly, allow sk->sk_gso_max_size to be set to a value greater than
GSO_LEGACY_MAX_SIZE by not trimming gso_max_size in sk_trim_gso_size()
for IPv4 TCP sockets.
Then on TX path, set IP header tot_len to 0 when skb->len > IP_MAX_MTU
in __ip_local_out() to allow to send BIG TCP packets, and this implies
that skb->len is the length of a IPv4 packet; On RX path, use skb->len
as the length of the IPv4 packet when the IP header tot_len is 0 and
skb->len > IP_MAX_MTU in ip_rcv_core(). As the API iph_set_totlen() and
skb_ip_totlen() are used in __ip_local_out() and ip_rcv_core(), we only
need to update these APIs.
Also in GRO receive, add the check for ETH_P_IP/IPPROTO_TCP, and allows
the merged packet size >= GRO_LEGACY_MAX_SIZE in skb_gro_receive(). In
GRO complete, set IP header tot_len to 0 when the merged packet size
greater than IP_MAX_MTU in iph_set_totlen() so that it can be processed
on RX path.
Note that by checking skb_is_gso_tcp() in API iph_totlen(), it makes
this implementation safe to use iph->len == 0 indicates IPv4 BIG TCP
packets.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/gro.c | 12 | ||||
-rw-r--r-- | net/core/sock.c | 26 |
2 files changed, 21 insertions, 17 deletions
diff --git a/net/core/gro.c b/net/core/gro.c index 506f83d715f8..b15f85546bdd 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -162,16 +162,18 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) struct sk_buff *lp; int segs; - /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */ - gro_max_size = READ_ONCE(p->dev->gro_max_size); + /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */ + gro_max_size = p->protocol == htons(ETH_P_IPV6) ? + READ_ONCE(p->dev->gro_max_size) : + READ_ONCE(p->dev->gro_ipv4_max_size); if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush)) return -E2BIG; if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) { - if (p->protocol != htons(ETH_P_IPV6) || - skb_headroom(p) < sizeof(struct hop_jumbo_hdr) || - ipv6_hdr(p)->nexthdr != IPPROTO_TCP || + if (NAPI_GRO_CB(skb)->proto != IPPROTO_TCP || + (p->protocol == htons(ETH_P_IPV6) && + skb_headroom(p) < sizeof(struct hop_jumbo_hdr)) || p->encapsulation) return -E2BIG; } diff --git a/net/core/sock.c b/net/core/sock.c index 7ba4891460ad..f08b76acde9b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2373,17 +2373,22 @@ void sk_free_unlock_clone(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_free_unlock_clone); -static void sk_trim_gso_size(struct sock *sk) +static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) { - if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE) - return; + bool is_ipv6 = false; + u32 max_size; + #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6 && - sk_is_tcp(sk) && - !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) - return; + is_ipv6 = (sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); #endif - sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE; + /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ + max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) : + READ_ONCE(dst->dev->gso_ipv4_max_size); + if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) + max_size = GSO_LEGACY_MAX_SIZE; + + return max_size - (MAX_TCP_HEADER + 1); } void sk_setup_caps(struct sock *sk, struct dst_entry *dst) @@ -2403,10 +2408,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; - /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */ - sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size); - sk_trim_gso_size(sk); - sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1); + sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); } |