diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-02-07 07:48:35 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-02-07 07:50:13 +0300 |
| commit | 1fdad81d880349756414b1a7d4e2a8bdf52664e6 (patch) | |
| tree | 1da4ea8ca4da35cc6af3e08a573dc725ec5109b3 /include | |
| parent | 5826eec8710c214f02f253c165d66a230f5a86c3 (diff) | |
| parent | 35f66ce900370ed0eab6553977adc0a2fb9cccfb (diff) | |
| download | linux-1fdad81d880349756414b1a7d4e2a8bdf52664e6.tar.xz | |
Merge branch 'big-tcp-without-hbh-in-ipv6'
Alice Mikityanska says:
====================
BIG TCP without HBH in IPv6
Resubmitting after the grace period.
This series is part 1 of "BIG TCP for UDP tunnels". Due to the number of
patches, I'm splitting it into two logical parts:
* Remove hop-by-hop header for BIG TCP IPv6 to align with BIG TCP IPv4.
* Fix up things that prevent BIG TCP from working with UDP tunnels.
The current BIG TCP IPv6 code inserts a hop-by-hop extension header with
32-bit length of the packet. When the packet is encapsulated, and either
the outer or the inner protocol is IPv6, or both are IPv6, there will be
1 or 2 HBH headers that need to be dealt with. The issues that arise:
1. The drivers don't strip it, and they'd all need to know the structure
of each tunnel protocol in order to strip it correctly, also taking into
account all combinations of IPv4/IPv6 inner/outer protocols.
2. Even if (1) is implemented, it would be an additional performance
penalty per aggregated packet.
3. The skb_gso_validate_network_len check is skipped in
ip6_finish_output_gso when IP6SKB_FAKEJUMBO is set, but it seems that it
would make sense to do the actual validation, just taking into account
the length of the HBH header. When the support for tunnels is added, it
becomes trickier, because there may be one or two HBH headers, depending
on whether it's IPv6 in IPv6 or not.
At the same time, having an HBH header to store the 32-bit length is not
strictly necessary, as BIG TCP IPv4 doesn't do anything like this and
just restores the length from skb->len. The same thing can be done for
BIG TCP IPv6. Removing HBH from BIG TCP would allow to simplify the
implementation significantly, and align it with BIG TCP IPv4, which has
been a long-standing goal.
====================
Link: https://patch.msgid.link/20260205133925.526371-1-alice.kernel@fastmail.im
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/ipv6.h | 23 | ||||
| -rw-r--r-- | include/net/ipv6.h | 79 | ||||
| -rw-r--r-- | include/net/netfilter/nf_tables_ipv6.h | 4 |
3 files changed, 24 insertions, 82 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 20aae8357dd1..bdbd63f9a85e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -126,6 +126,28 @@ static inline unsigned int ipv6_transport_len(const struct sk_buff *skb) skb_network_header_len(skb); } +static inline unsigned int +ipv6_payload_len(const struct sk_buff *skb, const struct ipv6hdr *ip6) +{ + u32 len = ntohs(ip6->payload_len); + + return (len || !skb_is_gso(skb) || !skb_is_gso_tcp(skb)) ? + len : + skb->len - skb_network_offset(skb) - sizeof(struct ipv6hdr); +} + +static inline unsigned int skb_ipv6_payload_len(const struct sk_buff *skb) +{ + return ipv6_payload_len(skb, ipv6_hdr(skb)); +} + +#define IPV6_MAXPLEN 65535 + +static inline void ipv6_set_payload_len(struct ipv6hdr *ip6, unsigned int len) +{ + ip6->payload_len = len <= IPV6_MAXPLEN ? htons(len) : 0; +} + /* This structure contains results of exthdrs parsing as offsets from skb->nh. @@ -155,7 +177,6 @@ struct inet6_skb_parm { #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 -#define IP6SKB_FAKEJUMBO 512 #define IP6SKB_MULTIPATH 1024 #define IP6SKB_MCROUTE 2048 }; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c27b9d7aeb7c..cc56e09525d0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -25,8 +25,6 @@ struct ip_tunnel_info; #define SIN6_LEN_RFC2133 24 -#define IPV6_MAXPLEN 65535 - /* * NextHeader field of IPv6 header */ @@ -151,17 +149,6 @@ struct frag_hdr { __be32 identification; }; -/* - * Jumbo payload option, as described in RFC 2675 2. - */ -struct hop_jumbo_hdr { - u8 nexthdr; - u8 hdrlen; - u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ - u8 tlv_len; /* 4 */ - __be32 jumbo_payload_len; -}; - #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 @@ -464,72 +451,6 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); -/* This helper is specialized for BIG TCP needs. - * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. - * It assumes headers are already in skb->head. - * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there. - */ -static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) -{ - const struct hop_jumbo_hdr *jhdr; - const struct ipv6hdr *nhdr; - - if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) - return 0; - - if (skb->protocol != htons(ETH_P_IPV6)) - return 0; - - if (skb_network_offset(skb) + - sizeof(struct ipv6hdr) + - sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) - return 0; - - nhdr = ipv6_hdr(skb); - - if (nhdr->nexthdr != NEXTHDR_HOP) - return 0; - - jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); - if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || - jhdr->nexthdr != IPPROTO_TCP) - return 0; - return jhdr->nexthdr; -} - -/* Return 0 if HBH header is successfully removed - * Or if HBH removal is unnecessary (packet is not big TCP) - * Return error to indicate dropping the packet - */ -static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) -{ - const int hophdr_len = sizeof(struct hop_jumbo_hdr); - int nexthdr = ipv6_has_hopopt_jumbo(skb); - struct ipv6hdr *h6; - - if (!nexthdr) - return 0; - - if (skb_cow_head(skb, 0)) - return -1; - - /* Remove the HBH header. - * Layout: [Ethernet header][IPv6 header][HBH][L4 Header] - */ - memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb), - skb_network_header(skb) - skb_mac_header(skb) + - sizeof(struct ipv6hdr)); - - __skb_pull(skb, hophdr_len); - skb->network_header += hophdr_len; - skb->mac_header += hophdr_len; - - h6 = ipv6_hdr(skb); - h6->nexthdr = nexthdr; - - return 0; -} - static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index a0633eeaec97..c53ac00bb974 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -42,7 +42,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) if (ip6h->version != 6) return -1; - pkt_len = ntohs(ip6h->payload_len); + pkt_len = ipv6_payload_len(pkt->skb, ip6h); skb_len = pkt->skb->len - skb_network_offset(pkt->skb); if (pkt_len + sizeof(*ip6h) > skb_len) return -1; @@ -86,7 +86,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) if (ip6h->version != 6) goto inhdr_error; - pkt_len = ntohs(ip6h->payload_len); + pkt_len = ipv6_payload_len(pkt->skb, ip6h); if (pkt_len + sizeof(*ip6h) > pkt->skb->len) { idev = __in6_dev_get(nft_in(pkt)); __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS); |
