From cbc53e08a793b073e79f42ca33f1f3568703540d Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Sun, 10 Apr 2016 21:44:51 -0400
Subject: GSO: Add GSO type for fixed IPv4 ID

This patch adds support for TSO using IPv4 headers with a fixed IP ID
field. This is meant to allow us to do a lossless GRO in the case of TCP
flows that use a fixed IP ID, such as those that result from converting
IPv6 headers to IPv4 headers.

In addition, I am adding a feature that, for now, I am referring to as
TSO with IP ID mangling. Basically, when this flag is enabled the device
has the option to output the flow either with incrementing IP IDs or
with a fixed IP ID, regardless of what the original IP ID ordering was.
This is useful in cases where the DF bit is set and we do not care
whether the original IP ID value is maintained.

Signed-off-by: Alexander Duyck
Signed-off-by: David S. Miller
---
 include/linux/netdev_features.h |  3 +++
 include/linux/netdevice.h       |  1 +
 include/linux/skbuff.h          | 20 +++++++++++---------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index a734bf43d190..7cf272a4b5c8 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -39,6 +39,7 @@ enum {
 	NETIF_F_UFO_BIT,		/* ... UDPv4 fragmentation */
 	NETIF_F_GSO_ROBUST_BIT,		/* ... ->SKB_GSO_DODGY */
 	NETIF_F_TSO_ECN_BIT,		/* ... TCP ECN support */
+	NETIF_F_TSO_MANGLEID_BIT,	/* ... IPV4 ID mangling allowed */
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
@@ -120,6 +121,7 @@ enum {
 #define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
 #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
+#define NETIF_F_TSO_MANGLEID	__NETIF_F(TSO_MANGLEID)
 #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
@@ -147,6 +149,7 @@ enum {

 /* List of features with software fallbacks. */
 #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
+				 NETIF_F_TSO_MANGLEID | \
				 NETIF_F_TSO6 | NETIF_F_UFO)

 /* List of IP checksum features. Note that NETIF_F_HW_CSUM should not be
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9884fe9a6552..8e372d01b3c1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3992,6 +3992,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 	BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT));
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 007381270ff8..5fba16658f9d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -465,23 +465,25 @@ enum {
 	/* This indicates the tcp segment has CWR set. */
 	SKB_GSO_TCP_ECN = 1 << 3,

-	SKB_GSO_TCPV6 = 1 << 4,
+	SKB_GSO_TCP_FIXEDID = 1 << 4,

-	SKB_GSO_FCOE = 1 << 5,
+	SKB_GSO_TCPV6 = 1 << 5,

-	SKB_GSO_GRE = 1 << 6,
+	SKB_GSO_FCOE = 1 << 6,

-	SKB_GSO_GRE_CSUM = 1 << 7,
+	SKB_GSO_GRE = 1 << 7,

-	SKB_GSO_IPIP = 1 << 8,
+	SKB_GSO_GRE_CSUM = 1 << 8,

-	SKB_GSO_SIT = 1 << 9,
+	SKB_GSO_IPIP = 1 << 9,

-	SKB_GSO_UDP_TUNNEL = 1 << 10,
+	SKB_GSO_SIT = 1 << 10,

-	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
+	SKB_GSO_UDP_TUNNEL = 1 << 11,

-	SKB_GSO_TUNNEL_REMCSUM = 1 << 12,
+	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12,
+
+	SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
 };

 #if BITS_PER_LONG > 32
--
cgit v1.2.3
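
The new BUILD_BUG_ON() works because the SKB_GSO_* renumbering in
skbuff.h keeps the GSO type bits in lockstep with the feature bits:
inserting NETIF_F_TSO_MANGLEID_BIT between NETIF_F_TSO_ECN_BIT and
NETIF_F_TSO6_BIT is what forces every SKB_GSO_* value from TCPV6 onward
up by one. A standalone C sketch of that invariant (not kernel code;
GSO_SHIFT is an illustrative stand-in for NETIF_F_GSO_SHIFT, while the
bit offsets come from the patched headers, where TSO_MANGLEID is the
fifth GSO feature bit, matching SKB_GSO_TCP_FIXEDID = 1 << 4):

	/* gso_bit_check.c: standalone sketch, not kernel code. */
	#include <assert.h>

	#define GSO_SHIFT		16	/* illustrative NETIF_F_GSO_SHIFT */
	#define NETIF_F_TSO_MANGLEID	(1ULL << (GSO_SHIFT + 4))
	#define SKB_GSO_TCP_FIXEDID	(1 << 4)

	int main(void)
	{
		/* The same relationship net_gso_ok() pins down at build time. */
		assert(SKB_GSO_TCP_FIXEDID ==
		       (NETIF_F_TSO_MANGLEID >> GSO_SHIFT));
		return 0;
	}
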
From 1530545ed64b42e87acb43c0c16401bd1ebae6bf Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Sun, 10 Apr 2016 21:44:57 -0400
Subject: GRO: Add support for TCP with fixed IPv4 ID field, limit tunnel IP ID values

This patch does two things.

First, it allows TCP GRO to aggregate TCP frames with a fixed IPv4 ID
field. As a result we should now be able to aggregate flows that were
converted from IPv6 to IPv4. In addition, this allows us more
flexibility for future implementations of segmentation, as we may be
able to use a fixed IP ID when segmenting the flow.

Second, it places limitations on the ID field of the outer IPv4 header
in the case of tunneled frames. Specifically, it forces the IP ID to
increment by 1 unless the DF bit is set in the outer IPv4 header. This
way we can avoid creating overlapping series of IP IDs that could
possibly be fragmented if the frame goes through GRO and is then
resegmented via GSO.

Signed-off-by: Alexander Duyck
Signed-off-by: David S. Miller
---
 include/linux/netdevice.h |  5 ++++-
 net/core/dev.c            |  1 +
 net/ipv4/af_inet.c        | 35 ++++++++++++++++++++++++++++-------
 net/ipv4/tcp_offload.c    | 16 +++++++++++++++-
 net/ipv6/ip6_offload.c    |  8 ++++++--
 5 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8e372d01b3c1..2d70c521d516 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2121,7 +2121,10 @@ struct napi_gro_cb {
 	/* Used in GRE, set in fou/gue_gro_receive */
 	u8	is_fou:1;

-	/* 6 bit hole */
+	/* Used to determine if flush_id can be ignored */
+	u8	is_atomic:1;
+
+	/* 5 bit hole */

 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
diff --git a/net/core/dev.c b/net/core/dev.c
index e896b1953ab6..b78b586b1856 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4462,6 +4462,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->free = 0;
 		NAPI_GRO_CB(skb)->encap_mark = 0;
 		NAPI_GRO_CB(skb)->is_fou = 0;
+		NAPI_GRO_CB(skb)->is_atomic = 1;
 		NAPI_GRO_CB(skb)->gro_remcsum_start = 0;

 		/* Setup for GRO checksum validation */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5bbea9a0ce96..8564cab96189 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1328,6 +1328,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,

 	for (p = *head; p; p = p->next) {
 		struct iphdr *iph2;
+		u16 flush_id;

 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
@@ -1351,16 +1352,36 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 			(iph->tos ^ iph2->tos) |
 			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));

-		/* Save the IP ID check to be included later when we get to
-		 * the transport layer so only the inner most IP ID is checked.
-		 * This is because some GSO/TSO implementations do not
-		 * correctly increment the IP ID for the outer hdrs.
-		 */
-		NAPI_GRO_CB(p)->flush_id =
-			((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
 		NAPI_GRO_CB(p)->flush |= flush;
+
+		/* We need to store off the IP ID check to be included later
+		 * when we can verify that this packet does in fact belong
+		 * to a given flow.
+		 */
+		flush_id = (u16)(id - ntohs(iph2->id));
+
+		/* This bit of code makes it much easier for us to identify
+		 * the cases where we are doing atomic vs non-atomic IP ID
+		 * checks.  Specifically an atomic check can return IP ID
+		 * values 0 - 0xFFFF, while a non-atomic check can only
+		 * return 0 or 0xFFFF.
+		 */
+		if (!NAPI_GRO_CB(p)->is_atomic ||
+		    !(iph->frag_off & htons(IP_DF))) {
+			flush_id ^= NAPI_GRO_CB(p)->count;
+			flush_id = flush_id ? 0xFFFF : 0;
+		}
+
+		/* If the previous IP ID value was based on an atomic
+		 * datagram we can overwrite the value and ignore it.
+		 */
+		if (NAPI_GRO_CB(skb)->is_atomic)
+			NAPI_GRO_CB(p)->flush_id = flush_id;
+		else
+			NAPI_GRO_CB(p)->flush_id |= flush_id;
 	}

+	NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
 	NAPI_GRO_CB(skb)->flush |= flush;
 	skb_set_network_header(skb, off);
 	/* The above will be needed by the transport layer if there is one
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 08dd25d835af..d1ffd55289bd 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -239,7 +239,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)

 found:
 	/* Include the IP ID check below from the inner most IP hdr */
-	flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id;
+	flush = NAPI_GRO_CB(p)->flush;
 	flush |= (__force int)(flags & TCP_FLAG_CWR);
 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
 		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -248,6 +248,17 @@ found:
 		flush |= *(u32 *)((u8 *)th + i) ^
 			 *(u32 *)((u8 *)th2 + i);

+	/* When we receive our second frame we can make a decision on
+	 * whether we continue this flow as an atomic flow with a fixed
+	 * ID or if we use an incrementing ID.
+	 */
+	if (NAPI_GRO_CB(p)->flush_id != 1 ||
+	    NAPI_GRO_CB(p)->count != 1 ||
+	    !NAPI_GRO_CB(p)->is_atomic)
+		flush |= NAPI_GRO_CB(p)->flush_id;
+	else
+		NAPI_GRO_CB(p)->is_atomic = false;
+
 	mss = skb_shinfo(p)->gso_size;

 	flush |= (len - 1) >= mss;
@@ -316,6 +327,9 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 				  iph->daddr, 0);
 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;

+	if (NAPI_GRO_CB(skb)->is_atomic)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+
 	return tcp_gro_complete(skb);
 }

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b3a779393d71..061adcda65f3 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -240,10 +240,14 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 		NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
 		NAPI_GRO_CB(p)->flush |= flush;

-		/* Clear flush_id, there's really no concept of ID in IPv6. */
-		NAPI_GRO_CB(p)->flush_id = 0;
+		/* If the previous IP ID value was based on an atomic
+		 * datagram we can overwrite the value and ignore it.
+		 */
+		if (NAPI_GRO_CB(skb)->is_atomic)
+			NAPI_GRO_CB(p)->flush_id = 0;
 	}

+	NAPI_GRO_CB(skb)->is_atomic = true;
 	NAPI_GRO_CB(skb)->flush |= flush;

 	skb_gro_postpull_rcsum(skb, iph, nlen);
--
cgit v1.2.3
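
To see what the new ID check stores, it helps to pull the flush_id
arithmetic out of inet_gro_receive() and run it on plain values. The
sketch below is illustrative userspace C, not the kernel code; the
function name gro_flush_id and its parameters are made up here (the
kernel reads the equivalent state from struct napi_gro_cb and the IP
headers). An atomic candidate (previous frame flagged atomic and DF set)
keeps the raw ID delta; anything else collapses to 0 ("ID advanced by
exactly count, keep merging") or 0xFFFF ("mismatch, flush"):

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Mirrors the flush_id computation added to inet_gro_receive(). */
	static uint16_t gro_flush_id(uint16_t id, uint16_t prev_id,
				     uint16_t count, bool df, bool prev_atomic)
	{
		uint16_t flush_id = (uint16_t)(id - prev_id);

		if (!prev_atomic || !df) {
			flush_id ^= count;	/* zero iff delta == count */
			flush_id = flush_id ? 0xFFFF : 0;
		}
		return flush_id;
	}

	int main(void)
	{
		/* Incrementing IDs, one frame already held: delta == count. */
		assert(gro_flush_id(101, 100, 1, false, false) == 0);
		/* Fixed ID with DF set on an atomic flow: raw delta (0) is
		 * stored; tcp_gro_receive() finishes the fixed-vs-incrementing
		 * decision on the second frame. */
		assert(gro_flush_id(100, 100, 1, true, true) == 0);
		/* Fixed ID without DF: treated as a mismatch, flow is flushed. */
		assert(gro_flush_id(100, 100, 1, false, true) == 0xFFFF);
		return 0;
	}
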
From 802ab55adc39a06940a1b384e9fd0387fc762d7e Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Sun, 10 Apr 2016 21:45:03 -0400
Subject: GSO: Support partial segmentation offload

This patch adds support for something I am referring to as GSO partial.
The basic idea is that we can support a broader range of devices for
segmentation if we use fixed outer headers and have the hardware deal
only with segmenting the inner header. The idea behind the naming is
that everything before csum_start will be fixed headers, and everything
after will be the region that is handled by hardware.

With the current implementation it allows us to add support for the
following GSO types with an inner TSO_MANGLEID or TSO6 offload:

  NETIF_F_GSO_GRE
  NETIF_F_GSO_GRE_CSUM
  NETIF_F_GSO_IPIP
  NETIF_F_GSO_SIT
  NETIF_F_GSO_UDP_TUNNEL
  NETIF_F_GSO_UDP_TUNNEL_CSUM

In the case of hardware that already supports tunneling we may be able
to extend this further to support TSO_TCPV4 without TSO_MANGLEID if the
hardware can support updating inner IPv4 headers.

Signed-off-by: Alexander Duyck
Signed-off-by: David S. Miller
---
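A note on the arithmetic used below: tcp_gso_segment() and
skb_segment() round the payload down to a multiple of the MSS, so the
device is handed one MSS-aligned "super-segment" plus, possibly, a
normally segmented remainder. A quick standalone illustration
(userspace C; the payload length and MSS are made-up example values):

	#include <stdio.h>

	int main(void)
	{
		unsigned int len = 64000, mss = 1448;	/* hypothetical values */
		unsigned int partial_segs = len / mss;	/* 44 inner segments */
		unsigned int super_mss = partial_segs * mss;	/* 63712 bytes */

		/* skb_segment() emits one 63712-byte chunk the device carves
		 * into 44 wire segments, plus a 288-byte remainder frame. */
		printf("%u x %u + %u\n", partial_segs, mss, len - super_mss);
		return 0;
	}
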
 include/linux/netdev_features.h |  5 +++++
 include/linux/netdevice.h       |  2 ++
 include/linux/skbuff.h          |  9 +++++++--
 net/core/dev.c                  | 36 +++++++++++++++++++++++++++++++++---
 net/core/ethtool.c              |  1 +
 net/core/skbuff.c               | 29 ++++++++++++++++++++++++++++-
 net/ipv4/af_inet.c              | 20 ++++++++++++++++----
 net/ipv4/gre_offload.c          | 26 +++++++++++++++++++++-----
 net/ipv4/tcp_offload.c          | 10 ++++++++--
 net/ipv4/udp_offload.c          | 27 +++++++++++++++++++++------
 net/ipv6/ip6_offload.c          | 10 +++++++++-
 11 files changed, 151 insertions(+), 24 deletions(-)

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 7cf272a4b5c8..9fc79df0e561 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -48,6 +48,10 @@ enum {
 	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
+	NETIF_F_GSO_PARTIAL_BIT,	/* ... Only segment inner-most L4
+					 *     in hardware and all other
+					 *     headers in software.
+					 */
 	NETIF_F_GSO_TUNNEL_REMCSUM_BIT,	/* ... TUNNEL with TSO & REMCSUM */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
 		NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
@@ -122,6 +126,7 @@ enum {
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
 #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
 #define NETIF_F_TSO_MANGLEID	__NETIF_F(TSO_MANGLEID)
+#define NETIF_F_GSO_PARTIAL	__NETIF_F(GSO_PARTIAL)
 #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2d70c521d516..a3bb534576a3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1654,6 +1654,7 @@ struct net_device {
 	netdev_features_t	vlan_features;
 	netdev_features_t	hw_enc_features;
 	netdev_features_t	mpls_features;
+	netdev_features_t	gso_partial_features;

 	int			ifindex;
 	int			group;
@@ -4004,6 +4005,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 	BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));

 	return (features & feature) == feature;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5fba16658f9d..da0ace389fec 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -483,7 +483,9 @@ enum {

 	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12,

-	SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
+	SKB_GSO_PARTIAL = 1 << 13,
+
+	SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
 };

 #if BITS_PER_LONG > 32
@@ -3591,7 +3593,10 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
  * Keeps track of level of encapsulation of network headers.
  */
 struct skb_gso_cb {
-	int	mac_offset;
+	union {
+		int	mac_offset;
+		int	data_offset;
+	};
 	int	encap_level;
 	__wsum	csum;
 	__u16	csum_start;
diff --git a/net/core/dev.c b/net/core/dev.c
index b78b586b1856..556dd09af3b8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2711,6 +2711,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 			return ERR_PTR(err);
 	}

+	/* Only report GSO partial support if it will enable us to
+	 * support segmentation on this frame without needing additional
+	 * work.
+	 */
+	if (features & NETIF_F_GSO_PARTIAL) {
+		netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
+		struct net_device *dev = skb->dev;
+
+		partial_features |= dev->features & dev->gso_partial_features;
+		if (!skb_gso_ok(skb, features | partial_features))
+			features &= ~NETIF_F_GSO_PARTIAL;
+	}
+
 	BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
 		     sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));

@@ -2834,8 +2847,17 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
 	if (gso_segs > dev->gso_max_segs)
 		return features & ~NETIF_F_GSO_MASK;

-	/* Make sure to clear the IPv4 ID mangling feature if
-	 * the IPv4 header has the potential to be fragmented.
+	/* Support for GSO partial features requires software
+	 * intervention before we can actually process the packets
+	 * so we need to strip support for any partial features now
+	 * and we can pull them back in after we have partially
+	 * segmented the frame.
 	 */
+	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
+		features &= ~dev->gso_partial_features;
+
+	/* Make sure to clear the IPv4 ID mangling feature if the
+	 * IPv4 header has the potential to be fragmented.
+	 */
 	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
 		struct iphdr *iph = skb->encapsulation ?
@@ -6729,6 +6751,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		}
 	}

+	/* GSO partial features require GSO partial be set */
+	if ((features & dev->gso_partial_features) &&
+	    !(features & NETIF_F_GSO_PARTIAL)) {
+		netdev_dbg(dev,
+			   "Dropping partially supported GSO features since no GSO partial.\n");
+		features &= ~dev->gso_partial_features;
+	}
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	if (dev->netdev_ops->ndo_busy_poll)
 		features |= NETIF_F_BUSY_POLL;
@@ -7011,7 +7041,7 @@ int register_netdevice(struct net_device *dev)

 	/* Make NETIF_F_SG inheritable to tunnel devices.
 	 */
-	dev->hw_enc_features |= NETIF_F_SG;
+	dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;

 	/* Make NETIF_F_SG inheritable to MPLS.
 	 */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9494c41cc77c..e0cf20a3b3dd 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -88,6 +88,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_GSO_SIT_BIT] =		 "tx-sit-segmentation",
 	[NETIF_F_GSO_UDP_TUNNEL_BIT] =	 "tx-udp_tnl-segmentation",
 	[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
+	[NETIF_F_GSO_PARTIAL_BIT] =	 "tx-gso-partial",

 	[NETIF_F_FCOE_CRC_BIT] =	 "tx-checksum-fcoe-crc",
 	[NETIF_F_SCTP_CRC_BIT] =	 "tx-checksum-sctp",
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d04c2d1c8c87..4cc594cdaada 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3076,8 +3076,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	struct sk_buff *frag_skb = head_skb;
 	unsigned int offset = doffset;
 	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
+	unsigned int partial_segs = 0;
 	unsigned int headroom;
-	unsigned int len;
+	unsigned int len = head_skb->len;
 	__be16 proto;
 	bool csum;
 	int sg = !!(features & NETIF_F_SG);
@@ -3094,6 +3095,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,

 	csum = !!can_checksum_protocol(features, proto);

+	/* GSO partial only requires that we trim off any excess that
+	 * doesn't fit into an MSS sized block, so take care of that
+	 * now.
+	 */
+	if (features & NETIF_F_GSO_PARTIAL) {
+		partial_segs = len / mss;
+		mss *= partial_segs;
+	}
+
 	headroom = skb_headroom(head_skb);
 	pos = skb_headlen(head_skb);

@@ -3281,6 +3291,23 @@ perform_csum_check:
 	 */
 	segs->prev = tail;

+	/* Update GSO info on first skb in partial sequence. */
+	if (partial_segs) {
+		int type = skb_shinfo(head_skb)->gso_type;
+
+		/* Update type to add partial and then remove dodgy if set */
+		type |= SKB_GSO_PARTIAL;
+		type &= ~SKB_GSO_DODGY;
+
+		/* Update GSO info and prepare to start updating headers on
+		 * our way back down the stack of protocols.
+		 */
+		skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size;
+		skb_shinfo(segs)->gso_segs = partial_segs;
+		skb_shinfo(segs)->gso_type = type;
+		SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset;
+	}
+
 	/* Following permits correct backpressure, for protocols
 	 * using skb_set_owner_w().
 	 * Idea is to tranfert ownership from head_skb to last segment.
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8564cab96189..2e6e65fc4d20 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1200,7 +1200,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	const struct net_offload *ops;
 	unsigned int offset = 0;
 	struct iphdr *iph;
-	int proto;
+	int proto, tot_len;
 	int nhoff;
 	int ihl;
 	int id;
@@ -1219,6 +1219,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP_TUNNEL_CSUM |
 		       SKB_GSO_TCP_FIXEDID |
 		       SKB_GSO_TUNNEL_REMCSUM |
+		       SKB_GSO_PARTIAL |
 		       0)))
 		goto out;

@@ -1273,10 +1274,21 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		if (skb->next)
 			iph->frag_off |= htons(IP_MF);
 		offset += skb->len - nhoff - ihl;
-	} else if (!fixedid) {
-		iph->id = htons(id++);
+		tot_len = skb->len - nhoff;
+	} else if (skb_is_gso(skb)) {
+		if (!fixedid) {
+			iph->id = htons(id);
+			id += skb_shinfo(skb)->gso_segs;
+		}
+		tot_len = skb_shinfo(skb)->gso_size +
+			  SKB_GSO_CB(skb)->data_offset +
+			  skb->head - (unsigned char *)iph;
+	} else {
+		if (!fixedid)
+			iph->id = htons(id++);
+		tot_len = skb->len - nhoff;
 	}
-	iph->tot_len = htons(skb->len - nhoff);
+	iph->tot_len = htons(tot_len);
 	ip_send_check(iph);
 	if (encap)
 		skb_reset_inner_headers(skb);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 6376b0cdf693..20557f211408 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_GRE |
 				  SKB_GSO_GRE_CSUM |
 				  SKB_GSO_IPIP |
-				  SKB_GSO_SIT)))
+				  SKB_GSO_SIT |
+				  SKB_GSO_PARTIAL)))
 		goto out;

 	if (!skb->encapsulation)
@@ -87,7 +88,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	skb = segs;
 	do {
 		struct gre_base_hdr *greh;
-		__be32 *pcsum;
+		__sum16 *pcsum;

 		/* Set up inner headers if we are offloading inner checksum */
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -107,10 +108,25 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 			continue;

 		greh = (struct gre_base_hdr *)skb_transport_header(skb);
-		pcsum = (__be32 *)(greh + 1);
+		pcsum = (__sum16 *)(greh + 1);
+
+		if (skb_is_gso(skb)) {
+			unsigned int partial_adj;
+
+			/* Adjust checksum to account for the fact that
+			 * the partial checksum is based on actual size
+			 * whereas headers should be based on MSS size.
+			 */
+			partial_adj = skb->len + skb_headroom(skb) -
+				      SKB_GSO_CB(skb)->data_offset -
+				      skb_shinfo(skb)->gso_size;
+			*pcsum = ~csum_fold((__force __wsum)htonl(partial_adj));
+		} else {
+			*pcsum = 0;
+		}

-		*pcsum = 0;
-		*(__sum16 *)pcsum = gso_make_checksum(skb, 0);
+		*(pcsum + 1) = 0;
+		*pcsum = gso_make_checksum(skb, 0);
 	} while ((skb = skb->next));
 out:
 	return segs;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index d1ffd55289bd..02737b607aa7 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -109,6 +109,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 		goto out;
 	}

+	/* GSO partial only requires splitting the frame into an MSS
+	 * multiple and possibly a remainder. So update the mss now.
+	 */
+	if (features & NETIF_F_GSO_PARTIAL)
+		mss = skb->len - (skb->len % mss);
+
 	copy_destructor = gso_skb->destructor == tcp_wfree;
 	ooo_okay = gso_skb->ooo_okay;
 	/* All segments but the first should have ooo_okay cleared */
@@ -133,7 +139,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 	newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
 					       (__force u32)delta));

-	do {
+	while (skb->next) {
 		th->fin = th->psh = 0;
 		th->check = newcheck;

@@ -153,7 +159,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,

 		th->seq = htonl(seq);
 		th->cwr = 0;
-	} while (skb->next);
+	}

 	/* Following permits TCP Small Queues to work well with GSO :
 	 * The callback to TCP stack will be called at the time last frag
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6230cf4b0d2d..097060def7f0 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -39,8 +39,11 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	 * 16 bit length field due to the header being added outside of an
 	 * IP or IPv6 frame that was already limited to 64K - 1.
 	 */
-	partial = csum_sub(csum_unfold(uh->check),
-			   (__force __wsum)htonl(skb->len));
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)
+		partial = (__force __wsum)uh->len;
+	else
+		partial = (__force __wsum)htonl(skb->len);
+	partial = csum_sub(csum_unfold(uh->check), partial);

 	/* setup inner skb. */
 	skb->encapsulation = 0;
@@ -89,7 +92,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	udp_offset = outer_hlen - tnl_hlen;
 	skb = segs;
 	do {
-		__be16 len;
+		unsigned int len;

 		if (remcsum)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -107,14 +110,26 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 		skb_reset_mac_header(skb);
 		skb_set_network_header(skb, mac_len);
 		skb_set_transport_header(skb, udp_offset);
-		len = htons(skb->len - udp_offset);
+		len = skb->len - udp_offset;
 		uh = udp_hdr(skb);
-		uh->len = len;
+
+		/* If we are only performing partial GSO the inner header
+		 * will be using a length value equal to only one MSS sized
+		 * segment instead of the entire frame.
+		 */
+		if (skb_is_gso(skb)) {
+			uh->len = htons(skb_shinfo(skb)->gso_size +
+					SKB_GSO_CB(skb)->data_offset +
+					skb->head - (unsigned char *)uh);
+		} else {
+			uh->len = htons(len);
+		}

 		if (!need_csum)
 			continue;

-		uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len));
+		uh->check = ~csum_fold(csum_add(partial,
+						(__force __wsum)htonl(len)));

 		if (skb->encapsulation || !offload_csum) {
 			uh->check = gso_make_checksum(skb, ~uh->check);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 061adcda65f3..f5eb184e1093 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -63,6 +63,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	int proto;
 	struct frag_hdr *fptr;
 	unsigned int unfrag_ip6hlen;
+	unsigned int payload_len;
 	u8 *prevhdr;
 	int offset = 0;
 	bool encap, udpfrag;
@@ -82,6 +83,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_UDP_TUNNEL |
 			       SKB_GSO_UDP_TUNNEL_CSUM |
 			       SKB_GSO_TUNNEL_REMCSUM |
+			       SKB_GSO_PARTIAL |
 			       0)))
 		goto out;

@@ -118,7 +120,13 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,

 	for (skb = segs; skb; skb = skb->next) {
 		ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
-		ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
+		if (skb_is_gso(skb))
+			payload_len = skb_shinfo(skb)->gso_size +
+				      SKB_GSO_CB(skb)->data_offset +
+				      skb->head - (unsigned char *)(ipv6h + 1);
+		else
+			payload_len = skb->len - nhoff - sizeof(*ipv6h);
+		ipv6h->payload_len = htons(payload_len);
 		skb->network_header = (u8 *)ipv6h - skb->head;

 		if (udpfrag) {
--
cgit v1.2.3
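
A closing note on the header fix-ups in the af_inet.c, udp_offload.c
and ip6_offload.c hunks above: with GSO partial, the super-segment's
headers must already advertise the length of one final MSS-sized
segment, because the hardware replicates them verbatim while slicing
the payload at data_offset. A standalone sketch of that length
computation (userspace C; the outer Ethernet/IPv4/UDP plus inner
Ethernet/IPv6/TCP layout and all offsets are made-up example values,
measured from the start of the headers so the skb->head terms cancel):

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical header layout, offsets from buffer start. */
		unsigned int data_offset = 14 + 20 + 8 + 14 + 40 + 20; /* 116: all headers */
		unsigned int inner_ip6_end = 14 + 20 + 8 + 14 + 40;    /* 96: end of inner IPv6 */
		unsigned int gso_size = 1348;	/* inner TCP payload per segment */

		/* Equivalent of ipv6_gso_segment(): gso_size + data_offset +
		 * skb->head - (unsigned char *)(ipv6h + 1). */
		unsigned int payload_len = gso_size + data_offset - inner_ip6_end;

		printf("payload_len = %u\n", payload_len); /* 1368: TCP hdr + one MSS */
		return 0;
	}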