From 89114afd435a486deb8583e89f490fc274444d18 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 8 Jul 2006 13:34:32 -0700 Subject: [NET] gso: Add skb_is_gso This patch adds the wrapper function skb_is_gso which can be used instead of directly testing skb_shinfo(skb)->gso_size. This makes things a little nicer and allows us to change the primary key for indicating whether an skb is GSO (if we ever want to do that). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85f99f60deea..0359a6430018 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1001,7 +1001,7 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { - return net_gso_ok(features, skb_shinfo(skb)->gso_size ? + return net_gso_ok(features, skb_is_gso(skb) ? skb_shinfo(skb)->gso_type : 0); } -- cgit v1.2.3 From a430a43d087545c96542ee64573237919109d370 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 8 Jul 2006 13:34:56 -0700 Subject: [NET] gso: Fix up GSO packets with broken checksums Certain subsystems in the stack (e.g., netfilter) can break the partial checksum on GSO packets. Until they're fixed, this patch allows this to work by recomputing the partial checksums through the GSO mechanism. Once they've all been converted to update the partial checksum instead of clearing it, this workaround can be removed. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++-- include/net/protocol.h | 2 ++ include/net/tcp.h | 1 + net/core/dev.c | 36 ++++++++++++++++--- net/ipv4/af_inet.c | 36 +++++++++++++++++++ net/ipv4/tcp_ipv4.c | 18 ++++++++++ net/ipv6/ipv6_sockglue.c | 89 ++++++++++++++++++++++++++++++++--------------- net/ipv6/tcp_ipv6.c | 19 ++++++++++ 8 files changed, 174 insertions(+), 35 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0359a6430018..76cc099c8580 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -549,6 +549,7 @@ struct packet_type { struct net_device *); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); + int (*gso_send_check)(struct sk_buff *skb); void *af_packet_priv; struct list_head list; }; @@ -1001,13 +1002,14 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { - return net_gso_ok(features, skb_is_gso(skb) ? - skb_shinfo(skb)->gso_type : 0); + return net_gso_ok(features, skb_shinfo(skb)->gso_type); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { - return !skb_gso_ok(skb, dev->features); + return skb_is_gso(skb) && + (!skb_gso_ok(skb, dev->features) || + unlikely(skb->ip_summed != CHECKSUM_HW)); } #endif /* __KERNEL__ */ diff --git a/include/net/protocol.h b/include/net/protocol.h index a225d6371cb1..c643bce64e55 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -36,6 +36,7 @@ struct net_protocol { int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); + int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); int no_policy; @@ -51,6 +52,7 @@ struct inet6_protocol int type, int code, int offset, __u32 info); + int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); diff --git a/include/net/tcp.h b/include/net/tcp.h index 3cd803b0d7a5..0720bddff1e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1086,6 +1086,7 @@ extern struct request_sock_ops tcp_request_sock_ops; extern int tcp_v4_destroy_sock(struct sock *sk); +extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); #ifdef CONFIG_PROC_FS diff --git a/net/core/dev.c b/net/core/dev.c index 0096349ee38b..4d2b5167d7f5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1162,9 +1162,17 @@ int skb_checksum_help(struct sk_buff *skb, int inward) unsigned int csum; int ret = 0, offset = skb->h.raw - skb->data; - if (inward) { - skb->ip_summed = CHECKSUM_NONE; - goto out; + if (inward) + goto out_set_summed; + + if (unlikely(skb_shinfo(skb)->gso_size)) { + static int warned; + + WARN_ON(!warned); + warned = 1; + + /* Let GSO fix up the checksum. */ + goto out_set_summed; } if (skb_cloned(skb)) { @@ -1181,6 +1189,8 @@ int skb_checksum_help(struct sk_buff *skb, int inward) BUG_ON(skb->csum + 2 > offset); *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); + +out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: return ret; @@ -1201,17 +1211,35 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; int type = skb->protocol; + int err; BUG_ON(skb_shinfo(skb)->frag_list); - BUG_ON(skb->ip_summed != CHECKSUM_HW); skb->mac.raw = skb->data; skb->mac_len = skb->nh.raw - skb->data; __skb_pull(skb, skb->mac_len); + if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + static int warned; + + WARN_ON(!warned); + warned = 1; + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + err = ptype->gso_send_check(skb); + segs = ERR_PTR(err); + if (err || skb_gso_ok(skb, features)) + break; + __skb_push(skb, skb->data - skb->nh.raw); + } segs = ptype->gso_segment(skb, features); break; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 318d4674faa1..c84a32070f8d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1097,6 +1097,40 @@ int inet_sk_rebuild_header(struct sock *sk) EXPORT_SYMBOL(inet_sk_rebuild_header); +static int inet_gso_send_check(struct sk_buff *skb) +{ + struct iphdr *iph; + struct net_protocol *ops; + int proto; + int ihl; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = skb->nh.iph; + ihl = iph->ihl * 4; + if (ihl < sizeof(*iph)) + goto out; + + if (unlikely(!pskb_may_pull(skb, ihl))) + goto out; + + skb->h.raw = __skb_pull(skb, ihl); + iph = skb->nh.iph; + proto = iph->protocol & (MAX_INET_PROTOS - 1); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (likely(ops && ops->gso_send_check)) + err = ops->gso_send_check(skb); + rcu_read_unlock(); + +out: + return err; +} + static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EINVAL); @@ -1162,6 +1196,7 @@ static struct net_protocol igmp_protocol = { static struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, + .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, .no_policy = 1, }; @@ -1208,6 +1243,7 @@ static int ipv4_proc_init(void); static struct packet_type ip_packet_type = { .type = __constant_htons(ETH_P_IP), .func = ip_rcv, + .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a886e6efbbe..a891133f00e4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -496,6 +496,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) } } +int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = skb->nh.iph; + th = skb->h.th; + + th->check = 0; + th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); + skb->csum = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_HW; + return 0; +} + /* * This routine will send an RST to the other tcp. * diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0c17dec11c8d..43327264e69c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -57,29 +57,11 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, + int proto) { - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct ipv6hdr *ipv6h; - struct inet6_protocol *ops; - int proto; + struct inet6_protocol *ops = NULL; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - 0))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = skb->nh.ipv6h; - proto = ipv6h->nexthdr; - __skb_pull(skb, sizeof(*ipv6h)); - - rcu_read_lock(); for (;;) { struct ipv6_opt_hdr *opth; int len; @@ -88,30 +70,80 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) ops = rcu_dereference(inet6_protos[proto]); if (unlikely(!ops)) - goto unlock; + break; if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) break; } if (unlikely(!pskb_may_pull(skb, 8))) - goto unlock; + break; opth = (void *)skb->data; len = opth->hdrlen * 8 + 8; if (unlikely(!pskb_may_pull(skb, len))) - goto unlock; + break; proto = opth->nexthdr; __skb_pull(skb, len); } - skb->h.raw = skb->data; - if (likely(ops->gso_segment)) - segs = ops->gso_segment(skb, features); + return ops; +} + +static int ipv6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; -unlock: + ipv6h = skb->nh.ipv6h; + __skb_pull(skb, sizeof(*ipv6h)); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_send_check)) { + skb->h.raw = skb->data; + err = ops->gso_send_check(skb); + } + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = skb->nh.ipv6h; + __skb_pull(skb, sizeof(*ipv6h)); + segs = ERR_PTR(-EPROTONOSUPPORT); + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_segment)) { + skb->h.raw = skb->data; + segs = ops->gso_segment(skb, features); + } rcu_read_unlock(); if (unlikely(IS_ERR(segs))) @@ -130,6 +162,7 @@ out: static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), .func = ipv6_rcv, + .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5bdcb9002cf7..923989d0520d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -552,6 +552,24 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) } } +static int tcp_v6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + ipv6h = skb->nh.ipv6h; + th = skb->h.th; + + th->check = 0; + th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, + IPPROTO_TCP, 0); + skb->csum = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_HW; + return 0; +} static void tcp_v6_send_reset(struct sk_buff *skb) { @@ -1603,6 +1621,7 @@ struct proto tcpv6_prot = { static struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, + .gso_send_check = tcp_v6_gso_send_check, .gso_segment = tcp_tso_segment, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -- cgit v1.2.3 From 53c4b2cc7a05c034fd21d104d2ab43ea8cc0e075 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 21 Jul 2006 14:55:38 -0700 Subject: [NET]: Fix reversed error test in netif_tx_trylock A non-zero return value indicates success from spin_trylock, not error. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76cc099c8580..75f02d8c6ed3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -924,10 +924,10 @@ static inline void netif_tx_lock_bh(struct net_device *dev) static inline int netif_tx_trylock(struct net_device *dev) { - int err = spin_trylock(&dev->_xmit_lock); - if (!err) + int ok = spin_trylock(&dev->_xmit_lock); + if (likely(ok)) dev->xmit_lock_owner = smp_processor_id(); - return err; + return ok; } static inline void netif_tx_unlock(struct net_device *dev) -- cgit v1.2.3 From 7ea49ed73c8d0d0bdf7c11fc18c61572d2d22176 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 14 Aug 2006 17:08:36 -0700 Subject: [VLAN]: Make sure bonding packet drop checks get done in hwaccel RX path. Since __vlan_hwaccel_rx() is essentially bypassing the netif_receive_skb() call that would have occurred if we did the VLAN decapsulation in software, we are missing the skb_bond() call and the assosciated checks it does. Export those checks via an inline function, skb_bond_should_drop(), and use this in __vlan_hwaccel_rx(). Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 +++++ include/linux/netdevice.h | 24 ++++++++++++++++++++++++ net/core/dev.c | 18 +----------------- 3 files changed, 30 insertions(+), 17 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 383627ad328f..ab2740832742 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -155,6 +155,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, { struct net_device_stats *stats; + if (skb_bond_should_drop(skb)) { + dev_kfree_skb_any(skb); + return NET_RX_DROP; + } + skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { dev_kfree_skb_any(skb); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 75f02d8c6ed3..c0c2b46face1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1012,6 +1012,30 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) unlikely(skb->ip_summed != CHECKSUM_HW)); } +/* On bonding slaves other than the currently active slave, suppress + * duplicates except for 802.3ad ETH_P_SLOW and alb non-mcast/bcast. + */ +static inline int skb_bond_should_drop(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct net_device *master = dev->master; + + if (master && + (dev->priv_flags & IFF_SLAVE_INACTIVE)) { + if (master->priv_flags & IFF_MASTER_ALB) { + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + return 0; + } + if (master->priv_flags & IFF_MASTER_8023AD && + skb->protocol == __constant_htons(ETH_P_SLOW)) + return 0; + + return 1; + } + return 0; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index d95e2626d944..9fe96cde3e19 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1619,26 +1619,10 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) struct net_device *dev = skb->dev; if (dev->master) { - /* - * On bonding slaves other than the currently active - * slave, suppress duplicates except for 802.3ad - * ETH_P_SLOW and alb non-mcast/bcast. - */ - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if (dev->master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - goto keep; - } - - if (dev->master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - goto keep; - + if (skb_bond_should_drop(skb)) { kfree_skb(skb); return NULL; } -keep: skb->dev = dev->master; } -- cgit v1.2.3 From 78eb887733ec8ff5d6e6c69e3c32a187a9303622 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Aug 2006 18:22:32 -0700 Subject: [BRIDGE]: Disable SG/GSO if TX checksum is off When the bridge recomputes features, it does not maintain the constraint that SG/GSO must be off if TX checksum is off. This patch adds that constraint. On a completely unrelated note, I've also added TSO6 and TSO_ECN feature bits if GSO is enabled on the underlying device through the new NETIF_F_GSO_SOFTWARE macro. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/bridge/br_if.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c0c2b46face1..50a4719512ed 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -320,6 +320,9 @@ struct net_device #define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT) #define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) + /* List of features with software fallbacks. */ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) + #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f55ef682ef84..b1211d5342f6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,12 +386,17 @@ void br_features_recompute(struct net_bridge *br) checksum = 0; if (feature & NETIF_F_GSO) - feature |= NETIF_F_TSO; + feature |= NETIF_F_GSO_SOFTWARE; feature |= NETIF_F_GSO; features &= feature; } + if (!(checksum & NETIF_F_ALL_CSUM)) + features &= ~NETIF_F_SG; + if (!(features & NETIF_F_SG)) + features &= ~NETIF_F_GSO_MASK; + br->dev->features = features | checksum | NETIF_F_LLTX | NETIF_F_GSO_ROBUST; } -- cgit v1.2.3