From 5699b3431e0b14736867484b8669ead2d40f575e Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:01 -0700 Subject: tunnels: Don't apply GRO to multiple layers of encapsulation. commit fac8e0f579695a3ecbc4d3cac369139d7f819971 upstream. When drivers express support for TSO of encapsulated packets, they only mean that they can do it for one layer of encapsulation. Supporting additional levels would mean updating, at a minimum, more IP length fields and they are unaware of this. No encapsulation device expresses support for handling offloaded encapsulated packets, so we won't generate these types of frames in the transmit path. However, GRO doesn't have a check for multiple levels of encapsulation and will attempt to build them. UDP tunnel GRO actually does prevent this situation but it only handles multiple UDP tunnels stacked on top of each other. This generalizes that solution to prevent any kind of tunnel stacking that would cause problems. Fixes: bf5a755f ("net-gre-gro: Add GRE support to the GRO stack") Signed-off-by: Jesse Gross Signed-off-by: David S. 
Miller Signed-off-by: Juerg Haefliger Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index de4ed2b5a221..0989fea88c44 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4239,7 +4239,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ -- cgit v1.2.3 From 63091b2c1deae0a7f1347a74097de8ccc9db3359 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Mon, 3 Oct 2016 13:43:02 -0600 Subject: net: Add netdev all_adj_list refcnt propagation to fix panic [ Upstream commit 93409033ae653f1c9a949202fb537ab095b2092f ] This is a respin of a patch to fix a relatively easily reproducible kernel panic related to the all_adj_list handling for netdevs in recent kernels. The following sequence of commands will reproduce the issue: ip link add link eth0 name eth0.100 type vlan id 100 ip link add link eth0 name eth0.200 type vlan id 200 ip link add name testbr type bridge ip link set eth0.100 master testbr ip link set eth0.200 master testbr ip link add link testbr mac0 type macvlan ip link delete dev testbr This creates an upper/lower tree of (excuse the poor ASCII art): /---eth0.100-eth0 mac0-testbr- \---eth0.200-eth0 When testbr is deleted, the all_adj_lists are walked, and eth0 is deleted twice from the mac0 list. Unfortunately, during setup in __netdev_upper_dev_link, only one reference to eth0 is added, so this results in a panic. This change adds reference count propagation so things are handled properly. 
Matthias Schiffer reported a similar crash in batman-adv: https://github.com/freifunk-gluon/gluon/issues/680 https://www.open-mesh.org/issues/247 which this patch also seems to resolve. Signed-off-by: Andrew Collins Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 68 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 31 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0989fea88c44..5d9ec0458998 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5204,6 +5204,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -5213,7 +5214,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr++; + adj->ref_nr += ref_nr; return 0; } @@ -5223,7 +5224,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = 1; + adj->ref_nr = ref_nr; adj->private = private; dev_hold(adj_dev); @@ -5262,6 +5263,7 @@ free_adj: static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; @@ -5274,10 +5276,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, BUG(); } - if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); - adj->ref_nr--; + if (adj->ref_nr > ref_nr) { + pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, + ref_nr, adj->ref_nr-ref_nr); + adj->ref_nr -= ref_nr; return; } @@ -5296,21 +5298,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device 
*upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); return ret; } @@ -5318,9 +5321,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, } static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - return __netdev_adjacent_dev_link_lists(dev, upper_dev, + return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower, NULL, false); @@ -5328,17 +5332,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev, static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } @@ -5347,17 +5353,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct 
net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); + int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); if (ret) return ret; - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower, private, master); if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); return ret; } @@ -5367,8 +5373,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } @@ -5420,7 +5426,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { pr_debug("Interlinking %s with %s, non-neighbour\n", i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); + ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); if (ret) goto rollback_mesh; } @@ -5430,7 +5436,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { pr_debug("linking %s's upper device %s with %s\n", upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); + ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); if (ret) goto rollback_upper_mesh; } @@ -5439,7 +5445,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &dev->all_adj_list.lower, list) { pr_debug("linking %s's lower device %s with %s\n", dev->name, i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, 
upper_dev); + ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); if (ret) goto rollback_lower_mesh; } @@ -5453,7 +5459,7 @@ rollback_lower_mesh: list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); } i = NULL; @@ -5463,7 +5469,7 @@ rollback_upper_mesh: list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); } i = j = NULL; @@ -5475,7 +5481,7 @@ rollback_mesh: list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); } if (i == to_i) break; @@ -5559,16 +5565,16 @@ void netdev_upper_dev_unlink(struct net_device *dev, */ list_for_each_entry(i, &dev->all_adj_list.lower, list) list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); /* remove also the devices itself from lower/upper device * list */ list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); -- cgit v1.2.3 From 3cb00b90e8b1bd59382f5e1304dd751f9674f027 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 20 Oct 2016 15:58:02 +0200 Subject: net: add recursion limit to GRO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fcd91dd449867c6bfe56a81cabba76b829fd05cd ] 
Currently, GRO can do unlimited recursion through the gro_receive handlers. This was fixed for tunneling protocols by limiting tunnel GRO to one level with encap_mark, but both VLAN and TEB still have this problem. Thus, the kernel is vulnerable to a stack overflow, if we receive a packet composed entirely of VLAN headers. This patch adds a recursion counter to the GRO layer to prevent stack overflow. When a gro_receive function hits the recursion limit, GRO is aborted for this skb and it is processed normally. This recursion counter is put in the GRO CB, but could be turned into a percpu counter if we run out of space in the CB. Thanks to Vladimír Beneš for the initial bug report. Fixes: CVE-2016-7039 Fixes: 9b174d88c257 ("net: Add Transparent Ethernet Bridging GRO support.") Fixes: 66e5133f19e9 ("vlan: Add GRO support for non hardware accelerated vlan") Signed-off-by: Sabrina Dubroca Reviewed-by: Jiri Benc Acked-by: Hannes Frederic Sowa Acked-by: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 2 +- include/linux/netdevice.h | 40 +++++++++++++++++++++++++++++++++++++++- net/8021q/vlan.c | 2 +- net/core/dev.c | 1 + net/ethernet/eth.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/fou.c | 4 ++-- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 4 ++-- net/ipv6/ip6_offload.c | 2 +- 11 files changed, 51 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 69e31e2a68fc..4827c6987ac3 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -440,7 +440,7 @@ static struct sk_buff **geneve_gro_receive(struct sk_buff **head, skb_gro_pull(skb, gh_len); skb_gro_postpull_rcsum(skb, gh, gh_len); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 
003780901628..6fa8e165878e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -593,7 +593,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, } } - pp = eth_gro_receive(head, skb); + pp = call_gro_receive(eth_gro_receive, head, skb); out: skb_gro_remcsum_cleanup(skb, &grc); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12b4d54a8ffa..9d6025703f73 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2003,7 +2003,10 @@ struct napi_gro_cb { /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; - /* 7 bit hole */ + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* 3 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2014,6 +2017,25 @@ struct napi_gro_cb { #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *); +static inline struct sk_buff **call_gro_receive(gro_receive_t cb, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + struct packet_type { __be16 type; /* This is really htons(ether_type). 
*/ struct net_device *dev; /* NULL is wildcarded here */ @@ -2059,6 +2081,22 @@ struct udp_offload { struct udp_offload_callbacks callbacks; }; +typedef struct sk_buff **(*gro_receive_udp_t)(struct sk_buff **, + struct sk_buff *, + struct udp_offload *); +static inline struct sk_buff **call_gro_receive_udp(gro_receive_udp_t cb, + struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb, uoff); +} + /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index d2cd9de4b724..ad8d6e6b87ca 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -659,7 +659,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index 5d9ec0458998..d200a7ccbde6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4240,6 +4240,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->recursion_counter = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9e63f252a89e..de85d4e1cf43 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -436,7 +436,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff 
--git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1a5c1ca3ad3c..afc18e9ca94a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1372,7 +1372,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 08d7de55e57e..08d8ee124538 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -201,7 +201,7 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, if (!ops || !ops->callbacks.gro_receive) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -360,7 +360,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e603004c1af8..79ae0d7becbf 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -219,7 +219,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ skb_gro_postpull_rcsum(skb, greh, grehlen); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0e36e56dfd22..6396f1c80ae9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -339,8 +339,8 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); NAPI_GRO_CB(skb)->proto = 
uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb, - uo_priv->offload); + pp = call_gro_receive_udp(uo_priv->offload->callbacks.gro_receive, + head, skb, uo_priv->offload); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 82e9f3076028..efe6268b8bc3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -247,7 +247,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, iph, nlen); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); -- cgit v1.2.3 From d21daf7f3ee8da964596c4b62da190756a239d1a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 26 Oct 2016 11:23:07 -0400 Subject: packet: on direct_xmit, limit tso and csum to supported devices [ Upstream commit 104ba78c98808ae837d1f63aae58c183db5505df ] When transmitting on a packet socket with PACKET_VNET_HDR and PACKET_QDISC_BYPASS, validate device support for features requested in vnet_hdr. Drop TSO packets sent to devices that do not support TSO or have the feature disabled. Note that the latter currently do process those packets correctly, regardless of not advertising the feature. Because of SKB_GSO_DODGY, it is not sufficient to test device features with netif_needs_gso. Full validate_xmit_skb is needed. Switch to software checksum for non-TSO packets that request checksum offload if that device feature is unsupported or disabled. Note that similar to the TSO case, device drivers may perform checksum offload correctly even when not advertising it. When switching to software checksum, packets hit skb_checksum_help, which has two BUG_ON checks that fire if the checksum field is not in the linear segment. Packet sockets always allocate at least up to csum_start + csum_off + 2 as linear.
Tested by running github.com/wdebruij/kerneltools/psock_txring_vnet.c ethtool -K eth0 tso off tx on psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v -N ethtool -K eth0 tx off psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G -N v2: - add EXPORT_SYMBOL_GPL(validate_xmit_skb_list) Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 1 + net/packet/af_packet.c | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d200a7ccbde6..b3fa4b86ab4c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2836,6 +2836,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } return head; } +EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea1115602f58..34e4fcfd240b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -249,7 +249,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - netdev_features_t features; + struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -257,9 +257,8 @@ static int packet_direct_xmit(struct sk_buff *skb) !netif_carrier_ok(dev))) goto drop; - features = netif_skb_features(skb); - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) + skb = validate_xmit_skb_list(skb, dev); + if (skb != orig_skb) goto drop; txq = skb_get_tx_queue(dev, skb); @@ -279,7 +278,7 @@ static int packet_direct_xmit(struct sk_buff *skb) return ret; 
drop: atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return NET_XMIT_DROP; } -- cgit v1.2.3