From 4b9b1cdf83c4facba89e0646aeac8ead679851b8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 28 May 2014 18:03:48 +0200 Subject: net: fix wrong mac_len calculation for vlans After 1e785f48d29a ("net: Start with correct mac_len in skb_network_protocol") skb->mac_len is used as a start of the calculation in skb_network_protocol() but that is not always correct. If skb->protocol == 8021Q/AD, usually the vlan header is already inserted in the skb (i.e. vlan reorder hdr == 0). Usually when the packet enters dev_hard_xmit it has mac_len == 0 so we take 2 bytes from the destination mac address (skb->data + VLAN_HLEN) as a type in skb_network_protocol() and return vlan_depth == 4. In the case where TSO is off, then the mac_len is set but it's == 18 (ETH_HLEN + VLAN_HLEN), so skb_network_protocol() returns a type from inside the packet and offset == 22. Also make vlan_depth unsigned as suggested before. As suggested by Eric Dumazet, move the while() loop in the if() so we can avoid additional testing in fast path. Here are few netperf tests + debug printk's to illustrate: cat netperf.tso-on.reorder-on.bugged - Vlan -> device (reorder on, default, this case is okay) MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 7111.54 [ 81.605435] skb->len 65226 skb->gso_size 1448 skb->proto 0x800 skb->mac_len 0 vlan_depth 0 type 0x800 - Vlan -> device (reorder off, bad) cat netperf.tso-on.reorder-off.bugged MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 241.35 [ 204.578332] skb->len 1518 skb->gso_size 0 skb->proto 0x8100 skb->mac_len 0 vlan_depth 4 type 0x5301 0x5301 are the last two bytes of the destination mac. And if we stop TSO, we may get even the following: [ 83.343156] skb->len 2966 skb->gso_size 1448 skb->proto 0x8100 skb->mac_len 18 vlan_depth 22 type 0xb84 Because mac_len already accounts for VLAN_HLEN. After the fix: cat netperf.tso-on.reorder-off.fixed MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.01 5001.46 [ 81.888489] skb->len 65230 skb->gso_size 1448 skb->proto 0x8100 skb->mac_len 0 vlan_depth 18 type 0x800 CC: Vlad Yasevich CC: Eric Dumazet CC: Daniel Borkman CC: David S. Miller Fixes:1e785f48d29a ("net: Start with correct mac_len in skb_network_protocol") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 9abc503b19b7..fb8b0546485b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2283,8 +2283,8 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { + unsigned int vlan_depth = skb->mac_len; __be16 type = skb->protocol; - int vlan_depth = skb->mac_len; /* Tunnel gso handlers can set protocol to ethernet. */ if (type == htons(ETH_P_TEB)) { @@ -2297,15 +2297,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { - struct vlan_hdr *vh; - - if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return 0; - - vh = (struct vlan_hdr *)(skb->data + vlan_depth); - type = vh->h_vlan_encapsulated_proto; - vlan_depth += VLAN_HLEN; + /* if skb->protocol is 802.1Q/AD then the header should already be + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at + * ETH_HLEN otherwise + */ + if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (vlan_depth) { + if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN))) + return 0; + vlan_depth -= VLAN_HLEN; + } else { + vlan_depth = ETH_HLEN; + } + do { + struct vlan_hdr *vh; + + if (unlikely(!pskb_may_pull(skb, + vlan_depth + VLAN_HLEN))) + return 0; + + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; + } while (type == htons(ETH_P_8021Q) || + type == htons(ETH_P_8021AD)); } *depth = vlan_depth; -- cgit v1.2.3 From 418c96ac151a16a5094a95d14252c92c1d47ec67 Mon Sep 17 00:00:00 2001 From: Leon Yu Date: Sun, 1 Jun 2014 05:37:25 +0000 Subject: net: filter: fix possible memory leak in __sk_prepare_filter() __sk_prepare_filter() was reworked in commit bd4cf0ed3 (net: filter: rework/optimize internal BPF interpreter's instruction set) so that it should have uncharged memory once things went wrong. However that work isn't complete. Error is handled only in __sk_migrate_filter() while memory can still leak in the error path right after sk_chk_filter(). Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") Signed-off-by: Leon Yu Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 9d79ca0a6e8e..4aec7b93f1a9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1559,8 +1559,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, fp->jited = 0; err = sk_chk_filter(fp->insns, fp->len); - if (err) + if (err) { + if (sk != NULL) + sk_filter_uncharge(sk, fp); + else + kfree(fp); return ERR_PTR(err); + } /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. -- cgit v1.2.3 From e51fb152318ee6502a2d224771b0bbbbda046128 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 3 Jun 2014 16:40:47 -0700 Subject: rtnetlink: fix a memory leak when ->newlink fails It is possible that ->newlink() fails before registering the device, in this case we should just free it, it's safe to call free_netdev(). Fixes: commit 0e0eee2465df77bcec2 (net: correct error path in rtnl_newlink()) Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d8d8fcfa060..f4e9037f9a0c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2019,11 +2019,15 @@ replay: if (ops->newlink) { err = ops->newlink(net, dev, tb, data); /* Drivers should call free_netdev() in ->destructor - * and unregister it on failure so that device could be - * finally freed in rtnl_unlock. + * and unregister it on failure after registration + * so that device could be finally freed in rtnl_unlock. */ - if (err < 0) + if (err < 0) { + /* If device is not registered at all, free it now */ + if (dev->reg_state == NETREG_UNINITIALIZED) + free_netdev(dev); goto out; + } } else { err = register_netdevice(dev); if (err < 0) { -- cgit v1.2.3