From 640b2b107cec23c754214b62a811465fa8f9257f Mon Sep 17 00:00:00 2001
From: Jiri Benc
Date: Tue, 2 Jun 2015 14:36:34 +0200
Subject: openvswitch: disable LRO

Currently, openvswitch tries to disable LRO from the user space. This does
not work correctly when the device added is a vlan interface, though.

Instead of dealing with possibly complex stacked cross name space relations
in the user space, do the same as bridging does and call dev_disable_lro in
the kernel.

Signed-off-by: Jiri Benc
Acked-by: Flavio Leitner
Acked-by: Pravin B Shelar
Signed-off-by: David S. Miller
---
 net/openvswitch/vport-netdev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4776282c6417..33e6d6e2908f 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -125,6 +125,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
 	if (err)
 		goto error_master_upper_dev_unlink;
 
+	dev_disable_lro(netdev_vport->dev);
 	dev_set_promiscuity(netdev_vport->dev, 1);
 	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
 	rtnl_unlock();
--
cgit v1.2.3


From 6e540309326188f769e03bb4c6dd8ff6752930c2 Mon Sep 17 00:00:00 2001
From: Shawn Bohrer
Date: Wed, 3 Jun 2015 16:27:38 -0500
Subject: ipv4/udp: Verify multicast group is ours in upd_v4_early_demux()

421b3885bf6d56391297844f43fb7154a6396e12 "udp: ipv4: Add udp early demux"
introduced a regression that allowed sockets bound to INADDR_ANY to receive
packets from multicast groups that the socket had not joined. For example a
socket that had joined 224.168.2.9 could also receive packets from
225.168.2.9 despite not having joined that group if ip_early_demux is
enabled.

Fix this by calling ip_check_mc_rcu() in udp_v4_early_demux() to verify
that the multicast packet is indeed ours.

Signed-off-by: Shawn Bohrer
Reported-by: Yurij M. Plotnikov
Signed-off-by: David S. Miller
---
 net/ipv4/udp.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1c92ea67baef..83aa604f9273 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -90,6 +90,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1960,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	struct sock *sk;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int ours;
 
 	/* validate the packet */
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
@@ -1969,14 +1971,24 @@
 	uh = udp_hdr(skb);
 
 	if (skb->pkt_type == PACKET_BROADCAST ||
-	    skb->pkt_type == PACKET_MULTICAST)
+	    skb->pkt_type == PACKET_MULTICAST) {
+		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+		if (!in_dev)
+			return;
+
+		ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+				       iph->protocol);
+		if (!ours)
+			return;
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
 						   uh->source, iph->saddr, dif);
-	else if (skb->pkt_type == PACKET_HOST)
+	} else if (skb->pkt_type == PACKET_HOST) {
 		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
 					     uh->source, iph->saddr, dif);
-	else
+	} else {
 		return;
+	}
 
 	if (!sk)
 		return;
--
cgit v1.2.3
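
A note on the udp early-demux fix above: the regression is visible from
ordinary user space. A receiver bound to INADDR_ANY that joins only
224.168.2.9 could, with ip_early_demux enabled, also be handed datagrams
sent to a different group on the same port. Below is a minimal receiver
sketch of that scenario; the port number and the bare-bones error handling
are arbitrary choices for illustration, not values taken from the commit.

/* Minimal multicast receiver: binds INADDR_ANY and joins 224.168.2.9 only.
 * With the regression, datagrams addressed to other groups on the same
 * port could also be delivered to this socket. Illustration only. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        struct sockaddr_in addr;
        struct ip_mreq mreq;
        char buf[2048];

        if (fd < 0)
                return 1;

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);       /* bound to INADDR_ANY */
        addr.sin_port = htons(5000);
        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                return 1;

        memset(&mreq, 0, sizeof(mreq));
        mreq.imr_multiaddr.s_addr = inet_addr("224.168.2.9");
        mreq.imr_interface.s_addr = htonl(INADDR_ANY);
        if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
                return 1;

        /* Only datagrams sent to 224.168.2.9:5000 should show up here. */
        while (recv(fd, buf, sizeof(buf), 0) > 0)
                printf("got a datagram\n");

        close(fd);
        return 0;
}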
From 1d7c49037b12016e7056b9f2c990380e2187e766 Mon Sep 17 00:00:00 2001
From: Wilson Kok
Date: Fri, 5 Jun 2015 00:52:57 -0700
Subject: bridge: use _bh spinlock variant for br_fdb_update to avoid lockup

br_fdb_update() can be called in process context in the following way:
br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set)
so we need to use spin_lock_bh because there are softirq users of the
hash_lock. One easy way to reproduce this is to modify the bridge utility
to set NTF_USE, enable stp and then set maxageing to a low value so
br_fdb_cleanup() is called frequently and then just add new entries in a
loop. This happens because br_fdb_cleanup() is called from timer/softirq
context.

These locks were _bh before commit f8ae737deea1 ("[BRIDGE]: forwarding
remove unneeded preempt and bh diasables") and at the time that commit was
correct because br_fdb_update() couldn't be called from process context,
but that changed after commit: 292d1398983f ("bridge: add NTF_USE support")

Signed-off-by: Wilson Kok
Signed-off-by: Nikolay Aleksandrov
Fixes: 292d1398983f ("bridge: add NTF_USE support")
Signed-off-by: David S. Miller
---
 net/bridge/br_fdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e0670d7054f9..7eacc8ae9779 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
 	} else {
-		spin_lock(&br->hash_lock);
+		spin_lock_bh(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
 			fdb = fdb_create(head, source, addr, vid);
 			if (fdb) {
@@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		/* else we lose race and someone else inserts
 		 * it first, don't bother updating
 		 */
-		spin_unlock(&br->hash_lock);
+		spin_unlock_bh(&br->hash_lock);
 	}
 }
--
cgit v1.2.3
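
To spell out the lockup the _bh variants avoid: if process context takes
hash_lock with plain spin_lock() and the FDB cleanup timer then fires as a
softirq on the same CPU and contends for the same lock, the softirq spins
on a lock its own CPU already holds and never completes. The sketch below
is schematic only; the lock and function names are stand-ins, not the
bridge code itself.

/* Schematic sketch of the two contending paths -- stand-in names only. */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(hash_lock);

/* Timer/softirq path, analogous to br_fdb_cleanup(). */
static void cleanup_from_softirq(void)
{
        spin_lock(&hash_lock);          /* spins forever if this CPU already holds it */
        /* ... age out stale entries ... */
        spin_unlock(&hash_lock);
}

/* Process-context path, analogous to br_fdb_update() reached via NTF_USE. */
static void update_from_process_context(void)
{
        spin_lock_bh(&hash_lock);       /* _bh keeps softirqs off this CPU while held */
        /* ... insert or refresh an entry ... */
        spin_unlock_bh(&hash_lock);
}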
From 25cc8f0763c972911b1a65099cd10d9f8a45a7b0 Mon Sep 17 00:00:00 2001
From: Robert Shearman
Date: Fri, 5 Jun 2015 18:54:45 +0100
Subject: mpls: fix possible use after free of device

The mpls device is used in an RCU read context without a lock being held.
As the memory is freed without waiting for the RCU grace period to elapse,
the freed memory could still be in use.

Address this by using kfree_rcu to free the memory for the mpls device
after the RCU grace period has elapsed.

Fixes: 03c57747a702 ("mpls: Per-device MPLS state")
Signed-off-by: Robert Shearman
Acked-by: "Eric W. Biederman"
Signed-off-by: David S. Miller
---
 net/mpls/af_mpls.c  | 2 +-
 net/mpls/internal.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7b3f732269e4..bff427f31924 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -541,7 +541,7 @@ static void mpls_ifdown(struct net_device *dev)
 
 	RCU_INIT_POINTER(dev->mpls_ptr, NULL);
 
-	kfree(mdev);
+	kfree_rcu(mdev, rcu);
 }
 
 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index b064c345042c..8cabeb5a1cb9 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -16,6 +16,7 @@ struct mpls_dev {
 	int			input_enabled;
 
 	struct ctl_table_header *sysctl;
+	struct rcu_head		rcu;
 };
 
 struct sk_buff;
--
cgit v1.2.3
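
The mpls fix is an instance of the usual RCU deferred-free pattern: embed a
struct rcu_head in the object, unpublish the RCU-visible pointer, then let
kfree_rcu() release the memory only after a grace period, so readers inside
rcu_read_lock() sections never touch freed memory. A generic sketch of the
pattern follows; the structure and variable names are hypothetical, not the
mpls ones.

/* Generic kfree_rcu pattern -- hypothetical names, not the mpls code. */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_state {
        int data;
        struct rcu_head rcu;            /* gives kfree_rcu() somewhere to queue */
};

static struct my_state __rcu *global_state;

static void publisher_teardown(void)
{
        struct my_state *s = rcu_dereference_protected(global_state, 1);

        if (!s)
                return;
        RCU_INIT_POINTER(global_state, NULL);   /* unpublish first */
        kfree_rcu(s, rcu);                      /* freed after the grace period */
}

static int reader(void)
{
        struct my_state *s;
        int val = 0;

        rcu_read_lock();
        s = rcu_dereference(global_state);
        if (s)
                val = s->data;          /* safe: memory outlives this read section */
        rcu_read_unlock();
        return val;
}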
From 7ff46e79fb7df5b09c46c36857929fdf039f8b31 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Sun, 7 Jun 2015 19:43:47 -0700
Subject: Revert "bridge: use _bh spinlock variant for br_fdb_update to avoid lockup"

This reverts commit 1d7c49037b12016e7056b9f2c990380e2187e766.

Nikolay Aleksandrov has a better version of this fix.

Signed-off-by: David S. Miller
---
 net/bridge/br_fdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 7eacc8ae9779..e0670d7054f9 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
 	} else {
-		spin_lock_bh(&br->hash_lock);
+		spin_lock(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
 			fdb = fdb_create(head, source, addr, vid);
 			if (fdb) {
@@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		/* else we lose race and someone else inserts
 		 * it first, don't bother updating
 		 */
-		spin_unlock_bh(&br->hash_lock);
+		spin_unlock(&br->hash_lock);
 	}
 }
--
cgit v1.2.3


From c4c832f89dc468cf11dc0dd17206bace44526651 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov
Date: Sat, 6 Jun 2015 06:49:00 -0700
Subject: bridge: disable softirqs around br_fdb_update to avoid lockup

br_fdb_update() can be called in process context in the following way:
br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set)
so we need to disable softirqs because there are softirq users of the
hash_lock. One easy way to reproduce this is to modify the bridge utility
to set NTF_USE, enable stp and then set maxageing to a low value so
br_fdb_cleanup() is called frequently and then just add new entries in a
loop. This happens because br_fdb_cleanup() is called from timer/softirq
context.

The spin locks in br_fdb_update were _bh before commit f8ae737deea1
("[BRIDGE]: forwarding remove unneeded preempt and bh diasables") and at
the time that commit was correct because br_fdb_update() couldn't be called
from process context, but that changed after commit: 292d1398983f ("bridge:
add NTF_USE support")

Using local_bh_disable/enable around br_fdb_update() allows us to keep
using the spin_lock/unlock in br_fdb_update for the fast-path.

Signed-off-by: Nikolay Aleksandrov
Fixes: 292d1398983f ("bridge: add NTF_USE support")
Signed-off-by: David S. Miller
---
 net/bridge/br_fdb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e0670d7054f9..659fb96672e4 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -796,9 +796,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
 	int err = 0;
 
 	if (ndm->ndm_flags & NTF_USE) {
+		local_bh_disable();
 		rcu_read_lock();
 		br_fdb_update(p->br, p, addr, vid, true);
 		rcu_read_unlock();
+		local_bh_enable();
 	} else {
 		spin_lock_bh(&p->br->hash_lock);
 		err = fdb_add_entry(p, addr, ndm->ndm_state,
--
cgit v1.2.3


From 27e41fcfa6b326ad44eee7e0b1930d080b270895 Mon Sep 17 00:00:00 2001
From: Robert Shearman
Date: Fri, 5 Jun 2015 18:51:54 +0100
Subject: ipv6: fix possible use after free of dev stats

The memory pointed to by idev->stats.icmpv6msgdev, idev->stats.icmpv6dev
and idev->stats.ipv6 can each be used in an RCU read context without taking
a reference on idev. For example, through IP6_*_STATS_* calls in ip6_rcv.
These memory blocks are freed without waiting for an RCU grace period to
elapse. This could lead to the memory being written to after it has been
freed.

Fix this by using call_rcu to free the memory used for stats, as well as
idev after an RCU grace period has elapsed.

Signed-off-by: Robert Shearman
Acked-by: Hannes Frederic Sowa
Signed-off-by: David S. Miller
---
 net/ipv6/addrconf_core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d873ceea86e6..ca09bf49ac68 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -133,6 +133,14 @@ static void snmp6_free_dev(struct inet6_dev *idev)
 	free_percpu(idev->stats.ipv6);
 }
 
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+
+	snmp6_free_dev(idev);
+	kfree(idev);
+}
+
 /* Nobody refers to this device, we may destroy it. */
 
 void in6_dev_finish_destroy(struct inet6_dev *idev)
@@ -151,7 +159,6 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 		pr_warn("Freeing alive inet6 device %p\n", idev);
 		return;
 	}
-	snmp6_free_dev(idev);
-	kfree_rcu(idev, rcu);
+	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 EXPORT_SYMBOL(in6_dev_finish_destroy);
--
cgit v1.2.3
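
Unlike the mpls case earlier in this series, the ipv6 stats teardown cannot
use kfree_rcu() alone: the per-cpu stats blocks hanging off the device also
have to stay allocated until the grace period ends, and kfree_rcu() can
only defer the one kfree. call_rcu() with a callback defers the whole
teardown. A generic sketch of that pattern, again with hypothetical names
rather than the ipv6 structures:

/* Generic call_rcu teardown pattern -- hypothetical names, not the ipv6 code. */
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_dev {
        u64 __percpu *stats;            /* extra allocation owned by the object */
        struct rcu_head rcu;
};

static void my_dev_destroy_rcu(struct rcu_head *head)
{
        struct my_dev *d = container_of(head, struct my_dev, rcu);

        free_percpu(d->stats);          /* safe now: no RCU reader can still see d */
        kfree(d);
}

static void my_dev_destroy(struct my_dev *d)
{
        /* Defer both the stats free and the kfree past the grace period. */
        call_rcu(&d->rcu, my_dev_destroy_rcu);
}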
From 0243508edd317ff1fa63b495643a7c192fbfcd92 Mon Sep 17 00:00:00 2001
From: Josh Hunt
Date: Mon, 8 Jun 2015 12:00:59 -0400
Subject: ipv6: Fix protocol resubmission

UDP encapsulation is broken on IPv6. This is because the logic to resubmit
the nexthdr is inverted, checking for a ret value > 0 instead of < 0. Also,
the resubmit label is in the wrong position since we already get the
nexthdr value when performing decapsulation. In addition the skb pull is no
longer necessary either.

This changes the return value check to look for < 0, using it for the
nexthdr on the next iteration, and moves the resubmit label to the proper
location. With these changes the v6 code now matches what we do in the v4
ip input code wrt resubmitting when decapsulating.

Signed-off-by: Josh Hunt
Acked-by: "Tom Herbert"
Signed-off-by: David S. Miller
---
 net/ipv6/ip6_input.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f2e464eba5ef..41a73da371a9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
 	 */
 
 	rcu_read_lock();
-resubmit:
 	idev = ip6_dst_idev(skb_dst(skb));
 	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
 	nexthdr = skb_network_header(skb)[nhoff];
 
+resubmit:
 	raw = raw6_local_deliver(skb, nexthdr);
 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
 	if (ipprot) {
@@ -246,10 +246,12 @@ resubmit:
 			goto discard;
 
 		ret = ipprot->handler(skb);
-		if (ret > 0)
+		if (ret < 0) {
+			nexthdr = -ret;
 			goto resubmit;
-		else if (ret == 0)
+		} else if (ret == 0) {
 			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+		}
 	} else {
 		if (!raw) {
 			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
--
cgit v1.2.3


From bbbf2df0039d31c6a0a9708ce4fe220a54bd5379 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Mon, 8 Jun 2015 11:53:08 -0400
Subject: net: replace last open coded skb_orphan_frags with function call

Commit 70008aa50e92 ("skbuff: convert to skb_orphan_frags") replaced open
coded tests of SKBTX_DEV_ZEROCOPY and skb_copy_ubufs with calls to helper
function skb_orphan_frags. Apply that to the last remaining open coded
site.

Signed-off-by: Willem de Bruijn
Acked-by: Michael S. Tsirkin
Signed-off-by: David S. Miller
---
 net/core/dev.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 2c1c67fad64d..aa82f9ab6a36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1718,15 +1718,8 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
+	if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+	    unlikely(!is_skb_forwardable(dev, skb))) {
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
 		return NET_RX_DROP;
--
cgit v1.2.3
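
For reference, the skb_orphan_frags() helper wraps the same
SKBTX_DEV_ZEROCOPY test and skb_copy_ubufs() call that the removed lines
open-coded. Roughly, and not quoted verbatim from include/linux/skbuff.h,
it behaves like the sketch below, which assumes the declarations from that
header are available.

/* Approximate behaviour of skb_orphan_frags() -- sketch, not the real header. */
static inline int skb_orphan_frags_sketch(struct sk_buff *skb, gfp_t gfp_mask)
{
        /* No userspace-pinned (zerocopy) fragments: nothing to do. */
        if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
                return 0;

        /* Otherwise copy the user buffers so the skb owns all of its data. */
        return skb_copy_ubufs(skb, gfp_mask);
}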