From 2d45a02d0166caf2627fe91897c6ffc3b19514c4 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 12 Jun 2015 10:16:41 -0300
Subject: sctp: fix ASCONF list handling

->auto_asconf_splist is per namespace and mangled by functions like
sctp_setsockopt_auto_asconf() which doesn't guarantee any serialization.

Also, the call to inet_sk_copy_descendant() was backuping
->auto_asconf_list through the copy but was not honoring
->do_auto_asconf, which could lead to list corruption if it was
different between both sockets.

This commit thus fixes the list handling by using ->addr_wq_lock
spinlock to protect the list. A special handling is done upon socket
creation and destruction for that. Error handlig on sctp_init_sock()
will never return an error after having initialized asconf, so
sctp_destroy_sock() can be called without addrq_wq_lock. The lock now
will be take on sctp_close_sock(), before locking the socket, so we
don't do it in inverse order compared to sctp_addr_wq_timeout_handler().

Instead of taking the lock on sctp_sock_migrate() for copying and
restoring the list values, it's preferred to avoid rewritting it by
implementing sctp_copy_descendant().

Issue was found with a test application that kept flipping sysctl
default_auto_asconf on and off, but one could trigger it by issuing
simultaneous setsockopt() calls on multiple sockets or by
creating/destroying sockets fast enough. This is only triggerable
locally.

Fixes: 9f7d653b67ae ("sctp: Add Auto-ASCONF support (core).")
Reported-by: Ji Jianwen <jiji@redhat.com>
Suggested-by: Neil Horman <nhorman@tuxdriver.com>
Suggested-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f09de7fac2e6..5f6c4e61325b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1528,8 +1528,10 @@ static void sctp_close(struct sock *sk, long timeout)
 
 	/* Supposedly, no process has access to the socket, but
 	 * the net layers still may.
+	 * Also, sctp_destroy_sock() needs to be called with addr_wq_lock
+	 * held and that should be grabbed before socket lock.
 	 */
-	local_bh_disable();
+	spin_lock_bh(&net->sctp.addr_wq_lock);
 	bh_lock_sock(sk);
 
 	/* Hold the sock, since sk_common_release() will put sock_put()
@@ -1539,7 +1541,7 @@ static void sctp_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 
 	bh_unlock_sock(sk);
-	local_bh_enable();
+	spin_unlock_bh(&net->sctp.addr_wq_lock);
 
 	sock_put(sk);
 
@@ -3580,6 +3582,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
 	if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf))
 		return 0;
 
+	spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock);
 	if (val == 0 && sp->do_auto_asconf) {
 		list_del(&sp->auto_asconf_list);
 		sp->do_auto_asconf = 0;
@@ -3588,6 +3591,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
 		    &sock_net(sk)->sctp.auto_asconf_splist);
 		sp->do_auto_asconf = 1;
 	}
+	spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock);
 	return 0;
 }
 
@@ -4121,18 +4125,28 @@ static int sctp_init_sock(struct sock *sk)
 	local_bh_disable();
 	percpu_counter_inc(&sctp_sockets_allocated);
 	sock_prot_inuse_add(net, sk->sk_prot, 1);
+
+	/* Nothing can fail after this block, otherwise
+	 * sctp_destroy_sock() will be called without addr_wq_lock held
+	 */
 	if (net->sctp.default_auto_asconf) {
+		spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
 		list_add_tail(&sp->auto_asconf_list,
 		    &net->sctp.auto_asconf_splist);
 		sp->do_auto_asconf = 1;
-	} else
+		spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
+	} else {
 		sp->do_auto_asconf = 0;
+	}
+
 	local_bh_enable();
 
 	return 0;
 }
 
-/* Cleanup any SCTP per socket resources.  */
+/* Cleanup any SCTP per socket resources. Must be called with
+ * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true
+ */
 static void sctp_destroy_sock(struct sock *sk)
 {
 	struct sctp_sock *sp;
@@ -7195,6 +7209,19 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	newinet->mc_list = NULL;
 }
 
+static inline void sctp_copy_descendant(struct sock *sk_to,
+					const struct sock *sk_from)
+{
+	int ancestor_size = sizeof(struct inet_sock) +
+			    sizeof(struct sctp_sock) -
+			    offsetof(struct sctp_sock, auto_asconf_list);
+
+	if (sk_from->sk_family == PF_INET6)
+		ancestor_size += sizeof(struct ipv6_pinfo);
+
+	__inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
+}
+
 /* Populate the fields of the newsk from the oldsk and migrate the assoc
  * and its messages to the newsk.
  */
@@ -7209,7 +7236,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	struct sk_buff *skb, *tmp;
 	struct sctp_ulpevent *event;
 	struct sctp_bind_hashbucket *head;
-	struct list_head tmplist;
 
 	/* Migrate socket buffer sizes and all the socket level options to the
 	 * new socket.
@@ -7217,12 +7243,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	newsk->sk_sndbuf = oldsk->sk_sndbuf;
 	newsk->sk_rcvbuf = oldsk->sk_rcvbuf;
 	/* Brute force copy old sctp opt. */
-	if (oldsp->do_auto_asconf) {
-		memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist));
-		inet_sk_copy_descendant(newsk, oldsk);
-		memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist));
-	} else
-		inet_sk_copy_descendant(newsk, oldsk);
+	sctp_copy_descendant(newsk, oldsk);
 
 	/* Restore the ep value that was overwritten with the above structure
 	 * copy.
-- 
cgit v1.2.3


From 2dab80a8b486f02222a69daca6859519e05781d9 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Mon, 15 Jun 2015 20:28:51 +0300
Subject: bridge: fix br_stp_set_bridge_priority race conditions

After the ->set() spinlocks were removed br_stp_set_bridge_priority
was left running without any protection when used via sysfs. It can
race with port add/del and could result in use-after-free cases and
corrupted lists. Tested by running port add/del in a loop with stp
enabled while setting priority in a loop, crashes are easily
reproducible.
The spinlocks around sysfs ->set() were removed in commit:
14f98f258f19 ("bridge: range check STP parameters")
There's also a race condition in the netlink priority support that is
fixed by this change, but it was introduced recently and the fixes tag
covers it, just in case it's needed the commit is:
af615762e972 ("bridge: add ageing_time, stp_state, priority over netlink")

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Fixes: 14f98f258f19 ("bridge: range check STP parameters")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_ioctl.c  | 2 --
 net/bridge/br_stp_if.c | 4 +++-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index a9a4a1b7863d..8d423bc649b9 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -247,9 +247,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 
-		spin_lock_bh(&br->lock);
 		br_stp_set_bridge_priority(br, args[1]);
-		spin_unlock_bh(&br->lock);
 		return 0;
 
 	case BRCTL_SET_PORT_PRIORITY:
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 41146872c1b4..7832d07f48f6 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -243,12 +243,13 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
 	return true;
 }
 
-/* called under bridge lock */
+/* Acquires and releases bridge lock */
 void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
 {
 	struct net_bridge_port *p;
 	int wasroot;
 
+	spin_lock_bh(&br->lock);
 	wasroot = br_is_root_bridge(br);
 
 	list_for_each_entry(p, &br->port_list, list) {
@@ -266,6 +267,7 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
 	br_port_state_selection(br);
 	if (br_is_root_bridge(br) && !wasroot)
 		br_become_root_bridge(br);
+	spin_unlock_bh(&br->lock);
 }
 
 /* called under bridge lock */
-- 
cgit v1.2.3


From f98f4514d07871da7a113dd9e3e330743fd70ae4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 16 Jun 2015 07:59:11 -0700
Subject: packet: read num_members once in packet_rcv_fanout()

We need to tell compiler it must not read f->num_members multiple
times. Otherwise testing if num is not zero is flaky, and we could
attempt an invalid divide by 0 in fanout_demux_cpu()

Note bug was present in packet_rcv_fanout_hash() and
packet_rcv_fanout_lb() but final 3.1 had a simple location
after commit 95ec3eb417115fb ("packet: Add 'cpu' fanout policy.")

Fixes: dc99f600698dc ("packet: Add fanout support.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5989c6ee551..131545a06f05 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1353,7 +1353,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 			     struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct packet_fanout *f = pt->af_packet_priv;
-	unsigned int num = f->num_members;
+	unsigned int num = READ_ONCE(f->num_members);
 	struct packet_sock *po;
 	unsigned int idx;
 
-- 
cgit v1.2.3


From 2c51a97f76d20ebf1f50fef908b986cb051fdff9 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 16 Jun 2015 22:56:39 +0300
Subject: neigh: do not modify unlinked entries

The lockless lookups can return entry that is unlinked.
Sometimes they get reference before last neigh_cleanup_and_release,
sometimes they do not need reference. Later, any
modification attempts may result in the following problems:

1. entry is not destroyed immediately because neigh_update
can start the timer for dead entry, eg. on change to NUD_REACHABLE
state. As result, entry lives for some time but is invisible
and out of control.

2. __neigh_event_send can run in parallel with neigh_destroy
while refcnt=0 but if timer is started and expired refcnt can
reach 0 for second time leading to second neigh_destroy and
possible crash.

Thanks to Eric Dumazet and Ying Xue for their work and analyze
on the __neigh_event_send change.

Fixes: 767e97e1e0db ("neigh: RCU conversion of struct neighbour")
Fixes: a263b3093641 ("ipv4: Make neigh lookups directly in output packet path.")
Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().")
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3de654256028..2237c1b3cdd2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -957,6 +957,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 	rc = 0;
 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
 		goto out_unlock_bh;
+	if (neigh->dead)
+		goto out_dead;
 
 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
 		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
@@ -1013,6 +1015,13 @@ out_unlock_bh:
 		write_unlock(&neigh->lock);
 	local_bh_enable();
 	return rc;
+
+out_dead:
+	if (neigh->nud_state & NUD_STALE)
+		goto out_unlock_bh;
+	write_unlock_bh(&neigh->lock);
+	kfree_skb(skb);
+	return 1;
 }
 EXPORT_SYMBOL(__neigh_event_send);
 
@@ -1076,6 +1085,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
 	    (old & (NUD_NOARP | NUD_PERMANENT)))
 		goto out;
+	if (neigh->dead)
+		goto out;
 
 	if (!(new & NUD_VALID)) {
 		neigh_del_timer(neigh);
@@ -1225,6 +1236,8 @@ EXPORT_SYMBOL(neigh_update);
  */
 void __neigh_set_probe_once(struct neighbour *neigh)
 {
+	if (neigh->dead)
+		return;
 	neigh->updated = jiffies;
 	if (!(neigh->nud_state & NUD_FAILED))
 		return;
-- 
cgit v1.2.3


From 36c01245eb8046c16eee6431e7dbfbb302635fa8 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Sun, 21 Jun 2015 18:50:44 +0200
Subject: can: fix loss of CAN frames in raw_rcv

As reported by Manfred Schlaegl here

   http://marc.info/?l=linux-netdev&m=143482089824232&w=2

commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for
overlapping CAN filters" requires the skb->tstamp to be set to check for
identical CAN skbs.

As net timestamping is influenced by several players (netstamp_needed and
netdev_tstamp_prequeue) Manfred missed a proper timestamp which leads to
CAN frame loss.

As skb timestamping became now mandatory for CAN related skbs this patch
makes sure that received CAN skbs always have a proper timestamp set.
Maybe there's a better solution in the future but this patch fixes the
CAN frame loss so far.

Reported-by: Manfred Schlaegl <manfred.schlaegl@gmx.at>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   | 5 +++++
 drivers/net/can/slcan.c | 1 +
 drivers/net/can/vcan.c  | 3 +++
 net/can/af_can.c        | 6 +++++-
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index b0f69248cb71..e9b1810d319f 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -440,6 +440,9 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
 		struct can_frame *cf = (struct can_frame *)skb->data;
 		u8 dlc = cf->can_dlc;
 
+		if (!(skb->tstamp.tv64))
+			__net_timestamp(skb);
+
 		netif_rx(priv->echo_skb[idx]);
 		priv->echo_skb[idx] = NULL;
 
@@ -575,6 +578,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 	if (unlikely(!skb))
 		return NULL;
 
+	__net_timestamp(skb);
 	skb->protocol = htons(ETH_P_CAN);
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -603,6 +607,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 	if (unlikely(!skb))
 		return NULL;
 
+	__net_timestamp(skb);
 	skb->protocol = htons(ETH_P_CANFD);
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index c837eb91d43e..f64f5290d6f8 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -207,6 +207,7 @@ static void slc_bump(struct slcan *sl)
 	if (!skb)
 		return;
 
+	__net_timestamp(skb);
 	skb->dev = sl->dev;
 	skb->protocol = htons(ETH_P_CAN);
 	skb->pkt_type = PACKET_BROADCAST;
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index 674f367087c5..0ce868de855d 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -78,6 +78,9 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev)
 	skb->dev       = dev;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+	if (!(skb->tstamp.tv64))
+		__net_timestamp(skb);
+
 	netif_rx_ni(skb);
 }
 
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 32d710eaf1fc..689c818ed007 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -310,8 +310,12 @@ int can_send(struct sk_buff *skb, int loop)
 		return err;
 	}
 
-	if (newskb)
+	if (newskb) {
+		if (!(newskb->tstamp.tv64))
+			__net_timestamp(newskb);
+
 		netif_rx_ni(newskb);
+	}
 
 	/* update statistics */
 	can_stats.tx_frames++;
-- 
cgit v1.2.3


From 51f458d9612177f69c2e2c437034ae15f93078e7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 17 Jun 2015 13:54:54 +0200
Subject: mac80211: fix locking in update_vlan_tailroom_need_count()

Unfortunately, Michal's change to fix AP_VLAN crypto tailroom
caused a locking issue that was reported by lockdep, but only
in a few cases - the issue was a classic ABBA deadlock caused
by taking the mtx after the key_mtx, where normally they're
taken the other way around.

As the key mutex protects the field in question (I'm adding a
few annotations to make that clear) only the iteration needs
to be protected, but we can also iterate the interface list
with just RCU protection while holding the key mutex.

Fixes: f9dca80b98ca ("mac80211: fix AP_VLAN crypto tailroom calculation")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/key.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index a907f2d5c12d..81e9785f38bc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -66,12 +66,15 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta)
 	if (sdata->vif.type != NL80211_IFTYPE_AP)
 		return;
 
-	mutex_lock(&sdata->local->mtx);
+	/* crypto_tx_tailroom_needed_cnt is protected by this */
+	assert_key_lock(sdata->local);
+
+	rcu_read_lock();
 
-	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+	list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list)
 		vlan->crypto_tx_tailroom_needed_cnt += delta;
 
-	mutex_unlock(&sdata->local->mtx);
+	rcu_read_unlock();
 }
 
 static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
@@ -95,6 +98,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
 	 * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net
 	 */
 
+	assert_key_lock(sdata->local);
+
 	update_vlan_tailroom_need_count(sdata, 1);
 
 	if (!sdata->crypto_tx_tailroom_needed_cnt++) {
@@ -109,6 +114,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
 static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
 					 int delta)
 {
+	assert_key_lock(sdata->local);
+
 	WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta);
 
 	update_vlan_tailroom_need_count(sdata, -delta);
-- 
cgit v1.2.3


From 468479e6043c84f5a65299cc07cb08a22a28c2b1 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 17 Jun 2015 15:59:34 -0400
Subject: packet: avoid out of bounds read in round robin fanout

PACKET_FANOUT_LB computes f->rr_cur such that it is modulo
f->num_members. It returns the old value unconditionally, but
f->num_members may have changed since the last store. Ensure
that the return value is always < num.

When modifying the logic, simplify it further by replacing the loop
with an unconditional atomic increment.

Fixes: dc99f600698d ("packet: Add fanout support.")
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 131545a06f05..fe1610ddeacf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1272,16 +1272,6 @@ static void packet_sock_destruct(struct sock *sk)
 	sk_refcnt_debug_dec(sk);
 }
 
-static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
-{
-	int x = atomic_read(&f->rr_cur) + 1;
-
-	if (x >= num)
-		x = 0;
-
-	return x;
-}
-
 static unsigned int fanout_demux_hash(struct packet_fanout *f,
 				      struct sk_buff *skb,
 				      unsigned int num)
@@ -1293,13 +1283,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout *f,
 				    struct sk_buff *skb,
 				    unsigned int num)
 {
-	int cur, old;
+	unsigned int val = atomic_inc_return(&f->rr_cur);
 
-	cur = atomic_read(&f->rr_cur);
-	while ((old = atomic_cmpxchg(&f->rr_cur, cur,
-				     fanout_rr_next(f, num))) != cur)
-		cur = old;
-	return cur;
+	return val % num;
 }
 
 static unsigned int fanout_demux_cpu(struct packet_fanout *f,
-- 
cgit v1.2.3


From dfea2aa654243f70dc53b8648d0bbdeec55a7df1 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <cpaasch@apple.com>
Date: Thu, 18 Jun 2015 09:15:34 -0700
Subject: tcp: Do not call tcp_fastopen_reset_cipher from interrupt context

tcp_fastopen_reset_cipher really cannot be called from interrupt
context. It allocates the tcp_fastopen_context with GFP_KERNEL and
calls crypto_alloc_cipher, which allocates all kind of stuff with
GFP_KERNEL.

Thus, we might sleep when the key-generation is triggered by an
incoming TFO cookie-request which would then happen in interrupt-
context, as shown by enabling CONFIG_DEBUG_ATOMIC_SLEEP:

[   36.001813] BUG: sleeping function called from invalid context at mm/slub.c:1266
[   36.003624] in_atomic(): 1, irqs_disabled(): 0, pid: 1016, name: packetdrill
[   36.004859] CPU: 1 PID: 1016 Comm: packetdrill Not tainted 4.1.0-rc7 #14
[   36.006085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
[   36.008250]  00000000000004f2 ffff88007f8838a8 ffffffff8171d53a ffff880075a084a8
[   36.009630]  ffff880075a08000 ffff88007f8838c8 ffffffff810967d3 ffff88007f883928
[   36.011076]  0000000000000000 ffff88007f8838f8 ffffffff81096892 ffff88007f89be00
[   36.012494] Call Trace:
[   36.012953]  <IRQ>  [<ffffffff8171d53a>] dump_stack+0x4f/0x6d
[   36.014085]  [<ffffffff810967d3>] ___might_sleep+0x103/0x170
[   36.015117]  [<ffffffff81096892>] __might_sleep+0x52/0x90
[   36.016117]  [<ffffffff8118e887>] kmem_cache_alloc_trace+0x47/0x190
[   36.017266]  [<ffffffff81680d82>] ? tcp_fastopen_reset_cipher+0x42/0x130
[   36.018485]  [<ffffffff81680d82>] tcp_fastopen_reset_cipher+0x42/0x130
[   36.019679]  [<ffffffff81680f01>] tcp_fastopen_init_key_once+0x61/0x70
[   36.020884]  [<ffffffff81680f2c>] __tcp_fastopen_cookie_gen+0x1c/0x60
[   36.022058]  [<ffffffff816814ff>] tcp_try_fastopen+0x58f/0x730
[   36.023118]  [<ffffffff81671788>] tcp_conn_request+0x3e8/0x7b0
[   36.024185]  [<ffffffff810e3872>] ? __module_text_address+0x12/0x60
[   36.025327]  [<ffffffff8167b2e1>] tcp_v4_conn_request+0x51/0x60
[   36.026410]  [<ffffffff816727e0>] tcp_rcv_state_process+0x190/0xda0
[   36.027556]  [<ffffffff81661f97>] ? __inet_lookup_established+0x47/0x170
[   36.028784]  [<ffffffff8167c2ad>] tcp_v4_do_rcv+0x16d/0x3d0
[   36.029832]  [<ffffffff812e6806>] ? security_sock_rcv_skb+0x16/0x20
[   36.030936]  [<ffffffff8167cc8a>] tcp_v4_rcv+0x77a/0x7b0
[   36.031875]  [<ffffffff816af8c3>] ? iptable_filter_hook+0x33/0x70
[   36.032953]  [<ffffffff81657d22>] ip_local_deliver_finish+0x92/0x1f0
[   36.034065]  [<ffffffff81657f1a>] ip_local_deliver+0x9a/0xb0
[   36.035069]  [<ffffffff81657c90>] ? ip_rcv+0x3d0/0x3d0
[   36.035963]  [<ffffffff81657569>] ip_rcv_finish+0x119/0x330
[   36.036950]  [<ffffffff81657ba7>] ip_rcv+0x2e7/0x3d0
[   36.037847]  [<ffffffff81610652>] __netif_receive_skb_core+0x552/0x930
[   36.038994]  [<ffffffff81610a57>] __netif_receive_skb+0x27/0x70
[   36.040033]  [<ffffffff81610b72>] process_backlog+0xd2/0x1f0
[   36.041025]  [<ffffffff81611482>] net_rx_action+0x122/0x310
[   36.042007]  [<ffffffff81076743>] __do_softirq+0x103/0x2f0
[   36.042978]  [<ffffffff81723e3c>] do_softirq_own_stack+0x1c/0x30

This patch moves the call to tcp_fastopen_init_key_once to the places
where a listener socket creates its TFO-state, which always happens in
user-context (either from the setsockopt, or implicitly during the
listen()-call)

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Fixes: 222e83d2e0ae ("tcp: switch tcp_fastopen key generation to net_get_random_once")
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c      | 2 ++
 net/ipv4/tcp.c          | 7 +++++--
 net/ipv4/tcp_fastopen.c | 2 --
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b47a4d79d04..a5aa54ea6533 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -228,6 +228,8 @@ int inet_listen(struct socket *sock, int backlog)
 				err = 0;
 			if (err)
 				goto out;
+
+			tcp_fastopen_init_key_once(true);
 		}
 		err = inet_csk_listen_start(sk, backlog);
 		if (err)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1377f2a0472..bb2ce74f6004 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2545,10 +2545,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 
 	case TCP_FASTOPEN:
 		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
-		    TCPF_LISTEN)))
+		    TCPF_LISTEN))) {
+			tcp_fastopen_init_key_once(true);
+
 			err = fastopen_init_queue(sk, val);
-		else
+		} else {
 			err = -EINVAL;
+		}
 		break;
 	case TCP_TIMESTAMP:
 		if (!tp->repair)
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 46b087a27503..f9c0fb84e435 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -78,8 +78,6 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
 	struct tcp_fastopen_context *ctx;
 	bool ok = false;
 
-	tcp_fastopen_init_key_once(true);
-
 	rcu_read_lock();
 	ctx = rcu_dereference(tcp_fastopen_ctx);
 	if (ctx) {
-- 
cgit v1.2.3


From d496f7842aada20c61e6044b3395383fa972872c Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 19 Jun 2015 00:46:53 +0200
Subject: NET: ROSE: Don't dereference NULL neighbour pointer.

A ROSE socket doesn't necessarily always have a neighbour pointer so check
if the neighbour pointer is valid before dereferencing it.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Tested-by: Bernard Pidoux <f6bvp@free.fr>
Cc: stable@vger.kernel.org #2.6.11+
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rose/af_rose.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 8ae603069a1a..dd304bc40788 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -192,7 +192,8 @@ static void rose_kill_by_device(struct net_device *dev)
 
 		if (rose->device == dev) {
 			rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
-			rose->neighbour->use--;
+			if (rose->neighbour)
+				rose->neighbour->use--;
 			rose->device = NULL;
 		}
 	}
-- 
cgit v1.2.3


From 754bc547f0a79f7568b5b81c7fc0a8d044a6571a Mon Sep 17 00:00:00 2001
From: Satish Ashok <sashok@cumulusnetworks.com>
Date: Fri, 19 Jun 2015 01:22:57 -0700
Subject: bridge: multicast: restore router configuration on port link down/up

When a port goes through a link down/up the multicast router configuration
is not restored.

Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries")
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ff667e18b2d6..761fc733bf6d 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,6 +37,8 @@
 
 static void br_multicast_start_querier(struct net_bridge *br,
 				       struct bridge_mcast_own_query *query);
+static void br_multicast_add_router(struct net_bridge *br,
+				    struct net_bridge_port *port);
 unsigned int br_mdb_rehash_seq;
 
 static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -936,6 +938,8 @@ void br_multicast_enable_port(struct net_bridge_port *port)
 #if IS_ENABLED(CONFIG_IPV6)
 	br_multicast_enable(&port->ip6_own_query);
 #endif
+	if (port->multicast_router == 2 && hlist_unhashed(&port->rlist))
+		br_multicast_add_router(br, port);
 
 out:
 	spin_unlock(&br->multicast_lock);
-- 
cgit v1.2.3


From 34b99df4e6256ddafb663c6de0711dceceddfe0e Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 23 Jun 2015 08:34:39 +0300
Subject: ip: report the original address of ICMP messages

ICMP messages can trigger ICMP and local errors. In this case
serr->port is 0 and starting from Linux 4.0 we do not return
the original target address to the error queue readers.
Add function to define which errors provide addr_offset.
With this fix my ping command is not silent anymore.

Fixes: c247f0534cc5 ("ip: fix error queue empty skb handling")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 11 ++++++++++-
 net/ipv6/datagram.c    | 12 +++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 7cfb0893f263..6ddde89996f4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -432,6 +432,15 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 		kfree_skb(skb);
 }
 
+/* For some errors we have valid addr_offset even with zero payload and
+ * zero port. Also, addr_offset should be supported if port is set.
+ */
+static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
+{
+	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
+}
+
 /* IPv4 supports cmsg on all imcp errors and some timestamps
  *
  * Timestamp code paths do not initialize the fields expected by cmsg:
@@ -498,7 +507,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin && serr->port) {
+	if (sin && ipv4_datagram_support_addr(serr)) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 						   serr->addr_offset);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 762a58c772b8..62d908e64eeb 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	kfree_skb(skb);
 }
 
+/* For some errors we have valid addr_offset even with zero payload and
+ * zero port. Also, addr_offset should be supported if port is set.
+ */
+static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
+{
+	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 ||
+	       serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
+}
+
 /* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL.
  *
  * At one point, excluding local errors was a quick test to identify icmp/icmp6
@@ -389,7 +399,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin && serr->port) {
+	if (sin && ipv6_datagram_support_addr(serr)) {
 		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
-- 
cgit v1.2.3