From cd3bafc73d11eb51cb2d3691629718431e1768ce Mon Sep 17 00:00:00 2001
From: Hajime Tazaki <tazaki@sfc.wide.ad.jp>
Date: Wed, 4 Feb 2015 23:31:10 +0900
Subject: xfrm6: Fix a offset value for network header in _decode_session6

When a network-layer header has multiple IPv6 extension headers, then offset
for mobility header goes wrong. This regression breaks an xfrm policy lookup
for a particular receive packet. Binding update packets of Mobile IPv6
are all discarded without this fix.

Fixes: de3b7a06dfe1 ("xfrm6: Fix transport header offset in _decode_session6.")
Signed-off-by: Hajime Tazaki <tazaki@sfc.wide.ad.jp>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/xfrm6_policy.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 48bf5a06847b..8d2d01b4800a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -200,6 +200,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 		case IPPROTO_MH:
+			offset += ipv6_optlen(exthdr);
 			if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
 				struct ip6_mh *mh;
 
-- 
cgit v1.2.3


From dd3733b3e798daf778a1ec08557f388f00fdc2f6 Mon Sep 17 00:00:00 2001
From: Alexey Andriyanov <alan@al-an.info>
Date: Fri, 6 Feb 2015 22:32:20 +0300
Subject: ipvs: fix inability to remove a mixed-family RS

The current code prevents any operation with a mixed-family dest
unless IP_VS_CONN_F_TUNNEL flag is set. The problem is that it's impossible
for the client to follow this rule, because ip_vs_genl_parse_dest does
not even read the destination conn_flags when cmd = IPVS_CMD_DEL_DEST
(need_full_dest = 0).

Also, not every client can pass this flag when removing a dest. ipvsadm,
for example, does not support the "-i" command line option together with
the "-d" option.

This change disables any checks for mixed-family on IPVS_CMD_DEL_DEST command.

Signed-off-by: Alexey Andriyanov <alan@al-an.info>
Fixes: bc18d37f676f ("ipvs: Allow heterogeneous pools now that we support them")
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b8295a430a56..fdcda8be1f0f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3399,7 +3399,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 		if (udest.af == 0)
 			udest.af = svc->af;
 
-		if (udest.af != svc->af) {
+		if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
 			/* The synchronization protocol is incompatible
 			 * with mixed family services
 			 */
-- 
cgit v1.2.3


From 044a832a7779c0638bea2d0fea901c055b995f4a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 12 Jan 2015 13:38:49 +0100
Subject: xfrm: Fix local error reporting crash with interfamily tunnels

We set the outer mode protocol too early. As a result, the
local error handler might dispatch to the wrong address family
and report the error to a wrong socket type. We fix this by
setting the outer protocol to the skb after we accessed the
inner mode for the last time, right before we do the atcual
encapsulation where we switch finally to the outer mode.

Reported-by: Chris Ruehl <chris.ruehl@gtsys.com.hk>
Tested-by: Chris Ruehl <chris.ruehl@gtsys.com.hk>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_output.c | 2 +-
 net/ipv6/xfrm6_output.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d5f6bd9a210a..dab73813cb92 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 		return err;
 
 	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+	skb->protocol = htons(ETH_P_IP);
 
 	return x->outer_mode->output2(x, skb);
 }
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
 int xfrm4_output_finish(struct sk_buff *skb)
 {
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-	skb->protocol = htons(ETH_P_IP);
 
 #ifdef CONFIG_NETFILTER
 	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index ca3f29b98ae5..010f8bd2d577 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -114,6 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 		return err;
 
 	skb->ignore_df = 1;
+	skb->protocol = htons(ETH_P_IPV6);
 
 	return x->outer_mode->output2(x, skb);
 }
@@ -122,7 +123,6 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
 int xfrm6_output_finish(struct sk_buff *skb)
 {
 	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-	skb->protocol = htons(ETH_P_IPV6);
 
 #ifdef CONFIG_NETFILTER
 	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-- 
cgit v1.2.3


From ac37e2515c1a89c477459a2020b6bfdedabdb91b Mon Sep 17 00:00:00 2001
From: huaibin Wang <huaibin.wang@6wind.com>
Date: Wed, 11 Feb 2015 18:10:36 +0100
Subject: xfrm: release dst_orig in case of error in xfrm_lookup()

dst_orig should be released on error. Function like __xfrm_route_forward()
expects that behavior.
Since a recent commit, xfrm_lookup() may also be called by xfrm_lookup_route(),
which expects the opposite.
Let's introduce a new flag (XFRM_LOOKUP_KEEP_DST_REF) to tell what should be
done in case of error.

Fixes: f92ee61982d("xfrm: Generate blackhole routes only from route lookup functions")
Signed-off-by: huaibin Wang <huaibin.wang@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h      |  1 +
 net/xfrm/xfrm_policy.c | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index a8ae4e760778..0fb99a26e973 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -481,6 +481,7 @@ void dst_init(void);
 enum {
 	XFRM_LOOKUP_ICMP = 1 << 0,
 	XFRM_LOOKUP_QUEUE = 1 << 1,
+	XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
 };
 
 struct flowi;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cee479bc655c..638af0655aaf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2269,11 +2269,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 		 * have the xfrm_state's. We need to wait for KM to
 		 * negotiate new SA's or bail out with error.*/
 		if (net->xfrm.sysctl_larval_drop) {
-			dst_release(dst);
-			xfrm_pols_put(pols, drop_pols);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-
-			return ERR_PTR(-EREMOTE);
+			err = -EREMOTE;
+			goto error;
 		}
 
 		err = -EAGAIN;
@@ -2324,7 +2322,8 @@ nopol:
 error:
 	dst_release(dst);
 dropdst:
-	dst_release(dst_orig);
+	if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
+		dst_release(dst_orig);
 	xfrm_pols_put(pols, drop_pols);
 	return ERR_PTR(err);
 }
@@ -2338,7 +2337,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 				    struct sock *sk, int flags)
 {
 	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
-					    flags | XFRM_LOOKUP_QUEUE);
+					    flags | XFRM_LOOKUP_QUEUE |
+					    XFRM_LOOKUP_KEEP_DST_REF);
 
 	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
 		return make_blackhole(net, dst_orig->ops->family, dst_orig);
-- 
cgit v1.2.3


From 520aa7414bb590f39d0d1591b06018e60cbc7cf4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 12 Feb 2015 22:15:31 +0100
Subject: netfilter: nft_compat: fix module refcount underflow

Feb 12 18:20:42 nfdev kernel: ------------[ cut here ]------------
Feb 12 18:20:42 nfdev kernel: WARNING: CPU: 4 PID: 4359 at kernel/module.c:963 module_put+0x9b/0xba()
Feb 12 18:20:42 nfdev kernel: CPU: 4 PID: 4359 Comm: ebtables-compat Tainted: G        W      3.19.0-rc6+ #43
[...]
Feb 12 18:20:42 nfdev kernel: Call Trace:
Feb 12 18:20:42 nfdev kernel: [<ffffffff815fd911>] dump_stack+0x4c/0x65
Feb 12 18:20:42 nfdev kernel: [<ffffffff8103e6f7>] warn_slowpath_common+0x9c/0xb6
Feb 12 18:20:42 nfdev kernel: [<ffffffff8109919f>] ? module_put+0x9b/0xba
Feb 12 18:20:42 nfdev kernel: [<ffffffff8103e726>] warn_slowpath_null+0x15/0x17
Feb 12 18:20:42 nfdev kernel: [<ffffffff8109919f>] module_put+0x9b/0xba
Feb 12 18:20:42 nfdev kernel: [<ffffffff813ecf7c>] nft_match_destroy+0x45/0x4c
Feb 12 18:20:42 nfdev kernel: [<ffffffff813e683f>] nf_tables_rule_destroy+0x28/0x70

Reported-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Tested-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
---
 net/netfilter/nft_compat.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 265e190f2218..b6364869c2e0 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -578,8 +578,12 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 		struct xt_match *match = nft_match->ops.data;
 
 		if (strcmp(match->name, mt_name) == 0 &&
-		    match->revision == rev && match->family == family)
+		    match->revision == rev && match->family == family) {
+			if (!try_module_get(match->me))
+				return ERR_PTR(-ENOENT);
+
 			return &nft_match->ops;
+		}
 	}
 
 	match = xt_request_find_match(family, mt_name, rev);
@@ -648,8 +652,12 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 		struct xt_target *target = nft_target->ops.data;
 
 		if (strcmp(target->name, tg_name) == 0 &&
-		    target->revision == rev && target->family == family)
+		    target->revision == rev && target->family == family) {
+			if (!try_module_get(target->me))
+				return ERR_PTR(-ENOENT);
+
 			return &nft_target->ops;
+		}
 	}
 
 	target = xt_request_find_target(family, tg_name, rev);
-- 
cgit v1.2.3


From cef9ed86ed62eeffcd017882278bbece32001f86 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 13 Feb 2015 12:47:50 +0100
Subject: netfilter: xt_recent: don't reject rule if new hitcount exceeds table
 max

given:
-A INPUT -m recent --update --seconds 30 --hitcount 4
and
iptables-save > foo

then
iptables-restore < foo

will fail with:
kernel: xt_recent: hitcount (4) is larger than packets to be remembered (4) for table DEFAULT

Even when the check is fixed, the restore won't work if the hitcount is
increased to e.g. 6, since by the time checkentry runs it will find the
'old' incarnation of the table.

We can avoid this by increasing the maximum threshold silently; we only
have to rm all the current entries of the table (these entries would
not have enough room to handle the increased hitcount).

This even makes (not-very-useful)
-A INPUT -m recent --update --seconds 30 --hitcount 4
-A INPUT -m recent --update --seconds 30 --hitcount 42
work.

Fixes: abc86d0f99242b7f142b (netfilter: xt_recent: relax ip_pkt_list_tot restrictions)
Tracked-down-by: Chris Vine <chris@cvine.freeserve.co.uk>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_recent.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 30dbe34915ae..45e1b30e4fb2 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -378,12 +378,11 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
 	mutex_lock(&recent_mutex);
 	t = recent_table_lookup(recent_net, info->name);
 	if (t != NULL) {
-		if (info->hit_count > t->nstamps_max_mask) {
-			pr_info("hitcount (%u) is larger than packets to be remembered (%u) for table %s\n",
-				info->hit_count, t->nstamps_max_mask + 1,
-				info->name);
-			ret = -EINVAL;
-			goto out;
+		if (nstamp_mask > t->nstamps_max_mask) {
+			spin_lock_bh(&recent_lock);
+			recent_table_flush(t);
+			t->nstamps_max_mask = nstamp_mask;
+			spin_unlock_bh(&recent_lock);
 		}
 
 		t->refcnt++;
-- 
cgit v1.2.3


From 78296c97ca1fd3b104f12e1f1fbc06c46635990b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 15 Feb 2015 19:03:45 -0800
Subject: netfilter: xt_socket: fix a stack corruption bug

As soon as extract_icmp6_fields() returns, its local storage (automatic
variables) is deallocated and can be overwritten.

Lets add an additional parameter to make sure storage is valid long
enough.

While we are at it, adds some const qualifiers.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: b64c9256a9b76 ("tproxy: added IPv6 support to the socket match")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_socket.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1ba67931eb1b..13332dbf291d 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -243,12 +243,13 @@ static int
 extract_icmp6_fields(const struct sk_buff *skb,
 		     unsigned int outside_hdrlen,
 		     int *protocol,
-		     struct in6_addr **raddr,
-		     struct in6_addr **laddr,
+		     const struct in6_addr **raddr,
+		     const struct in6_addr **laddr,
 		     __be16 *rport,
-		     __be16 *lport)
+		     __be16 *lport,
+		     struct ipv6hdr *ipv6_var)
 {
-	struct ipv6hdr *inside_iph, _inside_iph;
+	const struct ipv6hdr *inside_iph;
 	struct icmp6hdr *icmph, _icmph;
 	__be16 *ports, _ports[2];
 	u8 inside_nexthdr;
@@ -263,12 +264,14 @@ extract_icmp6_fields(const struct sk_buff *skb,
 	if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
 		return 1;
 
-	inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph);
+	inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
+					sizeof(*ipv6_var), ipv6_var);
 	if (inside_iph == NULL)
 		return 1;
 	inside_nexthdr = inside_iph->nexthdr;
 
-	inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph),
+	inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
+					      sizeof(*ipv6_var),
 					 &inside_nexthdr, &inside_fragoff);
 	if (inside_hdrlen < 0)
 		return 1; /* hjm: Packet has no/incomplete transport layer headers. */
@@ -315,10 +318,10 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
 static bool
 socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb);
 	struct udphdr _hdr, *hp = NULL;
 	struct sock *sk = skb->sk;
-	struct in6_addr *daddr = NULL, *saddr = NULL;
+	const struct in6_addr *daddr = NULL, *saddr = NULL;
 	__be16 uninitialized_var(dport), uninitialized_var(sport);
 	int thoff = 0, uninitialized_var(tproto);
 	const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
@@ -342,7 +345,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 
 	} else if (tproto == IPPROTO_ICMPV6) {
 		if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
-					 &sport, &dport))
+					 &sport, &dport, &ipv6_var))
 			return false;
 	} else {
 		return false;
-- 
cgit v1.2.3


From a1d1e9be5a1dafe0ddc2181a9201c2ae29c71eff Mon Sep 17 00:00:00 2001
From: David Ramos <daramos@stanford.edu>
Date: Fri, 13 Feb 2015 13:11:51 -0800
Subject: svcrpc: fix memory leak in gssp_accept_sec_context_upcall
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Our UC-KLEE tool found a kernel memory leak of 512 bytes (on x86_64) for
each call to gssp_accept_sec_context_upcall()
(net/sunrpc/auth_gss/gss_rpc_upcall.c). Since it appears that this call
can be triggered by remote connections (at least, from a cursory a
glance at the call chain), it may be exploitable to cause kernel memory
exhaustion. We found the bug in kernel 3.16.3, but it appears to date
back to commit 9dfd87da1aeb0fd364167ad199f40fe96a6a87be (2013-08-20).

The gssp_accept_sec_context_upcall() function performs a pair of calls
to gssp_alloc_receive_pages() and gssp_free_receive_pages().  The first
allocates memory for arg->pages.  The second then frees the pages
pointed to by the arg->pages array, but not the array itself.

Reported-by: David A. Ramos <daramos@stanford.edu>
Fixes: 9dfd87da1aeb ("rpc: fix huge kmalloc's in gss-proxy”)
Signed-off-by: David A. Ramos <daramos@stanford.edu>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index abbb7dcd1689..59eeed43eda2 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -217,6 +217,8 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
 
 	for (i = 0; i < arg->npages && arg->pages[i]; i++)
 		__free_page(arg->pages[i]);
+
+	kfree(arg->pages);
 }
 
 static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
-- 
cgit v1.2.3


From 1c4cff0cf55011792125b6041bc4e9713e46240f Mon Sep 17 00:00:00 2001
From: Ignacy Gawędzki <ignacy.gawedzki@green-communications.fr>
Date: Fri, 13 Feb 2015 14:47:05 -0800
Subject: gen_stats.c: Duplicate xstats buffer for later use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gnet_stats_copy_app() function gets called, more often than not, with its
second argument a pointer to an automatic variable in the caller's stack.
Therefore, to avoid copying garbage afterwards when calling
gnet_stats_finish_copy(), this data is better copied to a dynamically allocated
memory that gets freed after use.

[xiyou.wangcong@gmail.com: remove a useless kfree()]

Signed-off-by: Ignacy Gawędzki <ignacy.gawedzki@green-communications.fr>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_stats.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 0c08062d1796..1e2f46a69d50 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
 	return 0;
 
 nla_put_failure:
+	kfree(d->xstats);
+	d->xstats = NULL;
+	d->xstats_len = 0;
 	spin_unlock_bh(d->lock);
 	return -1;
 }
@@ -305,7 +308,9 @@ int
 gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
 {
 	if (d->compat_xstats) {
-		d->xstats = st;
+		d->xstats = kmemdup(st, len, GFP_ATOMIC);
+		if (!d->xstats)
+			goto err_out;
 		d->xstats_len = len;
 	}
 
@@ -313,6 +318,11 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
 		return gnet_stats_copy(d, TCA_STATS_APP, st, len);
 
 	return 0;
+
+err_out:
+	d->xstats_len = 0;
+	spin_unlock_bh(d->lock);
+	return -1;
 }
 EXPORT_SYMBOL(gnet_stats_copy_app);
 
@@ -345,6 +355,9 @@ gnet_stats_finish_copy(struct gnet_dump *d)
 			return -1;
 	}
 
+	kfree(d->xstats);
+	d->xstats = NULL;
+	d->xstats_len = 0;
 	spin_unlock_bh(d->lock);
 	return 0;
 }
-- 
cgit v1.2.3


From fba04a9e0c869498889b6445fd06cbe7da9bb834 Mon Sep 17 00:00:00 2001
From: Alexander Drozdov <al.drozdov@gmail.com>
Date: Tue, 17 Feb 2015 13:33:46 +0300
Subject: ipv4: ip_check_defrag should correctly check return value of
 skb_copy_bits

skb_copy_bits() returns zero on success and negative value on error,
so it is needed to invert the condition in ip_check_defrag().

Fixes: 1bf3751ec90c ("ipv4: ip_check_defrag must not modify skb before unsharing")
Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e5b6d0ddcb58..2c8d98e728c0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -664,7 +664,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
 	if (skb->protocol != htons(ETH_P_IP))
 		return skb;
 
-	if (!skb_copy_bits(skb, 0, &iph, sizeof(iph)))
+	if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0)
 		return skb;
 
 	if (iph.ihl < 5 || iph.version != 4)
-- 
cgit v1.2.3


From 34eea79e2664b314cab6a30fc582fdfa7a1bb1df Mon Sep 17 00:00:00 2001
From: Ignacy Gawędzki <ignacy.gawedzki@green-communications.fr>
Date: Tue, 17 Feb 2015 20:15:20 +0100
Subject: ematch: Fix auto-loading of ematch modules.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In tcf_em_validate(), after calling request_module() to load the
kind-specific module, set em->ops to NULL before returning -EAGAIN, so
that module_put() is not called again by tcf_em_tree_destroy().

Signed-off-by: Ignacy Gawędzki <ignacy.gawedzki@green-communications.fr>
Acked-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/ematch.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 6742200b1307..fbb7ebfc58c6 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -228,6 +228,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
 				 * to replay the request.
 				 */
 				module_put(em->ops->owner);
+				em->ops = NULL;
 				err = -EAGAIN;
 			}
 #endif
-- 
cgit v1.2.3


From 7b4577a9da3702049650f7095506e9afd9f68849 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 17 Feb 2015 11:23:10 -0800
Subject: openvswitch: Fix net exit.

Open vSwitch allows moving internal vport to different namespace
while still connected to the bridge. But when namespace deleted
OVS does not detach these vports, that results in dangling
pointer to netdevice which causes kernel panic as follows.
This issue is fixed by detaching all ovs ports from the deleted
namespace at net-exit.

BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
IP: [<ffffffffa0aadaa5>] ovs_vport_locate+0x35/0x80 [openvswitch]
Oops: 0000 [#1] SMP
Call Trace:
 [<ffffffffa0aa6391>] lookup_vport+0x21/0xd0 [openvswitch]
 [<ffffffffa0aa65f9>] ovs_vport_cmd_get+0x59/0xf0 [openvswitch]
 [<ffffffff8167e07c>] genl_family_rcv_msg+0x1bc/0x3e0
 [<ffffffff8167e319>] genl_rcv_msg+0x79/0xc0
 [<ffffffff8167d919>] netlink_rcv_skb+0xb9/0xe0
 [<ffffffff8167deac>] genl_rcv+0x2c/0x40
 [<ffffffff8167cffd>] netlink_unicast+0x12d/0x1c0
 [<ffffffff8167d3da>] netlink_sendmsg+0x34a/0x6b0
 [<ffffffff8162e140>] sock_sendmsg+0xa0/0xe0
 [<ffffffff8162e5e8>] ___sys_sendmsg+0x408/0x420
 [<ffffffff8162f541>] __sys_sendmsg+0x51/0x90
 [<ffffffff8162f592>] SyS_sendmsg+0x12/0x20
 [<ffffffff81764ee9>] system_call_fastpath+0x12/0x17

Reported-by: Assaf Muller <amuller@redhat.com>
Fixes: 46df7b81454("openvswitch: Add support for network namespaces.")
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Reviewed-by: Thomas Graf <tgraf@noironetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
 net/openvswitch/vport.h    |  2 ++
 2 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ae5e77cdc0ca..5bae7243c577 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2194,14 +2194,55 @@ static int __net_init ovs_init_net(struct net *net)
 	return 0;
 }
 
-static void __net_exit ovs_exit_net(struct net *net)
+static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
+					    struct list_head *head)
 {
-	struct datapath *dp, *dp_next;
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+	struct datapath *dp;
+
+	list_for_each_entry(dp, &ovs_net->dps, list_node) {
+		int i;
+
+		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+			struct vport *vport;
+
+			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
+				struct netdev_vport *netdev_vport;
+
+				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
+					continue;
+
+				netdev_vport = netdev_vport_priv(vport);
+				if (dev_net(netdev_vport->dev) == dnet)
+					list_add(&vport->detach_list, head);
+			}
+		}
+	}
+}
+
+static void __net_exit ovs_exit_net(struct net *dnet)
+{
+	struct datapath *dp, *dp_next;
+	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
+	struct vport *vport, *vport_next;
+	struct net *net;
+	LIST_HEAD(head);
 
 	ovs_lock();
 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
 		__dp_destroy(dp);
+
+	rtnl_lock();
+	for_each_net(net)
+		list_vports_from_net(net, dnet, &head);
+	rtnl_unlock();
+
+	/* Detach all vports from given namespace. */
+	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
+		list_del(&vport->detach_list);
+		ovs_dp_detach_port(vport);
+	}
+
 	ovs_unlock();
 
 	cancel_work_sync(&ovs_net->dp_notify_work);
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index f8ae295fb001..bc85331a6c60 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -103,6 +103,7 @@ struct vport_portids {
  * @ops: Class structure.
  * @percpu_stats: Points to per-CPU statistics used and maintained by vport
  * @err_stats: Points to error statistics used and maintained by vport
+ * @detach_list: list used for detaching vport in net-exit call.
  */
 struct vport {
 	struct rcu_head rcu;
@@ -117,6 +118,7 @@ struct vport {
 	struct pcpu_sw_netstats __percpu *percpu_stats;
 
 	struct vport_err_stats err_stats;
+	struct list_head detach_list;
 };
 
 /**
-- 
cgit v1.2.3


From 997d5c3f4427f38562cbe207ce05bb25fdcb993b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Feb 2015 05:47:55 -0800
Subject: sock: sock_dequeue_err_skb() needs hard irq safety

Non NAPI drivers can call skb_tstamp_tx() and then sock_queue_err_skb()
from hard IRQ context.

Therefore, sock_dequeue_err_skb() needs to block hard irq or
corruptions or hangs can happen.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 364a9e93243d1 ("sock: deduplicate errqueue dequeue")
Fixes: cb820f8e4b7f7 ("net: Provide a generic socket error queue delivery method for Tx time stamps.")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 88c613eab142..f80507823531 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3621,13 +3621,14 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
 {
 	struct sk_buff_head *q = &sk->sk_error_queue;
 	struct sk_buff *skb, *skb_next;
+	unsigned long flags;
 	int err = 0;
 
-	spin_lock_bh(&q->lock);
+	spin_lock_irqsave(&q->lock, flags);
 	skb = __skb_dequeue(q);
 	if (skb && (skb_next = skb_peek(q)))
 		err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
-	spin_unlock_bh(&q->lock);
+	spin_unlock_irqrestore(&q->lock, flags);
 
 	sk->sk_err = err;
 	if (err)
-- 
cgit v1.2.3


From 65e9256c4ec569ed62bb89023daab7b72368b89f Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Fri, 20 Feb 2015 21:34:58 +0200
Subject: ethtool: Add hw-switch-offload to netdev_features_strings.

commit aafb3e98b279 (netdev: introduce new NETIF_F_HW_SWITCH_OFFLOAD feature
flag for switch device offloads) add a new feature without adding it to
netdev_features_strings array; this patch fixes this.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 91f74f3eb204..aa378ecef186 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_RXALL_BIT] =            "rx-all",
 	[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
 	[NETIF_F_BUSY_POLL_BIT] =        "busy-poll",
+	[NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload",
 };
 
 static const char
-- 
cgit v1.2.3


From 278f7b4fffce9ad267406cf8800df271d14f4a16 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 19 Feb 2015 12:13:13 +0300
Subject: caif: fix a signedness bug in cfpkt_iterate()

The cfpkt_iterate() function can return -EPROTO on error, but the
function is a u16 so the negative value gets truncated to a positive
unsigned short.  This causes a static checker warning.

The only caller which might care is cffrml_receive(), when it's checking
the frame checksum.  I modified cffrml_receive() so that it never says
-EPROTO is a valid checksum.

Also this isn't ever going to be inlined so I removed the "inline".

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfpkt.h | 2 +-
 net/caif/cffrml.c        | 2 +-
 net/caif/cfpkt_skbuff.c  | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h
index 1c1ad46250d5..fe328c52c46b 100644
--- a/include/net/caif/cfpkt.h
+++ b/include/net/caif/cfpkt.h
@@ -171,7 +171,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos);
  * @return    Checksum of buffer.
  */
 
-u16 cfpkt_iterate(struct cfpkt *pkt,
+int cfpkt_iterate(struct cfpkt *pkt,
 		u16 (*iter_func)(u16 chks, void *buf, u16 len),
 		u16 data);
 
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 8bc7caa28e64..434ba8557826 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -84,7 +84,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
 	u16 tmp;
 	u16 len;
 	u16 hdrchks;
-	u16 pktchks;
+	int pktchks;
 	struct cffrml *this;
 	this = container_obj(layr);
 
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 1be0b521ac49..f6c3b2137eea 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -255,9 +255,9 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt)
 	return skb->len;
 }
 
-inline u16 cfpkt_iterate(struct cfpkt *pkt,
-			 u16 (*iter_func)(u16, void *, u16),
-			 u16 data)
+int cfpkt_iterate(struct cfpkt *pkt,
+		  u16 (*iter_func)(u16, void *, u16),
+		  u16 data)
 {
 	/*
 	 * Don't care about the performance hit of linearizing,
-- 
cgit v1.2.3


From a4176a9391868bfa87705bcd2e3b49e9b9dd2996 Mon Sep 17 00:00:00 2001
From: Matthew Thode <mthode@mthode.org>
Date: Tue, 17 Feb 2015 18:31:57 -0600
Subject: net: reject creation of netdev names with colons

colons are used as a separator in netdev device lookup in dev_ioctl.c

Specific functions are SIOCGIFTXQLEN SIOCETHTOOL SIOCSIFNAME

Signed-off-by: Matthew Thode <mthode@mthode.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8f9710c62e20..962ee9d71964 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -946,7 +946,7 @@ bool dev_valid_name(const char *name)
 		return false;
 
 	while (*name) {
-		if (*name == '/' || isspace(*name))
+		if (*name == '/' || *name == ':' || isspace(*name))
 			return false;
 		name++;
 	}
-- 
cgit v1.2.3


From 3f34b24a732bab9635c4b32823268c37c01b40f0 Mon Sep 17 00:00:00 2001
From: Alexander Drozdov <al.drozdov@gmail.com>
Date: Fri, 20 Feb 2015 08:24:27 +0300
Subject: af_packet: allow packets defragmentation not only for hash fanout
 type

Packets defragmentation was introduced for PACKET_FANOUT_HASH only,
see 7736d33f4262 ("packet: Add pre-defragmentation support for ipv4
fanouts")

It may be useful to have defragmentation enabled regardless of
fanout type. Without that, the AF_PACKET user may have to:
1. Collect fragments from different rings
2. Defragment by itself

Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9c28cec1a083..99fc628f7372 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1349,14 +1349,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 		return 0;
 	}
 
+	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
+		skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
+		if (!skb)
+			return 0;
+	}
 	switch (f->type) {
 	case PACKET_FANOUT_HASH:
 	default:
-		if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
-			skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
-			if (!skb)
-				return 0;
-		}
 		idx = fanout_demux_hash(f, skb, num);
 		break;
 	case PACKET_FANOUT_LB:
-- 
cgit v1.2.3


From 2156d321b879cdadb95a633d046169cfebdbf784 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Sat, 21 Feb 2015 19:30:55 +0100
Subject: netfilter: nft_compat: don't truncate ethernet protocol type to u8

Use u16 for protocol and then cast it to __be16

>> net/netfilter/nft_compat.c:140:37: sparse: incorrect type in assignment (different base types)
   net/netfilter/nft_compat.c:140:37:    expected restricted __be16 [usertype] ethproto
   net/netfilter/nft_compat.c:140:37:    got unsigned char [unsigned] [usertype] proto
>> net/netfilter/nft_compat.c:351:37: sparse: incorrect type in assignment (different base types)
   net/netfilter/nft_compat.c:351:37:    expected restricted __be16 [usertype] ethproto
   net/netfilter/nft_compat.c:351:37:    got unsigned char [unsigned] [usertype] proto

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 1279cd85663e..213584cf04b3 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -123,7 +123,7 @@ static void
 nft_target_set_tgchk_param(struct xt_tgchk_param *par,
 			   const struct nft_ctx *ctx,
 			   struct xt_target *target, void *info,
-			   union nft_entry *entry, u8 proto, bool inv)
+			   union nft_entry *entry, u16 proto, bool inv)
 {
 	par->net	= ctx->net;
 	par->table	= ctx->table->name;
@@ -137,7 +137,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
 	case NFPROTO_BRIDGE:
-		entry->ebt.ethproto = proto;
+		entry->ebt.ethproto = (__force __be16)proto;
 		entry->ebt.invflags = inv ? EBT_IPROTO : 0;
 		break;
 	}
@@ -171,7 +171,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
 	[NFTA_RULE_COMPAT_FLAGS]	= { .type = NLA_U32 },
 };
 
-static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv)
+static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
 {
 	struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
 	u32 flags;
@@ -203,7 +203,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_target *target = expr->ops->data;
 	struct xt_tgchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
-	u8 proto = 0;
+	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
 	int ret;
@@ -334,7 +334,7 @@ static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
 static void
 nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
 			  struct xt_match *match, void *info,
-			  union nft_entry *entry, u8 proto, bool inv)
+			  union nft_entry *entry, u16 proto, bool inv)
 {
 	par->net	= ctx->net;
 	par->table	= ctx->table->name;
@@ -348,7 +348,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
 	case NFPROTO_BRIDGE:
-		entry->ebt.ethproto = proto;
+		entry->ebt.ethproto = (__force __be16)proto;
 		entry->ebt.invflags = inv ? EBT_IPROTO : 0;
 		break;
 	}
@@ -385,7 +385,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_match *match = expr->ops->data;
 	struct xt_mtchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
-	u8 proto = 0;
+	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
 	int ret;
-- 
cgit v1.2.3


From 02263db00b6cb98701332aa257c07ca549c2324b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Feb 2015 17:11:10 +0100
Subject: netfilter: nf_tables: fix addition/deletion of elements from
 commit/abort

We have several problems in this path:

1) There is a use-after-free when removing individual elements from
   the commit path.

2) We have to uninit() the data part of the element from the abort
   path to avoid a chain refcount leak.

3) We have to check for set->flags to see if there's a mapping, instead
   of the element flags.

4) We have to check for !(flags & NFT_SET_ELEM_INTERVAL_END) to skip
   elements that are part of the interval that have no data part, so
   they don't need to be uninit().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 199fd0f27b0e..a8c94620f20e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3612,12 +3612,11 @@ static int nf_tables_commit(struct sk_buff *skb)
 						 &te->elem,
 						 NFT_MSG_DELSETELEM, 0);
 			te->set->ops->get(te->set, &te->elem);
-			te->set->ops->remove(te->set, &te->elem);
 			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-			if (te->elem.flags & NFT_SET_MAP) {
-				nft_data_uninit(&te->elem.data,
-						te->set->dtype);
-			}
+			if (te->set->flags & NFT_SET_MAP &&
+			    !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
+				nft_data_uninit(&te->elem.data, te->set->dtype);
+			te->set->ops->remove(te->set, &te->elem);
 			nft_trans_destroy(trans);
 			break;
 		}
@@ -3658,7 +3657,7 @@ static int nf_tables_abort(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
-	struct nft_set *set;
+	struct nft_trans_elem *te;
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
@@ -3719,9 +3718,13 @@ static int nf_tables_abort(struct sk_buff *skb)
 			break;
 		case NFT_MSG_NEWSETELEM:
 			nft_trans_elem_set(trans)->nelems--;
-			set = nft_trans_elem_set(trans);
-			set->ops->get(set, &nft_trans_elem(trans));
-			set->ops->remove(set, &nft_trans_elem(trans));
+			te = (struct nft_trans_elem *)trans->data;
+			te->set->ops->get(te->set, &te->elem);
+			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
+			if (te->set->flags & NFT_SET_MAP &&
+			    !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
+				nft_data_uninit(&te->elem.data, te->set->dtype);
+			te->set->ops->remove(te->set, &te->elem);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELSETELEM:
-- 
cgit v1.2.3


From 528c943f3bb919aef75ab2fff4f00176f09a4019 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 21 Feb 2015 21:03:10 +0200
Subject: ipvs: add missing ip_vs_pe_put in sync code

ip_vs_conn_fill_param_sync() gets in param.pe a module
reference for persistence engine from __ip_vs_pe_getbyname()
but forgets to put it. Problem occurs in backup for
sync protocol v1 (2.6.39).

Also, pe_data usually comes in sync messages for
connection templates and ip_vs_conn_new() copies
the pointer only in this case. Make sure pe_data
is not leaked if it comes unexpectedly for normal
connections. Leak can happen only if bogus messages
are sent to backup server.

Fixes: fe5e7a1efb66 ("IPVS: Backup, Adding Version 1 receive capability")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_sync.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index c47ffd7a0a70..d93ceeb3ef04 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -896,6 +896,8 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
 			return;
 		}
+		if (!(flags & IP_VS_CONN_F_TEMPLATE))
+			kfree(param->pe_data);
 	}
 
 	if (opt)
@@ -1169,6 +1171,7 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
 				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
 				);
 #endif
+	ip_vs_pe_put(param.pe);
 	return 0;
 	/* Error exit */
 out:
-- 
cgit v1.2.3


From 52d6c8c6ca125872459054daa70f2f1c698c8e75 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 22 Feb 2015 17:03:41 -0800
Subject: net: pktgen: disable xmit_clone on virtual devices

Trying to use burst capability (aka xmit_more) on a virtual device
like bonding is not supported.

For example, skb might be queued multiple times on a qdisc, with
various list corruptions.

Fixes: 38b2cf2982dc ("net: pktgen: packet bursting via skb->xmit_more")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b4899f5b7388..508155b283dd 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1134,6 +1134,9 @@ static ssize_t pktgen_if_write(struct file *file,
 			return len;
 
 		i += len;
+		if ((value > 1) &&
+		    (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+			return -ENOTSUPP;
 		pkt_dev->burst = value < 1 ? 1 : value;
 		sprintf(pg_result, "OK: burst=%d", pkt_dev->burst);
 		return count;
-- 
cgit v1.2.3


From 6514890f7aa8dd75fa46e282a1c7a8615e86f363 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 20 Feb 2015 13:33:16 -0500
Subject: tcp: fix tcp_should_expand_sndbuf() to use tcp_packets_in_flight()

tcp_should_expand_sndbuf() does not expand the send buffer if we have
filled the congestion window.

However, it should use tcp_packets_in_flight() instead of
tp->packets_out to make this check.

Testing has established that the difference matters a lot if there are
many SACKed packets, causing a needless performance shortfall.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8fdd27b17306..fb4cf8b8e121 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4770,7 +4770,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
 		return false;
 
 	/* If we filled the congestion window, do not expand.  */
-	if (tp->packets_out >= tp->snd_cwnd)
+	if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 		return false;
 
 	return true;
-- 
cgit v1.2.3


From 46b9e4bb76ee26f1e024e048bb95af41b763f48f Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Mon, 23 Feb 2015 12:02:51 +0100
Subject: decnet: Fix obvious o/0 typo

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1d7c1256e845..3b81092771f8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1062,7 +1062,7 @@ source_ok:
 	if (decnet_debug_level & 16)
 		printk(KERN_DEBUG
 		       "dn_route_output_slow: initial checks complete."
-		       " dst=%o4x src=%04x oif=%d try_hard=%d\n",
+		       " dst=%04x src=%04x oif=%d try_hard=%d\n",
 		       le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
 		       fld.flowidn_oif, try_hard);
 
-- 
cgit v1.2.3


From 9b1dcbc8cf4679ecf090b343ac31bda6e55ddabe Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 12 Feb 2015 10:14:51 -0500
Subject: xprtrdma: Store RDMA credits in unsigned variables

Dan Carpenter's static checker pointed out:

   net/sunrpc/xprtrdma/rpc_rdma.c:879 rpcrdma_reply_handler()
   warn: can 'credits' be negative?

"credits" is defined as an int. The credits value comes from the
server as a 32-bit unsigned integer.

A malicious or broken server can plant a large unsigned integer in
that field which would result in an underflow in the following
logic, potentially triggering a deadlock of the mount point by
blocking the client from issuing more RPC requests.

net/sunrpc/xprtrdma/rpc_rdma.c:

  876          credits = be32_to_cpu(headerp->rm_credit);
  877          if (credits == 0)
  878                  credits = 1;    /* don't deadlock */
  879          else if (credits > r_xprt->rx_buf.rb_max_requests)
  880                  credits = r_xprt->rx_buf.rb_max_requests;
  881
  882          cwnd = xprt->cwnd;
  883          xprt->cwnd = credits << RPC_CWNDSHIFT;
  884          if (xprt->cwnd > cwnd)
  885                  xprt_release_rqst_cong(rqst->rq_task);

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: eba8ff660b2d ("xprtrdma: Move credit update to RPC . . .")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  | 3 ++-
 net/sunrpc/xprtrdma/xprt_rdma.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 7e9acd9361c5..91ffde82fa0c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -738,8 +738,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	struct rpc_xprt *xprt = rep->rr_xprt;
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	__be32 *iptr;
-	int credits, rdmalen, status;
+	int rdmalen, status;
 	unsigned long cwnd;
+	u32 credits;
 
 	/* Check status. If bad, signal disconnect and return rep to pool */
 	if (rep->rr_len == ~0U) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index d1b70397c60f..0a16fb6f0885 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -285,7 +285,7 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
  */
 struct rpcrdma_buffer {
 	spinlock_t	rb_lock;	/* protects indexes */
-	int		rb_max_requests;/* client max requests */
+	u32		rb_max_requests;/* client max requests */
 	struct list_head rb_mws;	/* optional memory windows/fmrs/frmrs */
 	struct list_head rb_all;
 	int		rb_send_index;
-- 
cgit v1.2.3


From a948f8ce771a1f07c17ed8bcb51f59f69129a51c Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Mon, 23 Feb 2015 18:38:24 +0100
Subject: irda: replace current->state by set_current_state()

Use helper functions to access current->state.
Direct assignments are prone to races and therefore buggy.

current->state = TASK_RUNNING can be replaced by __set_current_state()

Thanks to Peter Zijlstra for the exact definition of the problem.

Suggested-By: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/ircomm/ircomm_tty.c | 2 +-
 net/irda/irnet/irnet_ppp.c   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 40695b9751c1..9940a41efca1 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -811,7 +811,7 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
 			break;
 	}
 	spin_unlock_irqrestore(&self->spinlock, flags);
-	current->state = TASK_RUNNING;
+	__set_current_state(TASK_RUNNING);
 }
 
 /*
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 3c83a1e5ab03..1215693fdd22 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -305,7 +305,7 @@ irnet_ctrl_read(irnet_socket *	ap,
 
   /* Put ourselves on the wait queue to be woken up */
   add_wait_queue(&irnet_events.rwait, &wait);
-  current->state = TASK_INTERRUPTIBLE;
+  set_current_state(TASK_INTERRUPTIBLE);
   for(;;)
     {
       /* If there is unread events */
@@ -321,7 +321,7 @@ irnet_ctrl_read(irnet_socket *	ap,
       /* Yield and wait to be woken up */
       schedule();
     }
-  current->state = TASK_RUNNING;
+  __set_current_state(TASK_RUNNING);
   remove_wait_queue(&irnet_events.rwait, &wait);
 
   /* Did we got it ? */
-- 
cgit v1.2.3


From d720d8cec563ce4e4fa44a613d4f2dcb1caf2998 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 23 Feb 2015 18:12:56 +0000
Subject: net: compat: Ignore MSG_CMSG_COMPAT in compat_sys_{send, recv}msg

With commit a7526eb5d06b (net: Unbreak compat_sys_{send,recv}msg), the
MSG_CMSG_COMPAT flag is blocked at the compat syscall entry points,
changing the kernel compat behaviour from the one before the commit it
was trying to fix (1be374a0518a, net: Block MSG_CMSG_COMPAT in
send(m)msg and recv(m)msg).

On 32-bit kernels (!CONFIG_COMPAT), MSG_CMSG_COMPAT is 0 and the native
32-bit sys_sendmsg() allows flag 0x80000000 to be set (it is ignored by
the kernel). However, on a 64-bit kernel, the compat ABI is different
with commit a7526eb5d06b.

This patch changes the compat_sys_{send,recv}msg behaviour to the one
prior to commit 1be374a0518a.

The problem was found running 32-bit LTP (sendmsg01) binary on an arm64
kernel. Arguably, LTP should not pass 0xffffffff as flags to sendmsg()
but the general rule is not to break user ABI (even when the user
behaviour is not entirely sane).

Fixes: a7526eb5d06b (net: Unbreak compat_sys_{send,recv}msg)
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/compat.c b/net/compat.c
index 3236b4167a32..94d3d5e97883 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -711,24 +711,18 @@ static unsigned char nas[21] = {
 
 COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags)
 {
-	if (flags & MSG_CMSG_COMPAT)
-		return -EINVAL;
 	return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
 COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
 		       unsigned int, vlen, unsigned int, flags)
 {
-	if (flags & MSG_CMSG_COMPAT)
-		return -EINVAL;
 	return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 			      flags | MSG_CMSG_COMPAT);
 }
 
 COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags)
 {
-	if (flags & MSG_CMSG_COMPAT)
-		return -EINVAL;
 	return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
@@ -751,9 +745,6 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
 	int datagrams;
 	struct timespec ktspec;
 
-	if (flags & MSG_CMSG_COMPAT)
-		return -EINVAL;
-
 	if (timeout == NULL)
 		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				      flags | MSG_CMSG_COMPAT, NULL);
-- 
cgit v1.2.3


From 77751427a1ff25b27d47a4c36b12c3c8667855ac Mon Sep 17 00:00:00 2001
From: Marcelo Leitner <mleitner@redhat.com>
Date: Mon, 23 Feb 2015 11:17:13 -0300
Subject: ipv6: addrconf: validate new MTU before applying it

Currently we don't check if the new MTU is valid or not and this allows
one to configure a smaller than minimum allowed by RFCs or even bigger
than interface own MTU, which is a problem as it may lead to packet
drops.

If you have a daemon like NetworkManager running, this may be exploited
by remote attackers by forging RA packets with an invalid MTU, possibly
leading to a DoS. (NetworkManager currently only validates for values
too small, but not for too big ones.)

The fix is just to make sure the new value is valid. That is, between
IPV6_MIN_MTU and interface's MTU.

Note that similar check is already performed at
ndisc_router_discovery(), for when kernel itself parses the RA.

Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 98e4a63d72bb..b6030025f411 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4903,6 +4903,21 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static
+int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct inet6_dev *idev = ctl->extra1;
+	int min_mtu = IPV6_MIN_MTU;
+	struct ctl_table lctl;
+
+	lctl = *ctl;
+	lctl.extra1 = &min_mtu;
+	lctl.extra2 = idev ? &idev->dev->mtu : NULL;
+
+	return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos);
+}
+
 static void dev_disable_change(struct inet6_dev *idev)
 {
 	struct netdev_notifier_info info;
@@ -5054,7 +5069,7 @@ static struct addrconf_sysctl_table
 			.data		= &ipv6_devconf.mtu6,
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.proc_handler	= addrconf_sysctl_mtu,
 		},
 		{
 			.procname	= "accept_ra",
-- 
cgit v1.2.3


From 104f5a6206f4b3133c675e3d41eca2ca4c41406b Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Sun, 8 Feb 2015 12:36:07 +0200
Subject: mac80211: clear sdata->radar_required

If ieee80211_vif_use_channel() fails, we have to clear
sdata->radar_required (which we might have just set).

Failing to do it results in stale radar_required field
which prevents starting new scan requests.

Reported-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Eliad Peller <eliad@wizery.com>
[use false instead of 0]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/chan.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index ff0d2db09df9..5bcd4e5589d3 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1508,6 +1508,8 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
 	if (ieee80211_chanctx_refcount(local, ctx) == 0)
 		ieee80211_free_chanctx(local, ctx);
 
+	sdata->radar_required = false;
+
 	/* Unreserving may ready an in-place reservation. */
 	if (use_reserved_switch)
 		ieee80211_vif_use_reserved_switch(local);
@@ -1566,6 +1568,9 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 	ieee80211_recalc_smps_chanctx(local, ctx);
 	ieee80211_recalc_radar_chanctx(local, ctx);
  out:
+	if (ret)
+		sdata->radar_required = false;
+
 	mutex_unlock(&local->chanctx_mtx);
 	return ret;
 }
-- 
cgit v1.2.3


From 5528fae88697268a7176dd6c8d7ab368c04368be Mon Sep 17 00:00:00 2001
From: Samuel Tan <samueltan@chromium.org>
Date: Mon, 9 Feb 2015 21:29:15 +0200
Subject: nl80211: use loop index as type for net detect frequency results

We currently add nested members of the NL80211_ATTR_SCAN_FREQUENCIES
as NLA_U32 attributes of type NL80211_ATTR_WIPHY_FREQ in
cfg80211_net_detect_results. However, since there can be an arbitrary number of
frequency results, we should use the loop index of the loop used to add the
frequency results to NL80211_ATTR_SCAN_FREQUENCIES as the type (i.e. nla_type)
for each result attribute, rather than a fixed type.

This change is in line with how nested members are added to
NL80211_ATTR_SCAN_FREQUENCIES in the functions nl80211_send_wowlan_nd and
nl80211_add_scan_req.

Signed-off-by: Samuel Tan <samueltan@chromium.org>
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d78fd8b54515..3c7fb0459e58 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12528,9 +12528,7 @@ static int cfg80211_net_detect_results(struct sk_buff *msg,
 			}
 
 			for (j = 0; j < match->n_channels; j++) {
-				if (nla_put_u32(msg,
-						NL80211_ATTR_WIPHY_FREQ,
-						match->channels[j])) {
+				if (nla_put_u32(msg, j, match->channels[j])) {
 					nla_nest_cancel(msg, nl_freqs);
 					nla_nest_cancel(msg, nl_match);
 					goto out;
-- 
cgit v1.2.3


From a18c7192aabac73078f35e5ef4ce28ad5f8cfe8e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 24 Feb 2015 10:56:42 +0100
Subject: nl80211: fix memory leak in monitor flags parsing

If monitor flags parsing results in active monitor but that
isn't supported, the already allocated message is leaked.
Fix this by moving the allocation after this check.

Reported-by: Christian Engelmayer <cengelma@gmx.at>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3c7fb0459e58..be2501538011 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2654,10 +2654,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 			return err;
 	}
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
 	err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
 				  info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
 				  &flags);
@@ -2666,6 +2662,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	    !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
 		return -EOPNOTSUPP;
 
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
 	wdev = rdev_add_virtual_intf(rdev,
 				nla_data(info->attrs[NL80211_ATTR_IFNAME]),
 				type, err ? NULL : &flags, &params);
-- 
cgit v1.2.3


From 28981e5eb45fe13e1d2c9e0e2e189dc5c8682006 Mon Sep 17 00:00:00 2001
From: Jason Abele <jason@aether.com>
Date: Tue, 17 Feb 2015 16:10:41 -0800
Subject: cfg80211: fix n_reg_rules to match world_regdom

There are currently 8 rules in the world_regdom, but only the first 6
are applied due to an incorrect value for n_reg_rules.  This causes
channels 149-165 and 60GHz to be disabled.

Signed-off-by: Jason Abele <jason@aether.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index b586d0dcb09e..48dfc7b4e981 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -228,7 +228,7 @@ static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work);
 
 /* We keep a static world regulatory domain in case of the absence of CRDA */
 static const struct ieee80211_regdomain world_regdom = {
-	.n_reg_rules = 6,
+	.n_reg_rules = 8,
 	.alpha2 =  "00",
 	.reg_rules = {
 		/* IEEE 802.11b/g, channels 1..11 */
-- 
cgit v1.2.3


From 81daf735f9fe35ef6bc4073068748b221d64fb47 Mon Sep 17 00:00:00 2001
From: Junjie Mao <junjie_mao@yeah.net>
Date: Wed, 28 Jan 2015 10:03:26 +0800
Subject: cfg80211: calls nl80211_exit on error

nl80211_exit should be called in cfg80211_init if nl80211_init succeeds
but regulatory_init or create_singlethread_workqueue fails.

Signed-off-by: Junjie Mao <junjie_mao@yeah.net>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3af0ecf1cc16..2a0bbd22854b 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1199,6 +1199,7 @@ out_fail_wq:
 	regulatory_exit();
 out_fail_reg:
 	debugfs_remove(ieee80211_debugfs_dir);
+	nl80211_exit();
 out_fail_nl80211:
 	unregister_netdevice_notifier(&cfg80211_netdev_notifier);
 out_fail_notifier:
-- 
cgit v1.2.3


From 17dce15801d5602719936045a7e84ff7dc6b6da2 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 19 Feb 2015 12:57:40 +0100
Subject: mac80211/minstrel: fix !x!=0 confusion

Commit 06d961a8e210 ("mac80211/minstrel: use the new rate control API")
inverted the condition 'if (msr->sample_limit != 0)' to
'if (!msr->sample_limit != 0)'. But it is confusing both to people and
compilers (gcc5):
net/mac80211/rc80211_minstrel.c: In function 'minstrel_get_rate':
net/mac80211/rc80211_minstrel.c:376:26: warning: logical not is only applied to the left hand side of comparison
   if (!msr->sample_limit != 0)
                          ^

Let there be only 'if (!msr->sample_limit)'.

Fixes: 06d961a8e210 ("mac80211/minstrel: use the new rate control API")
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 7c86a002df95..ef6e8a6c4253 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -373,7 +373,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
 		rate++;
 		mi->sample_deferred++;
 	} else {
-		if (!msr->sample_limit != 0)
+		if (!msr->sample_limit)
 			return;
 
 		mi->sample_packets++;
-- 
cgit v1.2.3


From 4e10fd5b4a7f4100007147558c304da3e73b25cf Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Tue, 24 Feb 2015 14:14:35 -0500
Subject: rtnetlink: avoid 0 sized arrays

Arrays (when not in a struct) "shall have a value greater than zero".

GCC complains when it's not the case here.

Fixes: ba7d49b1f0 ("rtnetlink: provide api for getting and setting slave info")
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ab293a3066b3..1385de0fa080 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2012,8 +2012,8 @@ replay:
 	}
 
 	if (1) {
-		struct nlattr *attr[ops ? ops->maxtype + 1 : 0];
-		struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0];
+		struct nlattr *attr[ops ? ops->maxtype + 1 : 1];
+		struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 1];
 		struct nlattr **data = NULL;
 		struct nlattr **slave_data = NULL;
 		struct net *dest_net, *link_net = NULL;
-- 
cgit v1.2.3


From 41a50d621a321b4c15273cc1b5ed41437f4acdfb Mon Sep 17 00:00:00 2001
From: Alexander Drozdov <al.drozdov@gmail.com>
Date: Tue, 24 Feb 2015 08:18:28 +0300
Subject: af_packet: don't pass empty blocks for PACKET_V3

Before da413eec729d ("packet: Fixed TPACKET V3 to signal poll when block is
closed rather than every packet") poll listening for an af_packet socket was
not signaled if there was no packets to process. After the patch poll is
signaled evety time when block retire timer expires. That happens because
af_packet closes the current block on timeout even if the block is empty.

Passing empty blocks to the user not only wastes CPU but also wastes ring
buffer space increasing probability of packets dropping on small timeouts.

Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
Cc: Dan Collins <dan@dcollins.co.nz>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Guy Harris <guy@alum.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 99fc628f7372..5bf1e968a728 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -698,6 +698,10 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
 
 	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
 		if (!frozen) {
+			if (!BLOCK_NUM_PKTS(pbd)) {
+				/* An empty block. Just refresh the timer. */
+				goto refresh_timer;
+			}
 			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
 			if (!prb_dispatch_next_block(pkc, po))
 				goto refresh_timer;
@@ -798,7 +802,11 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
 		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
 		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
 	} else {
-		/* Ok, we tmo'd - so get the current time */
+		/* Ok, we tmo'd - so get the current time.
+		 *
+		 * It shouldn't really happen as we don't close empty
+		 * blocks. See prb_retire_rx_blk_timer_expired().
+		 */
 		struct timespec ts;
 		getnstimeofday(&ts);
 		h1->ts_last_pkt.ts_sec = ts.tv_sec;
-- 
cgit v1.2.3


From 9c1c98a3bb7b7593b60264b9a07e001e68b46697 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Thu, 26 Feb 2015 15:50:50 +0200
Subject: mac80211: Send EAPOL frames at lowest rate

The current minstrel_ht rate control behavior is somewhat optimistic in
trying to find optimum TX rate. While this is usually fine for normal
Data frames, there are cases where a more conservative set of retry
parameters would be beneficial to make the connection more robust.

EAPOL frames are critical to the authentication and especially the
EAPOL-Key message 4/4 (the last message in the 4-way handshake) is
important to get through to the AP. If that message is lost, the only
recovery mechanism in many cases is to reassociate with the AP and start
from scratch. This can often be avoided by trying to send the frame with
more conservative rate and/or with more link layer retries.

In most cases, minstrel_ht is currently using the initial EAPOL-Key
frames for probing higher rates and this results in only five link layer
transmission attempts (one at high(ish) MCS and four at MCS0). While
this works with most APs, it looks like there are some deployed APs that
may have issues with the EAPOL frames using HT MCS immediately after
association. Similarly, there may be issues in cases where the signal
strength or radio environment is not good enough to be able to get
frames through even at couple of MCS 0 tries.

The best approach for this would likely to be to reduce the TX rate for
the last rate (3rd rate parameter in the set) to a low basic rate (say,
6 Mbps on 5 GHz and 2 or 5.5 Mbps on 2.4 GHz), but doing that cleanly
requires some more effort. For now, we can start with a simple one-liner
that forces the minimum rate to be used for EAPOL frames similarly how
the TX rate is selected for the IEEE 802.11 Management frames. This does
result in a small extra latency added to the cases where the AP would be
able to receive the higher rate, but taken into account how small number
of EAPOL frames are used, this is likely to be insignificant. A future
optimization in the minstrel_ht design can also allow this patch to be
reverted to get back to the more optimized initial TX rate.

It should also be noted that many drivers that do not use minstrel as
the rate control algorithm are already doing similar workarounds by
forcing the lowest TX rate to be used for EAPOL frames.

Cc: stable@vger.kernel.org
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 88a18ffe2975..07bd8db00af8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -566,6 +566,7 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
 		if (tx->sdata->control_port_no_encrypt)
 			info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
 		info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
+		info->flags |= IEEE80211_TX_CTL_USE_MINRATE;
 	}
 
 	return TX_CONTINUE;
-- 
cgit v1.2.3


From 76cb4be993c03bf9ec65a58b13f12c679bb041e4 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 24 Feb 2015 18:34:01 +0300
Subject: sunrpc: integer underflow in rsc_parse()

If we call groups_alloc() with invalid values then it's might lead to
memory corruption.  For example, with a negative value then we might not
allocate enough for sizeof(struct group_info).

(We're doing this in the caller for consistency with other callers of
groups_alloc().  The other alternative might be to move the check out of
all the callers into groups_alloc().)

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Simo Sorce <simo@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/svcauth_gss.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 224a82f24d3c..1095be9c80ab 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -463,6 +463,8 @@ static int rsc_parse(struct cache_detail *cd,
 		/* number of additional gid's */
 		if (get_int(&mesg, &N))
 			goto out;
+		if (N < 0 || N > NGROUPS_MAX)
+			goto out;
 		status = -ENOMEM;
 		rsci.cred.cr_group_info = groups_alloc(N);
 		if (rsci.cred.cr_group_info == NULL)
-- 
cgit v1.2.3


From 4c4b52d9b2df45e8216d3e30b5452e4a364d2cac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 25 Feb 2015 16:31:54 +0100
Subject: rhashtable: remove indirection for grow/shrink decision functions

Currently, all real users of rhashtable default their grow and shrink
decision functions to rht_grow_above_75() and rht_shrink_below_30(),
so that there's currently no need to have this explicitly selectable.

It can/should be generic and private inside rhashtable until a real
use case pops up. Since we can make this private, we'll save us this
additional indirection layer and can improve insertion/deletion time
as well.

Reference: http://patchwork.ozlabs.org/patch/443040/
Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 13 -----------
 lib/rhashtable.c           | 56 ++++++++++++++--------------------------------
 lib/test_rhashtable.c      |  1 +
 net/netfilter/nft_hash.c   |  2 --
 net/netlink/af_netlink.c   |  2 --
 net/tipc/socket.c          |  2 --
 6 files changed, 18 insertions(+), 58 deletions(-)

(limited to 'net')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index cb2104be2135..d438eeb08bff 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -79,12 +79,6 @@ struct rhashtable;
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Function to hash key
  * @obj_hashfn: Function to hash object
- * @grow_decision: If defined, may return true if table should expand
- * @shrink_decision: If defined, may return true if table should shrink
- *
- * Note: when implementing the grow and shrink decision function, min/max
- * shift must be enforced, otherwise, resizing watermarks they set may be
- * useless.
  */
 struct rhashtable_params {
 	size_t			nelem_hint;
@@ -98,10 +92,6 @@ struct rhashtable_params {
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
-	bool			(*grow_decision)(const struct rhashtable *ht,
-						 size_t new_size);
-	bool			(*shrink_decision)(const struct rhashtable *ht,
-						   size_t new_size);
 };
 
 /**
@@ -193,9 +183,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params);
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
 
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
-
 int rhashtable_expand(struct rhashtable *ht);
 int rhashtable_shrink(struct rhashtable *ht);
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index bcf119bfdef4..090641db4c0d 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -247,26 +247,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
  * @ht:		hash table
  * @new_size:	new table size
  */
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
 {
 	/* Expand table when exceeding 75% load */
 	return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
 	       (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
 }
-EXPORT_SYMBOL_GPL(rht_grow_above_75);
 
 /**
  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
  * @ht:		hash table
  * @new_size:	new table size
  */
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
+static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 {
 	/* Shrink table beneath 30% load */
 	return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
 	       (atomic_read(&ht->shift) > ht->p.min_shift);
 }
-EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
 static void lock_buckets(struct bucket_table *new_tbl,
 			 struct bucket_table *old_tbl, unsigned int hash)
@@ -528,40 +526,19 @@ static void rht_deferred_worker(struct work_struct *work)
 	list_for_each_entry(walker, &ht->walkers, list)
 		walker->resize = true;
 
-	if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
+	if (rht_grow_above_75(ht, tbl->size))
 		rhashtable_expand(ht);
-	else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
+	else if (rht_shrink_below_30(ht, tbl->size))
 		rhashtable_shrink(ht);
-
 unlock:
 	mutex_unlock(&ht->mutex);
 }
 
-static void rhashtable_probe_expand(struct rhashtable *ht)
-{
-	const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Only adjust the table if no resizing is currently in progress. */
-	if (tbl == new_tbl && ht->p.grow_decision &&
-	    ht->p.grow_decision(ht, tbl->size))
-		schedule_work(&ht->run_work);
-}
-
-static void rhashtable_probe_shrink(struct rhashtable *ht)
-{
-	const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Only adjust the table if no resizing is currently in progress. */
-	if (tbl == new_tbl && ht->p.shrink_decision &&
-	    ht->p.shrink_decision(ht, tbl->size))
-		schedule_work(&ht->run_work);
-}
-
 static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-				struct bucket_table *tbl, u32 hash)
+				struct bucket_table *tbl,
+				const struct bucket_table *old_tbl, u32 hash)
 {
+	bool no_resize_running = tbl == old_tbl;
 	struct rhash_head *head;
 
 	hash = rht_bucket_index(tbl, hash);
@@ -577,8 +554,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
-
-	rhashtable_probe_expand(ht);
+	if (no_resize_running && rht_grow_above_75(ht, tbl->size))
+		schedule_work(&ht->run_work);
 }
 
 /**
@@ -608,7 +585,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 	hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
 
 	lock_buckets(tbl, old_tbl, hash);
-	__rhashtable_insert(ht, obj, tbl, hash);
+	__rhashtable_insert(ht, obj, tbl, old_tbl, hash);
 	unlock_buckets(tbl, old_tbl, hash);
 
 	rcu_read_unlock();
@@ -690,8 +667,11 @@ found:
 	unlock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (ret) {
+		bool no_resize_running = new_tbl == old_tbl;
+
 		atomic_dec(&ht->nelems);
-		rhashtable_probe_shrink(ht);
+		if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size))
+			schedule_work(&ht->run_work);
 	}
 
 	rcu_read_unlock();
@@ -861,7 +841,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 		goto exit;
 	}
 
-	__rhashtable_insert(ht, obj, new_tbl, new_hash);
+	__rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash);
 
 exit:
 	unlock_buckets(new_tbl, old_tbl, new_hash);
@@ -1123,8 +1103,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (!ht->p.hash_rnd)
 		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
 
-	if (ht->p.grow_decision || ht->p.shrink_decision)
-		INIT_WORK(&ht->run_work, rht_deferred_worker);
+	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
 	return 0;
 }
@@ -1142,8 +1121,7 @@ void rhashtable_destroy(struct rhashtable *ht)
 {
 	ht->being_destroyed = true;
 
-	if (ht->p.grow_decision || ht->p.shrink_decision)
-		cancel_work_sync(&ht->run_work);
+	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
 	bucket_table_free(rht_dereference(ht->tbl, ht));
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index f9e9d734446a..67c7593d1dd6 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -201,6 +201,7 @@ static int __init test_rht_init(void)
 		.key_offset = offsetof(struct test_obj, value),
 		.key_len = sizeof(int),
 		.hashfn = jhash,
+		.max_shift = 1, /* we expand/shrink manually here */
 		.nulls_base = (3U << RHT_BASE_SHIFT),
 	};
 	int err;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 61e6c407476a..c82df0a48fcd 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -192,8 +192,6 @@ static int nft_hash_init(const struct nft_set *set,
 		.key_offset = offsetof(struct nft_hash_elem, key),
 		.key_len = set->klen,
 		.hashfn = jhash,
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	return rhashtable_init(priv, &params);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2702673f0f23..05919bf3f670 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3126,8 +3126,6 @@ static int __init netlink_proto_init(void)
 		.key_len = sizeof(u32), /* portid */
 		.hashfn = jhash,
 		.max_shift = 16, /* 64K */
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	if (err != 0)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f73e975af80b..b4d4467d0bb0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2364,8 +2364,6 @@ int tipc_sk_rht_init(struct net *net)
 		.hashfn = jhash,
 		.max_shift = 20, /* 1M */
 		.min_shift = 8,  /* 256 */
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	return rhashtable_init(&tn->sk_rht, &rht_params);
-- 
cgit v1.2.3


From 505ce4154ac86c250aa4a84a536dd9fc56479bb5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Feb 2015 16:19:00 -0600
Subject: net: Verify permission to dest_net in newlink

When applicable verify that the caller has permision to create a
network device in another network namespace.  This check is already
present when moving a network device between network namespaces in
setlink so all that is needed is to duplicate that check in newlink.

This change almost backports cleanly, but there are context conflicts
as the code that follows was added in v4.0-rc1

Fixes: b51642f6d77b net: Enable a userns root rtnl calls that are safe for unprivilged users
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1385de0fa080..b237959c7497 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2122,6 +2122,10 @@ replay:
 		if (IS_ERR(dest_net))
 			return PTR_ERR(dest_net);
 
+		err = -EPERM;
+		if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN))
+			goto out;
+
 		if (tb[IFLA_LINK_NETNSID]) {
 			int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
 
-- 
cgit v1.2.3


From 06615bed60c1fb7c37adddb75bdc80da873b5edb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Feb 2015 16:20:07 -0600
Subject: net: Verify permission to link_net in newlink

When applicable verify that the caller has permisson to the underlying
network namespace for a newly created network device.

Similary checks exist for the network namespace a network device will
be created in.

Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b237959c7497..2c49355d16c2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2134,6 +2134,9 @@ replay:
 				err =  -EINVAL;
 				goto out;
 			}
+			err = -EPERM;
+			if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
+				goto out;
 		}
 
 		dev = rtnl_create_link(link_net ? : dest_net, ifname,
-- 
cgit v1.2.3


From cac5e65e8a7ea074f2626d2eaa53aa308452dec4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 27 Feb 2015 09:42:50 -0800
Subject: net: do not use rcu in rtnl_dump_ifinfo()

We did a failed attempt in the past to only use rcu in rtnl dump
operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during
netlink dump callbacks")

Now that dumps are holding RTNL anyway, there is no need to also
use rcu locking, as it forbids any scheduling ability, like
GFP_KERNEL allocations that controlling path should use instead
of GFP_ATOMIC whenever possible.

This should fix following splat Cong Wang reported :

 [ INFO: suspicious RCU usage. ]
 3.19.0+ #805 Tainted: G        W

 include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section!

 other info that might help us debug this:

 rcu_scheduler_active = 1, debug_locks = 0
 2 locks held by ip/771:
  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff8182b8f4>] netlink_dump+0x21/0x26c
  #1:  (rcu_read_lock){......}, at: [<ffffffff817d785b>] rcu_read_lock+0x0/0x6e

 stack backtrace:
 CPU: 3 PID: 771 Comm: ip Tainted: G        W       3.19.0+ #805
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  0000000000000001 ffff8800d51e7718 ffffffff81a27457 0000000029e729e6
  ffff8800d6108000 ffff8800d51e7748 ffffffff810b539b ffffffff820013dd
  00000000000001c8 0000000000000000 ffff8800d7448088 ffff8800d51e7758
 Call Trace:
  [<ffffffff81a27457>] dump_stack+0x4c/0x65
  [<ffffffff810b539b>] lockdep_rcu_suspicious+0x107/0x110
  [<ffffffff8109796f>] rcu_preempt_sleep_check+0x45/0x47
  [<ffffffff8109e457>] ___might_sleep+0x1d/0x1cb
  [<ffffffff8109e67d>] __might_sleep+0x78/0x80
  [<ffffffff814b9b1f>] idr_alloc+0x45/0xd1
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff814b9f9d>] ? idr_for_each+0x53/0x101
  [<ffffffff817c1383>] alloc_netid+0x61/0x69
  [<ffffffff817c14c3>] __peernet2id+0x79/0x8d
  [<ffffffff817c1ab7>] peernet2id+0x13/0x1f
  [<ffffffff817d8673>] rtnl_fill_ifinfo+0xa8d/0xc20
  [<ffffffff810b17d9>] ? __lock_is_held+0x39/0x52
  [<ffffffff817d894f>] rtnl_dump_ifinfo+0x149/0x213
  [<ffffffff8182b9c2>] netlink_dump+0xef/0x26c
  [<ffffffff8182bcba>] netlink_recvmsg+0x17b/0x2c5
  [<ffffffff817b0adc>] __sock_recvmsg+0x4e/0x59
  [<ffffffff817b1b40>] sock_recvmsg+0x3f/0x51
  [<ffffffff817b1f9a>] ___sys_recvmsg+0xf6/0x1d9
  [<ffffffff8115dc67>] ? handle_pte_fault+0x6e1/0xd3d
  [<ffffffff8100a3a0>] ? native_sched_clock+0x35/0x37
  [<ffffffff8109f45b>] ? sched_clock_local+0x12/0x72
  [<ffffffff8109f6ac>] ? sched_clock_cpu+0x9e/0xb7
  [<ffffffff810cb7ab>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff811abde8>] ? __fcheck_files+0x4c/0x58
  [<ffffffff811ac556>] ? __fget_light+0x2d/0x52
  [<ffffffff817b376f>] __sys_recvmsg+0x42/0x60
  [<ffffffff817b379f>] SyS_recvmsg+0x12/0x1c

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids")
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2c49355d16c2..25b4b5d23485 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1300,7 +1300,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
 
-	rcu_read_lock();
 	cb->seq = net->dev_base_seq;
 
 	/* A hack to preserve kernel<->userspace interface.
@@ -1322,7 +1321,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
-		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+		hlist_for_each_entry(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1344,7 +1343,6 @@ cont:
 		}
 	}
 out:
-	rcu_read_unlock();
 	cb->args[1] = idx;
 	cb->args[0] = h;
 
-- 
cgit v1.2.3


From 56b08fdcf637955d3023d769afd6cdabc526ba22 Mon Sep 17 00:00:00 2001
From: Arvid Brodin <arvid.brodin@alten.se>
Date: Fri, 27 Feb 2015 21:26:03 +0100
Subject: net/hsr: Fix NULL pointer dereference and refcnt bugs when deleting a
 HSR interface.

To repeat:

$ sudo ip link del hsr0
BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
IP: [<ffffffff8187f495>] hsr_del_port+0x15/0xa0
etc...

Bug description:

As part of the hsr master device destruction, hsr_del_port() is called for each of
the hsr ports. At each such call, the master device is updated regarding features
and mtu. When the master device is freed before the slave interfaces, master will
be NULL in hsr_del_port(), which led to a NULL pointer dereference.

Additionally, dev_put() was called on the master device itself in hsr_del_port(),
causing a refcnt error.

A third bug in the same code path was that the rtnl lock was not taken before
hsr_del_port() was called as part of hsr_dev_destroy().

The reporter (Nicolas Dichtel) also said: "hsr_netdev_notify() supposes that the
port will always be available when the notification is for an hsr interface. It's
wrong. For example, netdev_wait_allrefs() may resend NETDEV_UNREGISTER.". As a
precaution against this, a check for port == NULL was added in hsr_dev_notify().

Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Fixes: 51f3c605318b056a ("net/hsr: Move slave init to hsr_slave.c.")
Signed-off-by: Arvid Brodin <arvid.brodin@alten.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c |  3 +++
 net/hsr/hsr_main.c   |  4 ++++
 net/hsr/hsr_slave.c  | 10 +++++++---
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index a138d75751df..44d27469ae55 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -359,8 +359,11 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
 	struct hsr_port *port;
 
 	hsr = netdev_priv(hsr_dev);
+
+	rtnl_lock();
 	hsr_for_each_port(hsr, port)
 		hsr_del_port(port);
+	rtnl_unlock();
 
 	del_timer_sync(&hsr->prune_timer);
 	del_timer_sync(&hsr->announce_timer);
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 779d28b65417..cd37d0011b42 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -36,6 +36,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
 			return NOTIFY_DONE;	/* Not an HSR device */
 		hsr = netdev_priv(dev);
 		port = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+		if (port == NULL) {
+			/* Resend of notification concerning removed device? */
+			return NOTIFY_DONE;
+		}
 	} else {
 		hsr = port->hsr;
 	}
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index a348dcbcd683..7d37366cc695 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -181,8 +181,10 @@ void hsr_del_port(struct hsr_port *port)
 	list_del_rcu(&port->port_list);
 
 	if (port != master) {
-		netdev_update_features(master->dev);
-		dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
+		if (master != NULL) {
+			netdev_update_features(master->dev);
+			dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
+		}
 		netdev_rx_handler_unregister(port->dev);
 		dev_set_promiscuity(port->dev, -1);
 	}
@@ -192,5 +194,7 @@ void hsr_del_port(struct hsr_port *port)
 	 */
 
 	synchronize_rcu();
-	dev_put(port->dev);
+
+	if (port != master)
+		dev_put(port->dev);
 }
-- 
cgit v1.2.3


From c03ae533a9c4de83a35105f9bfd7152d916b4680 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 28 Feb 2015 11:51:36 +0100
Subject: rxrpc: terminate retrans loop when sending of skb fails

Typo, 'stop' is never set to true.
Seems intent is to not attempt to retransmit more packets after sendmsg
returns an error.

This change is based on code inspection only.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-ack.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index c6be17a959a6..40404183a5da 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -218,7 +218,8 @@ static void rxrpc_resend(struct rxrpc_call *call)
 	struct rxrpc_header *hdr;
 	struct sk_buff *txb;
 	unsigned long *p_txb, resend_at;
-	int loop, stop;
+	bool stop;
+	int loop;
 	u8 resend;
 
 	_enter("{%d,%d,%d,%d},",
@@ -226,7 +227,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
 	       atomic_read(&call->sequence),
 	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
 
-	stop = 0;
+	stop = false;
 	resend = 0;
 	resend_at = 0;
 
@@ -255,7 +256,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
 			_proto("Tx DATA %%%u { #%d }",
 			       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
 			if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
-				stop = 0;
+				stop = true;
 				sp->resend_at = jiffies + 3;
 			} else {
 				sp->resend_at =
-- 
cgit v1.2.3


From 765dd3bb44711b4ba36c1e06f9c4b7bfe73ffef7 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 28 Feb 2015 11:51:37 +0100
Subject: rxrpc: don't multiply with HZ twice

rxrpc_resend_timeout has an initial value of 4 * HZ; use it as-is.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-ack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index 40404183a5da..e0547f521f20 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -260,7 +260,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
 				sp->resend_at = jiffies + 3;
 			} else {
 				sp->resend_at =
-					jiffies + rxrpc_resend_timeout * HZ;
+					jiffies + rxrpc_resend_timeout;
 			}
 		}
 
-- 
cgit v1.2.3


From acf8dd0a9d0b9e4cdb597c2f74802f79c699e802 Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Mon, 2 Mar 2015 18:27:11 +0100
Subject: udp: only allow UFO for packets from SOCK_DGRAM sockets

If an over-MTU UDP datagram is sent through a SOCK_RAW socket to a
UFO-capable device, ip_ufo_append_data() sets skb->ip_summed to
CHECKSUM_PARTIAL unconditionally as all GSO code assumes transport layer
checksum is to be computed on segmentation. However, in this case,
skb->csum_start and skb->csum_offset are never set as raw socket
transmit path bypasses udp_send_skb() where they are usually set. As a
result, driver may access invalid memory when trying to calculate the
checksum and store the result (as observed in virtio_net driver).

Moreover, the very idea of modifying the userspace provided UDP header
is IMHO against raw socket semantics (I wasn't able to find a document
clearly stating this or the opposite, though). And while allowing
CHECKSUM_NONE in the UFO case would be more efficient, it would be a bit
too intrusive change just to handle a corner case like this. Therefore
disallowing UFO for packets from SOCK_DGRAM seems to be the best option.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 3 ++-
 net/ipv6/ip6_output.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d68199d9b2b0..a7aea2048a0d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -888,7 +888,8 @@ static int __ip_append_data(struct sock *sk,
 	cork->length += length;
 	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
+	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+	    (sk->sk_type == SOCK_DGRAM)) {
 		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
 					 hh_len, fragheaderlen, transhdrlen,
 					 maxfraglen, flags);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7deebf102cba..0a04a37305d5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1298,7 +1298,8 @@ emsgsize:
 	if (((length > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO) &&
+	    (sk->sk_type == SOCK_DGRAM)) {
 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
 					  hh_len, fragheaderlen,
 					  transhdrlen, mtu, flags, rt);
-- 
cgit v1.2.3


From d0c22119f574b851e63360c6b8660fe9593bbc3c Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Mon, 2 Mar 2015 14:28:52 -0500
Subject: mac80211: drop unencrypted frames in mesh fwding

The mesh forwarding path was not checking that data
frames were protected when running an encrypted network;
add the necessary check.

Cc: stable@vger.kernel.org
Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1101563357ea..944bdc04e913 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2214,6 +2214,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	hdr = (struct ieee80211_hdr *) skb->data;
 	mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
 
+	if (ieee80211_drop_unencrypted(rx, hdr->frame_control))
+		return RX_DROP_MONITOR;
+
 	/* frame is in RMC, don't forward */
 	if (ieee80211_is_data(hdr->frame_control) &&
 	    is_multicast_ether_addr(hdr->addr1) &&
-- 
cgit v1.2.3


From aa75ebc275b2a91b193654a177daf900ad6703f0 Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Tue, 10 Feb 2015 12:48:44 +0100
Subject: mac80211: disable u-APSD queues by default

Some APs experience problems when working with
U-APSD. Decreasing the probability of that
happening by using legacy mode for all ACs but VO
isn't enough.

Cisco 4410N originally forced us to enable VO by
default only because it treated non-VO ACs as
legacy.

However some APs (notably Netgear R7000) silently
reclassify packets to different ACs. Since u-APSD
ACs require trigger frames for frame retrieval
clients would never see some frames (e.g. ARP
responses) or would fetch them accidentally after
a long time.

It makes little sense to enable u-APSD queues by
default because it needs userspace applications to
be aware of it to actually take advantage of the
possible additional powersavings. Implicitly
depending on driver autotrigger frame support
doesn't make much sense.

Cc: stable@vger.kernel.org
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3afe36824703..c0e089c194f1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -58,13 +58,24 @@ struct ieee80211_local;
 #define IEEE80211_UNSET_POWER_LEVEL	INT_MIN
 
 /*
- * Some APs experience problems when working with U-APSD. Decrease the
- * probability of that happening by using legacy mode for all ACs but VO.
- * The AP that caused us trouble was a Cisco 4410N. It ignores our
- * setting, and always treats non-VO ACs as legacy.
+ * Some APs experience problems when working with U-APSD. Decreasing the
+ * probability of that happening by using legacy mode for all ACs but VO isn't
+ * enough.
+ *
+ * Cisco 4410N originally forced us to enable VO by default only because it
+ * treated non-VO ACs as legacy.
+ *
+ * However some APs (notably Netgear R7000) silently reclassify packets to
+ * different ACs. Since u-APSD ACs require trigger frames for frame retrieval
+ * clients would never see some frames (e.g. ARP responses) or would fetch them
+ * accidentally after a long time.
+ *
+ * It makes little sense to enable u-APSD queues by default because it needs
+ * userspace applications to be aware of it to actually take advantage of the
+ * possible additional powersavings. Implicitly depending on driver autotrigger
+ * frame support doesn't make much sense.
  */
-#define IEEE80211_DEFAULT_UAPSD_QUEUES \
-	IEEE80211_WMM_IE_STA_QOSINFO_AC_VO
+#define IEEE80211_DEFAULT_UAPSD_QUEUES 0
 
 #define IEEE80211_DEFAULT_MAX_SP_LEN		\
 	IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
-- 
cgit v1.2.3


From 71e168b151babf4334e2e26c92230a6bda3b1f24 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 3 Mar 2015 13:53:31 +0100
Subject: net: bridge: add compile-time assert for cb struct size

make build fail if structure no longer fits into ->cb storage.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br.c b/net/bridge/br.c
index fb57ab6b24f9..02c24cf63c34 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -190,6 +190,8 @@ static int __init br_init(void)
 {
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+
 	err = stp_proto_register(&br_stp_proto);
 	if (err < 0) {
 		pr_err("bridge: can't register sap for STP\n");
-- 
cgit v1.2.3


From f4f8e73850589008095b1da3c8c17cf68bd1c62a Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Mon, 2 Mar 2015 18:49:56 -0800
Subject: openvswitch: Fix serialization of non-masked set actions.

Set actions consist of a regular OVS_KEY_ATTR_* attribute nested inside
of a OVS_ACTION_ATTR_SET action attribute. When converting masked actions
back to regular set actions, the inner attribute length was not changed,
ie, double the length being serialized. This patch fixes the bug.

Fixes: 83d2b9b ("net: openvswitch: Support masked set actions.")
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_netlink.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 216f20b90aa5..22b18c145c92 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2253,14 +2253,20 @@ static int masked_set_action_to_set_action_attr(const struct nlattr *a,
 						struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
+	struct nlattr *nla;
 	size_t key_len = nla_len(ovs_key) / 2;
 
 	/* Revert the conversion we did from a non-masked set action to
 	 * masked set action.
 	 */
-	if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key))
+	nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+	if (!nla)
 		return -EMSGSIZE;
 
+	if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nla);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 8670c3a55e91cb27a4b4d4d4c4fa35b0149e1abf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:04:18 +0000
Subject: netfilter: nf_tables: fix transaction race condition

A race condition exists in the rule transaction code for rules that
get added and removed within the same transaction.

The new rule starts out as inactive in the current and active in the
next generation and is inserted into the ruleset. When it is deleted,
it is additionally set to inactive in the next generation as well.

On commit the next generation is begun, then the actions are finalized.
For the new rule this would mean clearing out the inactive bit for
the previously current, now next generation.

However nft_rule_clear() clears out the bits for *both* generations,
activating the rule in the current generation, where it should be
deactivated due to being deleted. The rule will thus be active until
the deletion is finalized, removing the rule from the ruleset.

Similarly, when aborting a transaction for the same case, the undo
of insertion will remove it from the RCU protected rule list, the
deletion will clear out all bits. However until the next RCU
synchronization after all operations have been undone, the rule is
active on CPUs which can still see the rule on the list.

Generally, there may never be any modifications of the current
generations' inactive bit since this defeats the entire purpose of
atomicity. Change nft_rule_clear() to only touch the next generations
bit to fix this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a8c94620f20e..6fb532bf0fdb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -227,7 +227,7 @@ nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
 
 static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
 {
-	rule->genmask = 0;
+	rule->genmask &= ~(1 << gencursor_next(net));
 }
 
 static int
-- 
cgit v1.2.3


From 9889840f5988ecfd43b00c9abb83c1804e21406b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:04:19 +0000
Subject: netfilter: nf_tables: check for overflow of rule dlen field

Check that the space required for the expressions doesn't exceed the
size of the dlen field, which would lead to the iterators crashing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6fb532bf0fdb..7baafd5ab520 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1968,6 +1968,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			n++;
 		}
 	}
+	/* Check for overflow of dlen field */
+	err = -EFBIG;
+	if (size >= 1 << 12)
+		goto err1;
 
 	if (nla[NFTA_RULE_USERDATA])
 		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
-- 
cgit v1.2.3


From 86f1ec32318159a24de349f0a38e79b9d2b3131a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:04:20 +0000
Subject: netfilter: nf_tables: fix userdata length overflow

The NFT_USERDATA_MAXLEN is defined to 256, however we only have a u8
to store its size. Introduce a struct nft_userdata which contains a
length field and indicate its presence using a single bit in the rule.

The length field of struct nft_userdata is also a u8, however we don't
store zero sized data, so the actual length is udata->len + 1.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 22 +++++++++++++++++++---
 net/netfilter/nf_tables_api.c     | 28 +++++++++++++++++++---------
 2 files changed, 38 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9eaaa7884586..decb9a095ae7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -119,6 +119,22 @@ int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
 			   const struct nft_data *data,
 			   enum nft_data_types type);
 
+
+/**
+ *	struct nft_userdata - user defined data associated with an object
+ *
+ *	@len: length of the data
+ *	@data: content
+ *
+ *	The presence of user data is indicated in an object specific fashion,
+ *	so a length of zero can't occur and the value "len" indicates data
+ *	of length len + 1.
+ */
+struct nft_userdata {
+	u8			len;
+	unsigned char		data[0];
+};
+
 /**
  *	struct nft_set_elem - generic representation of set elements
  *
@@ -380,7 +396,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  *	@handle: rule handle
  *	@genmask: generation mask
  *	@dlen: length of expression data
- *	@ulen: length of user data (used for comments)
+ *	@udata: user data is appended to the rule
  *	@data: expression data
  */
 struct nft_rule {
@@ -388,7 +404,7 @@ struct nft_rule {
 	u64				handle:42,
 					genmask:2,
 					dlen:12,
-					ulen:8;
+					udata:1;
 	unsigned char			data[]
 		__attribute__((aligned(__alignof__(struct nft_expr))));
 };
@@ -476,7 +492,7 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
 	return (struct nft_expr *)&rule->data[rule->dlen];
 }
 
-static inline void *nft_userdata(const struct nft_rule *rule)
+static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
 {
 	return (void *)&rule->data[rule->dlen];
 }
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7baafd5ab520..74e4b876c96e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1711,9 +1711,12 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
 	}
 	nla_nest_end(skb, list);
 
-	if (rule->ulen &&
-	    nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
-		goto nla_put_failure;
+	if (rule->udata) {
+		struct nft_userdata *udata = nft_userdata(rule);
+		if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1,
+			    udata->data) < 0)
+			goto nla_put_failure;
+	}
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -1896,11 +1899,12 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_userdata *udata;
 	struct nft_trans *trans = NULL;
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
-	unsigned int size, i, n, ulen = 0;
+	unsigned int size, i, n, ulen = 0, usize = 0;
 	int err, rem;
 	bool create;
 	u64 handle, pos_handle;
@@ -1973,11 +1977,14 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	if (size >= 1 << 12)
 		goto err1;
 
-	if (nla[NFTA_RULE_USERDATA])
+	if (nla[NFTA_RULE_USERDATA]) {
 		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+		if (ulen > 0)
+			usize = sizeof(struct nft_userdata) + ulen;
+	}
 
 	err = -ENOMEM;
-	rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
 	if (rule == NULL)
 		goto err1;
 
@@ -1985,10 +1992,13 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 	rule->handle = handle;
 	rule->dlen   = size;
-	rule->ulen   = ulen;
+	rule->udata  = ulen ? 1 : 0;
 
-	if (ulen)
-		nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
+	if (ulen) {
+		udata = nft_userdata(rule);
+		udata->len = ulen - 1;
+		nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
+	}
 
 	expr = nft_expr_first(rule);
 	for (i = 0; i < n; i++) {
-- 
cgit v1.2.3


From 59900e0a019e7c2bdb7809a03ed5742d311b15b3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 4 Mar 2015 17:55:27 +0100
Subject: netfilter: nf_tables: fix error handling of rule replacement

In general, if a transaction object is added to the list successfully,
we can rely on the abort path to undo what we've done. This allows us to
simplify the error handling of the rule replacement path in
nf_tables_newrule().

This implicitly fixes an unnecessary removal of the old rule, which
needs to be left in place if we fail to replace.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 74e4b876c96e..6ab777912237 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2045,12 +2045,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 err3:
 	list_del_rcu(&rule->list);
-	if (trans) {
-		list_del_rcu(&nft_trans_rule(trans)->list);
-		nft_rule_clear(net, nft_trans_rule(trans));
-		nft_trans_destroy(trans);
-		chain->use++;
-	}
 err2:
 	nf_tables_rule_destroy(&ctx, rule);
 err1:
-- 
cgit v1.2.3


From 9145736d4862145684009d6a72a6e61324a9439e Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Tue, 3 Mar 2015 23:16:16 +0900
Subject: net: ping: Return EAFNOSUPPORT when appropriate.

1. For an IPv4 ping socket, ping_check_bind_addr does not check
   the family of the socket address that's passed in. Instead,
   make it behave like inet_bind, which enforces either that the
   address family is AF_INET, or that the family is AF_UNSPEC and
   the address is 0.0.0.0.
2. For an IPv6 ping socket, ping_check_bind_addr returns EINVAL
   if the socket family is not AF_INET6. Return EAFNOSUPPORT
   instead, for consistency with inet6_bind.
3. Make ping_v4_sendmsg and ping_v6_sendmsg return EAFNOSUPPORT
   instead of EINVAL if an incorrect socket address structure is
   passed in.
4. Make IPv6 ping sockets be IPv6-only. The code does not support
   IPv4, and it cannot easily be made to support IPv4 because
   the protocol numbers for ICMP and ICMPv6 are different. This
   makes connect(::ffff:192.0.2.1) fail with EAFNOSUPPORT instead
   of making the socket unusable.

Among other things, this fixes an oops that can be triggered by:

    int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
    struct sockaddr_in6 sin6 = {
        .sin6_family = AF_INET6,
        .sin6_addr = in6addr_any,
    };
    bind(s, (struct sockaddr *) &sin6, sizeof(sin6));

Change-Id: If06ca86d9f1e4593c0d6df174caca3487c57a241
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 12 ++++++++++--
 net/ipv6/ping.c |  5 +++--
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e9f66e1cda50..208d5439e59b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk)
 	kgid_t low, high;
 	int ret = 0;
 
+	if (sk->sk_family == AF_INET6)
+		sk->sk_ipv6only = 1;
+
 	inet_get_ping_group_range_net(net, &low, &high);
 	if (gid_lte(low, group) && gid_lte(group, high))
 		return 0;
@@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
 		if (addr_len < sizeof(*addr))
 			return -EINVAL;
 
+		if (addr->sin_family != AF_INET &&
+		    !(addr->sin_family == AF_UNSPEC &&
+		      addr->sin_addr.s_addr == htonl(INADDR_ANY)))
+			return -EAFNOSUPPORT;
+
 		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
 			 sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
@@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
 			return -EINVAL;
 
 		if (addr->sin6_family != AF_INET6)
-			return -EINVAL;
+			return -EAFNOSUPPORT;
 
 		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
 			 sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
@@ -716,7 +724,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 		if (msg->msg_namelen < sizeof(*usin))
 			return -EINVAL;
 		if (usin->sin_family != AF_INET)
-			return -EINVAL;
+			return -EAFNOSUPPORT;
 		daddr = usin->sin_addr.s_addr;
 		/* no remote port */
 	} else {
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index bd46f736f61d..a2dfff6ff227 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -102,9 +102,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	if (msg->msg_name) {
 		DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
-		if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
-		    u->sin6_family != AF_INET6) {
+		if (msg->msg_namelen < sizeof(*u))
 			return -EINVAL;
+		if (u->sin6_family != AF_INET6) {
+			return -EAFNOSUPPORT;
 		}
 		if (sk->sk_bound_dev_if &&
 		    sk->sk_bound_dev_if != u->sin6_scope_id) {
-- 
cgit v1.2.3


From 3e32e733d1bbb3f227259dc782ef01d5706bdae0 Mon Sep 17 00:00:00 2001
From: Alexander Drozdov <al.drozdov@gmail.com>
Date: Thu, 5 Mar 2015 10:29:39 +0300
Subject: ipv4: ip_check_defrag should not assume that skb_network_offset is
 zero

ip_check_defrag() may be used by af_packet to defragment outgoing packets.
skb_network_offset() of af_packet's outgoing packets is not zero.

Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2c8d98e728c0..145a50c4d566 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag);
 struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
 {
 	struct iphdr iph;
+	int netoff;
 	u32 len;
 
 	if (skb->protocol != htons(ETH_P_IP))
 		return skb;
 
-	if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0)
+	netoff = skb_network_offset(skb);
+
+	if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
 		return skb;
 
 	if (iph.ihl < 5 || iph.version != 4)
 		return skb;
 
 	len = ntohs(iph.tot_len);
-	if (skb->len < len || len < (iph.ihl * 4))
+	if (skb->len < netoff + len || len < (iph.ihl * 4))
 		return skb;
 
 	if (ip_is_fragment(&iph)) {
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (skb) {
-			if (!pskb_may_pull(skb, iph.ihl*4))
+			if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
 				return skb;
-			if (pskb_trim_rcsum(skb, len))
+			if (pskb_trim_rcsum(skb, netoff + len))
 				return skb;
 			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 			if (ip_defrag(skb, user))
-- 
cgit v1.2.3


From 6c09fa09d468d730eecd7122122175da772d3b09 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 5 Mar 2015 08:03:06 -0800
Subject: tcp: align tcp_xmit_size_goal() on tcp_tso_autosize()

With some mss values, it is possible tcp_xmit_size_goal() puts
one segment more in TSO packet than tcp_tso_autosize().

We send then one TSO packet followed by one single MSS.

It is not a serious bug, but we can do slightly better, especially
for drivers using netif_set_gso_max_size() to lower gso_max_size.

Using same formula avoids these corner cases and makes
tcp_xmit_size_goal() a bit faster.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 605ad7f184b6 ("tcp: refine TSO autosizing")
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d72a0fcd928..995a2259bcfc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				       int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 new_size_goal, size_goal, hlen;
+	u32 new_size_goal, size_goal;
 
 	if (!large_allowed || !sk_can_gso(sk))
 		return mss_now;
 
-	/* Maybe we should/could use sk->sk_prot->max_header here ? */
-	hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-	       inet_csk(sk)->icsk_ext_hdr_len +
-	       tp->tcp_header_len;
-
-	new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+	/* Note : tcp_tso_autosize() will eventually split this later */
+	new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
 	new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
 
 	/* We try hard to avoid divides here */
-- 
cgit v1.2.3


From 2c3fbe3cf28fbd7001545a92a83b4f8acfd9fa36 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 4 Mar 2015 10:39:03 +0100
Subject: net: irda: fix wait_until_sent poll timeout

In case an infinite timeout (0) is requested, the irda wait_until_sent
implementation would use a zero poll timeout rather than the default
200ms.

Note that wait_until_sent is currently never called with a 0-timeout
argument due to a bug in tty_wait_until_sent.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable <stable@vger.kernel.org>	# v2.6.12
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/irda/ircomm/ircomm_tty.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 40695b9751c1..4efe486baee6 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -798,7 +798,9 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
 	orig_jiffies = jiffies;
 
 	/* Set poll time to 200 ms */
-	poll_time = IRDA_MIN(timeout, msecs_to_jiffies(200));
+	poll_time = msecs_to_jiffies(200);
+	if (timeout)
+		poll_time = min_t(unsigned long, timeout, poll_time);
 
 	spin_lock_irqsave(&self->spinlock, flags);
 	while (self->tx_skb && self->tx_skb->len) {
-- 
cgit v1.2.3


From 1711fd9addf214823b993468567cab1f8254fc51 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sat, 7 Mar 2015 21:08:46 +0000
Subject: sunrpc: fix braino in ->poll()

POLL_OUT isn't what callers of ->poll() are expecting to see; it's
actually __SI_POLL | 2 and it's a siginfo code, not a poll bitmap
bit...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
Cc: Bruce Fields <bfields@fieldses.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sunrpc/cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 33fb105d4352..5199bb1a017e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -921,7 +921,7 @@ static unsigned int cache_poll(struct file *filp, poll_table *wait,
 	poll_wait(filp, &queue_wait, wait);
 
 	/* alway allow write */
-	mask = POLL_OUT | POLLWRNORM;
+	mask = POLLOUT | POLLWRNORM;
 
 	if (!rp)
 		return mask;
-- 
cgit v1.2.3


From c247f0534cc5a5a547a343903f42295a471844e2 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 7 Mar 2015 20:33:22 -0500
Subject: ip: fix error queue empty skb handling

When reading from the error queue, msg_name and msg_control are only
populated for some errors. A new exception for empty timestamp skbs
added a false positive on icmp errors without payload.

`traceroute -M udpconn` only displayed gateways that return payload
with the icmp error: the embedded network headers are pulled before
sock_queue_err_skb, leaving an skb with skb->len == 0 otherwise.

Fix this regression by refining when msg_name and msg_control
branches are taken. The solutions for the two fields are independent.

msg_name only makes sense for errors that configure serr->port and
serr->addr_offset. Test the first instead of skb->len. This also fixes
another issue. saddr could hold the wrong data, as serr->addr_offset
is not initialized  in some code paths, pointing to the start of the
network header. It is only valid when serr->port is set (non-zero).

msg_control support differs between IPv4 and IPv6. IPv4 only honors
requests for ICMP and timestamps with SOF_TIMESTAMPING_OPT_CMSG. The
skb->len test can simply be removed, because skb->dev is also tested
and never true for empty skbs. IPv6 honors requests for all errors
aside from local errors and timestamps on empty skbs.

In both cases, make the policy more explicit by moving this logic to
a new function that decides whether to process msg_control and that
optionally prepares the necessary fields in skb->cb[]. After this
change, the IPv4 and IPv6 paths are more similar.

The last case is rxrpc. Here, simply refine to only match timestamps.

Fixes: 49ca0d8bfaf3 ("net-timestamp: no-payload option")

Reported-by: Jan Niehusmann <jan@gondor.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>

----

Changes
  v1->v2
  - fix local origin test inversion in ip6_datagram_support_cmsg
  - make v4 and v6 code paths more similar by introducing analogous
    ipv4_datagram_support_cmsg
  - fix compile bug in rxrpc
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 33 +++++++++++++++++++++++----------
 net/ipv6/datagram.c    | 39 ++++++++++++++++++++++++++++-----------
 net/rxrpc/ar-error.c   |  4 ++--
 3 files changed, 53 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 31d8c71986b4..5cd99271d3a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 		kfree_skb(skb);
 }
 
-static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
-					  const struct sk_buff *skb,
-					  int ee_origin)
+/* IPv4 supports cmsg on all imcp errors and some timestamps
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ipv4_datagram_support_cmsg(const struct sock *sk,
+				       struct sk_buff *skb,
+				       int ee_origin)
 {
-	struct in_pktinfo *info = PKTINFO_SKB_CB(skb);
+	struct in_pktinfo *info;
+
+	if (ee_origin == SO_EE_ORIGIN_ICMP)
+		return true;
 
-	if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
-	    (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+	if (ee_origin == SO_EE_ORIGIN_LOCAL)
+		return false;
+
+	/* Support IP_PKTINFO on tstamp packets if requested, to correlate
+	 * timestamp with egress dev. Not possible for packets without dev
+	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+	 */
+	if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
 	    (!skb->dev))
 		return false;
 
+	info = PKTINFO_SKB_CB(skb);
 	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
 	info->ipi_ifindex = skb->dev->ifindex;
 	return true;
@@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin && skb->len) {
+	if (sin && serr->port) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 						   serr->addr_offset);
@@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	sin = &errhdr.offender;
 	memset(sin, 0, sizeof(*sin));
 
-	if (skb->len &&
-	    (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
-	     ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
+	if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		if (inet_sk(sk)->cmsg_flags)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c215be70cac0..ace8daca5c83 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -325,14 +325,34 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	kfree_skb(skb);
 }
 
-static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
+/* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL.
+ *
+ * At one point, excluding local errors was a quick test to identify icmp/icmp6
+ * errors. This is no longer true, but the test remained, so the v6 stack,
+ * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
+				      struct sock_exterr_skb *serr)
 {
-	int ifindex = skb->dev ? skb->dev->ifindex : -1;
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+	    serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6)
+		return true;
+
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
+		return false;
+
+	if (!skb->dev)
+		return false;
 
 	if (skb->protocol == htons(ETH_P_IPV6))
-		IP6CB(skb)->iif = ifindex;
+		IP6CB(skb)->iif = skb->dev->ifindex;
 	else
-		PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
+		PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
+
+	return true;
 }
 
 /*
@@ -369,7 +389,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin && skb->len) {
+	if (sin && serr->port) {
 		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
@@ -394,14 +414,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 	sin = &errhdr.offender;
 	memset(sin, 0, sizeof(*sin));
-	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
+
+	if (ip6_datagram_support_cmsg(skb, serr)) {
 		sin->sin6_family = AF_INET6;
-		if (np->rxopt.all) {
-			if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
-			    serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
-				ip6_datagram_prepare_pktinfo_errqueue(skb);
+		if (np->rxopt.all)
 			ip6_datagram_recv_common_ctl(sk, msg, skb);
-		}
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 5394b6be46ec..0610efa83d72 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -42,7 +42,8 @@ void rxrpc_UDP_error_report(struct sock *sk)
 		_leave("UDP socket errqueue empty");
 		return;
 	}
-	if (!skb->len) {
+	serr = SKB_EXT_ERR(skb);
+	if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
 		_leave("UDP empty message");
 		kfree_skb(skb);
 		return;
@@ -50,7 +51,6 @@ void rxrpc_UDP_error_report(struct sock *sk)
 
 	rxrpc_new_skb(skb);
 
-	serr = SKB_EXT_ERR(skb);
 	addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
 	port = serr->port;
 
-- 
cgit v1.2.3


From 969439016d2cf61fef53a973d7e6d2061c3793b1 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 23 Feb 2015 20:37:54 +0100
Subject: can: add missing initialisations in CAN related skbuffs

When accessing CAN network interfaces with AF_PACKET sockets e.g. by dhclient
this can lead to a skb_under_panic due to missing skb initialisations.

Add the missing initialisations at the CAN skbuff creation times on driver
level (rx path) and in the network layer (tx path).

Reported-by: Austin Schuh <austin@peloton-tech.com>
Reported-by: Daniel Steer <daniel.steer@mclaren.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c | 8 ++++++++
 net/can/af_can.c      | 3 +++
 2 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 3c82e02e3dae..b0f69248cb71 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -579,6 +579,10 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 
@@ -603,6 +607,10 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 66e08040ced7..32d710eaf1fc 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -259,6 +259,9 @@ int can_send(struct sk_buff *skb, int loop)
 		goto inval_skb;
 	}
 
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 
-- 
cgit v1.2.3


From 82f17091e68254d1612b42cf23291cad63cfaf04 Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@arista.com>
Date: Mon, 9 Mar 2015 11:51:04 -0700
Subject: net: delete stale packet_mclist entries

When an interface is deleted from a net namespace the ifindex in the
corresponding entries in PF_PACKET sockets' mclists becomes stale.
This can create inconsistencies if later an interface with the same ifindex
is moved from a different namespace (not that unlikely since ifindexes are
per-namespace).
In particular we saw problems with dev->promiscuity, resulting
in "promiscuity touches roof, set promiscuity failed. promiscuity
feature of device might be broken" warnings and EOVERFLOW failures of
setsockopt(PACKET_ADD_MEMBERSHIP).
This patch deletes the mclist entries for interfaces that are deleted.
Since this now causes setsockopt(PACKET_DROP_MEMBERSHIP) to fail with
EADDRNOTAVAIL if called after the interface is deleted, also make
packet_mc_drop not fail.

Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5bf1e968a728..f8db7064d81c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3123,11 +3123,18 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 	return 0;
 }
 
-static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
+static void packet_dev_mclist_delete(struct net_device *dev,
+				     struct packet_mclist **mlp)
 {
-	for ( ; i; i = i->next) {
-		if (i->ifindex == dev->ifindex)
-			packet_dev_mc(dev, i, what);
+	struct packet_mclist *ml;
+
+	while ((ml = *mlp) != NULL) {
+		if (ml->ifindex == dev->ifindex) {
+			packet_dev_mc(dev, ml, -1);
+			*mlp = ml->next;
+			kfree(ml);
+		} else
+			mlp = &ml->next;
 	}
 }
 
@@ -3204,12 +3211,11 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
 					packet_dev_mc(dev, ml, -1);
 				kfree(ml);
 			}
-			rtnl_unlock();
-			return 0;
+			break;
 		}
 	}
 	rtnl_unlock();
-	return -EADDRNOTAVAIL;
+	return 0;
 }
 
 static void packet_flush_mclist(struct sock *sk)
@@ -3559,7 +3565,7 @@ static int packet_notifier(struct notifier_block *this,
 		switch (msg) {
 		case NETDEV_UNREGISTER:
 			if (po->mclist)
-				packet_dev_mclist(dev, po->mclist, -1);
+				packet_dev_mclist_delete(dev, &po->mclist);
 			/* fallthrough */
 
 		case NETDEV_DOWN:
-- 
cgit v1.2.3


From e6441bae326271090755e1707196ad05aa1dc703 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 9 Mar 2015 16:16:22 -0400
Subject: tipc: fix bug in link failover handling

In commit c637c1035534867b85b78b453c38c495b58e2c5a
("tipc: resolve race problem at unicast message reception") we
introduced a new mechanism for delivering buffers upwards from link
to socket layer.

That code contains a bug in how we handle the new link input queue
during failover. When a link is reset, some of its users may be blocked
because of congestion, and in order to resolve this, we add any pending
wakeup pseudo messages to the link's input queue, and deliver them to
the socket. This misses the case where the other, remaining link also
may have congested users. Currently, the owner node's reference to the
remaining link's input queue is unconditionally overwritten by the
reset link's input queue. This has the effect that wakeup events from
the remaining link may be unduely delayed (but not lost) for a
potentially long period.

We fix this by adding the pending events from the reset link to the
input queue that is currently referenced by the node, whichever one
it is.

This commit should be applied to both net and net-next.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index a4cf364316de..14f09b3cb87c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -464,10 +464,11 @@ void tipc_link_reset(struct tipc_link *l_ptr)
 	/* Clean up all queues, except inputq: */
 	__skb_queue_purge(&l_ptr->outqueue);
 	__skb_queue_purge(&l_ptr->deferred_queue);
-	skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq);
-	if (!skb_queue_empty(&l_ptr->inputq))
+	if (!owner->inputq)
+		owner->inputq = &l_ptr->inputq;
+	skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq);
+	if (!skb_queue_empty(owner->inputq))
 		owner->action_flags |= TIPC_MSG_EVT;
-	owner->inputq = &l_ptr->inputq;
 	l_ptr->next_out = NULL;
 	l_ptr->unacked_window = 0;
 	l_ptr->checkpoint = 1;
-- 
cgit v1.2.3


From 5778d39d070b4ac5f889928175b7f2d53ae7504e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 9 Mar 2015 17:03:40 -0700
Subject: net_sched: fix struct tc_u_hnode layout in u32

We dynamically allocate divisor+1 entries for ->ht[] in tc_u_hnode:

  ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);

So ->ht is supposed to be the last field of this struct, however
this is broken, since an rcu head is appended after it.

Fixes: 1ce87720d456 ("net: sched: make cls_u32 lockless")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_u32.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 09487afbfd51..95fdf4e40051 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -78,8 +78,11 @@ struct tc_u_hnode {
 	struct tc_u_common	*tp_c;
 	int			refcnt;
 	unsigned int		divisor;
-	struct tc_u_knode __rcu	*ht[1];
 	struct rcu_head		rcu;
+	/* The 'ht' field MUST be the last field in structure to allow for
+	 * more entries allocated at end of structure.
+	 */
+	struct tc_u_knode __rcu	*ht[1];
 };
 
 struct tc_u_common {
-- 
cgit v1.2.3


From 7768eed8bf1d2e5eefa38c573f15f737a9824052 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 10 Mar 2015 19:03:46 +0100
Subject: net: add comment for sock_efree() usage

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 93c8b20c91e4..78e89eb7eb70 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1655,6 +1655,10 @@ void sock_rfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_rfree);
 
+/*
+ * Buffer destructor for skbs that are not used directly in read or write
+ * path, e.g. for error handler skbs. Automatically called from kfree_skb.
+ */
 void sock_efree(struct sk_buff *skb)
 {
 	sock_put(skb->sk);
-- 
cgit v1.2.3


From 4363890079674db7b00cf1bb0e6fa430e846e86b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Mar 2015 21:58:32 -0400
Subject: net: Handle unregister properly when netdev namespace change fails.

If rtnl_newlink() fails on it's call to dev_change_net_namespace(), we
have to make use of the ->dellink() method, if present, just like we
do when rtnl_configure_link() fails.

Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 25b4b5d23485..ee0608bb3bc0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2166,28 +2166,28 @@ replay:
 			}
 		}
 		err = rtnl_configure_link(dev, ifm);
-		if (err < 0) {
-			if (ops->newlink) {
-				LIST_HEAD(list_kill);
-
-				ops->dellink(dev, &list_kill);
-				unregister_netdevice_many(&list_kill);
-			} else {
-				unregister_netdevice(dev);
-			}
-			goto out;
-		}
-
+		if (err < 0)
+			goto out_unregister;
 		if (link_net) {
 			err = dev_change_net_namespace(dev, dest_net, ifname);
 			if (err < 0)
-				unregister_netdevice(dev);
+				goto out_unregister;
 		}
 out:
 		if (link_net)
 			put_net(link_net);
 		put_net(dest_net);
 		return err;
+out_unregister:
+		if (ops->newlink) {
+			LIST_HEAD(list_kill);
+
+			ops->dellink(dev, &list_kill);
+			unregister_netdevice_many(&list_kill);
+		} else {
+			unregister_netdevice(dev);
+		}
+		goto out;
 	}
 }
 
-- 
cgit v1.2.3


From 9949afa42be0b76f5832db112ce51bb6b35b2abb Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 10 Mar 2015 17:17:03 -0400
Subject: tcp: fix tcp_cong_avoid_ai() credit accumulation bug with decreases
 in w

The recent change to tcp_cong_avoid_ai() to handle stretch ACKs
introduced a bug where snd_cwnd_cnt could accumulate a very large
value while w was large, and then if w was reduced snd_cwnd could be
incremented by a large delta, leading to a large burst and high packet
loss. This was tickled when CUBIC's bictcp_update() sets "ca->cnt =
100 * cwnd".

This bug crept in while preparing the upstream version of
814d488c6126.

Testing: This patch has been tested in datacenter netperf transfers
and live youtube.com and google.com servers.

Fixes: 814d488c6126 ("tcp: fix the timid additive increase on stretch ACKs")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d694088214cd..62856e185a93 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
  */
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
 {
+	/* If credits accumulated at a higher w, apply them gently now. */
+	if (tp->snd_cwnd_cnt >= w) {
+		tp->snd_cwnd_cnt = 0;
+		tp->snd_cwnd++;
+	}
+
 	tp->snd_cwnd_cnt += acked;
 	if (tp->snd_cwnd_cnt >= w) {
 		u32 delta = tp->snd_cwnd_cnt / w;
-- 
cgit v1.2.3


From d578e18ce93f5d33a7120fd57c453e22a4c0fc37 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 10 Mar 2015 17:17:04 -0400
Subject: tcp: restore 1.5x per RTT limit to CUBIC cwnd growth in congestion
 avoidance

Commit 814d488c6126 ("tcp: fix the timid additive increase on stretch
ACKs") fixed a bug where tcp_cong_avoid_ai() would either credit a
connection with an increase of snd_cwnd_cnt, or increase snd_cwnd, but
not both, resulting in cwnd increasing by 1 packet on at most every
alternate invocation of tcp_cong_avoid_ai().

Although the commit correctly implemented the CUBIC algorithm, which
can increase cwnd by as much as 1 packet per 1 packet ACKed (2x per
RTT), in practice that could be too aggressive: in tests on network
paths with small buffers, YouTube server retransmission rates nearly
doubled.

This commit restores CUBIC to a maximum cwnd growth rate of 1 packet
per 2 packets ACKed (1.5x per RTT). In YouTube tests this restored
retransmit rates to low levels.

Testing: This patch has been tested in datacenter netperf transfers
and live youtube.com and google.com servers.

Fixes: 9cd981dcf174 ("tcp: fix stretch ACK bugs in CUBIC")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4b276d1ed980..06d3d665a9fd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -306,8 +306,10 @@ tcp_friendliness:
 		}
 	}
 
-	if (ca->cnt == 0)			/* cannot be zero */
-		ca->cnt = 1;
+	/* The maximum rate of cwnd increase CUBIC allows is 1 packet per
+	 * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
+	 */
+	ca->cnt = max(ca->cnt, 2U);
 }
 
 static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
-- 
cgit v1.2.3


From b1cb59cf2efe7971d3d72a7b963d09a512d994c9 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Wed, 11 Mar 2015 14:29:17 +0300
Subject: net: sysctl_net_core: check SNDBUF and RCVBUF for min length

sysctl has sysctl.net.core.rmem_*/wmem_* parameters which can be
set to incorrect values. Given that 'struct sk_buff' allocates from
rcvbuf, incorrectly set buffer length could result to memory
allocation failures. For example, set them as follows:

    # sysctl net.core.rmem_default=64
      net.core.wmem_default = 64
    # sysctl net.core.wmem_default=64
      net.core.wmem_default = 64
    # ping localhost -s 1024 -i 0 > /dev/null

This could result to the following failure:

skbuff: skb_over_panic: text:ffffffff81628db4 len:-32 put:-32
head:ffff88003a1cc200 data:ffff88003a1cc200 tail:0xffffffe0 end:0xc0 dev:<NULL>
kernel BUG at net/core/skbuff.c:102!
invalid opcode: 0000 [#1] SMP
...
task: ffff88003b7f5550 ti: ffff88003ae88000 task.ti: ffff88003ae88000
RIP: 0010:[<ffffffff8155fbd1>]  [<ffffffff8155fbd1>] skb_put+0xa1/0xb0
RSP: 0018:ffff88003ae8bc68  EFLAGS: 00010296
RAX: 000000000000008d RBX: 00000000ffffffe0 RCX: 0000000000000000
RDX: ffff88003fdcf598 RSI: ffff88003fdcd9c8 RDI: ffff88003fdcd9c8
RBP: ffff88003ae8bc88 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000001 R11: 00000000000002b2 R12: 0000000000000000
R13: 0000000000000000 R14: ffff88003d3f7300 R15: ffff88000012a900
FS:  00007fa0e2b4a840(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000d0f7e0 CR3: 000000003b8fb000 CR4: 00000000000006f0
Stack:
 ffff88003a1cc200 00000000ffffffe0 00000000000000c0 ffffffff818cab1d
 ffff88003ae8bd68 ffffffff81628db4 ffff88003ae8bd48 ffff88003b7f5550
 ffff880031a09408 ffff88003b7f5550 ffff88000012aa48 ffff88000012ab00
Call Trace:
 [<ffffffff81628db4>] unix_stream_sendmsg+0x2c4/0x470
 [<ffffffff81556f56>] sock_write_iter+0x146/0x160
 [<ffffffff811d9612>] new_sync_write+0x92/0xd0
 [<ffffffff811d9cd6>] vfs_write+0xd6/0x180
 [<ffffffff811da499>] SyS_write+0x59/0xd0
 [<ffffffff81651532>] system_call_fastpath+0x12/0x17
Code: 00 00 48 89 44 24 10 8b 87 c8 00 00 00 48 89 44 24 08 48 8b 87 d8 00
      00 00 48 c7 c7 30 db 91 81 48 89 04 24 31 c0 e8 4f a8 0e 00 <0f> 0b
      eb fe 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83
RIP  [<ffffffff8155fbd1>] skb_put+0xa1/0xb0
RSP <ffff88003ae8bc68>
Kernel panic - not syncing: Fatal exception

Moreover, the possible minimum is 1, so we can get another kernel panic:
...
BUG: unable to handle kernel paging request at ffff88013caee5c0
IP: [<ffffffff815604cf>] __alloc_skb+0x12f/0x1f0
...

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 433424804284..8ce351ffceb1 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,6 +25,8 @@
 static int zero = 0;
 static int one = 1;
 static int ushort_max = USHRT_MAX;
+static int min_sndbuf = SOCK_MIN_SNDBUF;
+static int min_rcvbuf = SOCK_MIN_RCVBUF;
 
 static int net_msg_warn;	/* Unused, but still a sysctl */
 
@@ -237,7 +239,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_sndbuf,
 	},
 	{
 		.procname	= "rmem_max",
@@ -245,7 +247,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_rcvbuf,
 	},
 	{
 		.procname	= "wmem_default",
@@ -253,7 +255,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_sndbuf,
 	},
 	{
 		.procname	= "rmem_default",
@@ -261,7 +263,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_rcvbuf,
 	},
 	{
 		.procname	= "dev_weight",
-- 
cgit v1.2.3


From c29390c6dfeee0944ac6b5610ebbe403944378fc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 11 Mar 2015 18:42:02 -0700
Subject: xps: must clear sender_cpu before forwarding

John reported that my previous commit added a regression
on his router.

This is because sender_cpu & napi_id share a common location,
so get_xps_queue() can see garbage and perform an out of bound access.

We need to make sure sender_cpu is cleared before doing the transmit,
otherwise any NIC busy poll enabled (skb_mark_napi_id()) can trigger
this bug.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: John <jw@nuclearfallout.net>
Bisected-by: John <jw@nuclearfallout.net>
Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 +++++++
 net/core/skbuff.c      | 2 +-
 net/ipv4/ip_forward.c  | 1 +
 net/ipv6/ip6_output.c  | 1 +
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 30007afe70b3..f54d6659713a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -948,6 +948,13 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
 	to->l4_hash = from->l4_hash;
 };
 
+static inline void skb_sender_cpu_clear(struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	skb->sender_cpu = 0;
+#endif
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f80507823531..434e78e5254d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4173,7 +4173,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
-	skb->sender_cpu = 0;
+	skb_sender_cpu_clear(skb);
 	skb_init_secmark(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 787b3c294ce6..d9bc28ac5d1b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	skb_sender_cpu_clear(skb);
 	return dst_output(skb);
 }
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0a04a37305d5..7e80b61b51ff 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
+	skb_sender_cpu_clear(skb);
 	return dst_output(skb);
 }
 
-- 
cgit v1.2.3


From 3a8dd9711e0792f64394edafadd66c2d1f1904df Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 11 Mar 2015 15:43:55 -0400
Subject: sock: fix possible NULL sk dereference in __skb_tstamp_tx

Test that sk != NULL before reading sk->sk_tsflags.

Fixes: 49ca0d8bfaf3 ("net-timestamp: no-payload option")
Reported-by: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 434e78e5254d..8e4ac97c8477 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3733,9 +3733,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 		     struct sock *sk, int tstype)
 {
 	struct sk_buff *skb;
-	bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+	bool tsonly;
 
-	if (!sk || !skb_may_tx_timestamp(sk, tsonly))
+	if (!sk)
+		return;
+
+	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+	if (!skb_may_tx_timestamp(sk, tsonly))
 		return;
 
 	if (tsonly)
-- 
cgit v1.2.3


From f862e07cf95d5b62a5fc5e981dd7d0dbaf33a501 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Mar 2015 22:46:59 +0100
Subject: rds: avoid potential stack overflow

The rds_iw_update_cm_id function stores a large 'struct rds_sock' object
on the stack in order to pass a pair of addresses. This happens to just
fit withint the 1024 byte stack size warning limit on x86, but just
exceed that limit on ARM, which gives us this warning:

net/rds/iw_rdma.c:200:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=]

As the use of this large variable is basically bogus, we can rearrange
the code to not do that. Instead of passing an rds socket into
rds_iw_get_device, we now just pass the two addresses that we have
available in rds_iw_update_cm_id, and we change rds_iw_get_mr accordingly,
to create two address structures on the stack there.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/iw_rdma.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index a817705ce2d0..dba8d0864f18 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -88,7 +88,9 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 			int *unpinned);
 static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
 
-static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
+static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst,
+			     struct rds_iw_device **rds_iwdev,
+			     struct rdma_cm_id **cm_id)
 {
 	struct rds_iw_device *iwdev;
 	struct rds_iw_cm_id *i_cm_id;
@@ -112,15 +114,15 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
 				src_addr->sin_port,
 				dst_addr->sin_addr.s_addr,
 				dst_addr->sin_port,
-				rs->rs_bound_addr,
-				rs->rs_bound_port,
-				rs->rs_conn_addr,
-				rs->rs_conn_port);
+				src->sin_addr.s_addr,
+				src->sin_port,
+				dst->sin_addr.s_addr,
+				dst->sin_port);
 #ifdef WORKING_TUPLE_DETECTION
-			if (src_addr->sin_addr.s_addr == rs->rs_bound_addr &&
-			    src_addr->sin_port == rs->rs_bound_port &&
-			    dst_addr->sin_addr.s_addr == rs->rs_conn_addr &&
-			    dst_addr->sin_port == rs->rs_conn_port) {
+			if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr &&
+			    src_addr->sin_port == src->sin_port &&
+			    dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr &&
+			    dst_addr->sin_port == dst->sin_port) {
 #else
 			/* FIXME - needs to compare the local and remote
 			 * ipaddr/port tuple, but the ipaddr is the only
@@ -128,7 +130,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
 			 * zero'ed.  It doesn't appear to be properly populated
 			 * during connection setup...
 			 */
-			if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) {
+			if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) {
 #endif
 				spin_unlock_irq(&iwdev->spinlock);
 				*rds_iwdev = iwdev;
@@ -180,19 +182,13 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
 {
 	struct sockaddr_in *src_addr, *dst_addr;
 	struct rds_iw_device *rds_iwdev_old;
-	struct rds_sock rs;
 	struct rdma_cm_id *pcm_id;
 	int rc;
 
 	src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
 	dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
 
-	rs.rs_bound_addr = src_addr->sin_addr.s_addr;
-	rs.rs_bound_port = src_addr->sin_port;
-	rs.rs_conn_addr = dst_addr->sin_addr.s_addr;
-	rs.rs_conn_port = dst_addr->sin_port;
-
-	rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id);
+	rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id);
 	if (rc)
 		rds_iw_remove_cm_id(rds_iwdev, cm_id);
 
@@ -598,9 +594,17 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
 	struct rds_iw_device *rds_iwdev;
 	struct rds_iw_mr *ibmr = NULL;
 	struct rdma_cm_id *cm_id;
+	struct sockaddr_in src = {
+		.sin_addr.s_addr = rs->rs_bound_addr,
+		.sin_port = rs->rs_bound_port,
+	};
+	struct sockaddr_in dst = {
+		.sin_addr.s_addr = rs->rs_conn_addr,
+		.sin_port = rs->rs_conn_port,
+	};
 	int ret;
 
-	ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id);
+	ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id);
 	if (ret || !cm_id) {
 		ret = -ENODEV;
 		goto out;
-- 
cgit v1.2.3


From 78146572b9cd20452da47951812f35b1ad4906be Mon Sep 17 00:00:00 2001
From: Ian Wilson <iwilson@brocade.com>
Date: Thu, 12 Mar 2015 09:37:58 +0000
Subject: netfilter: Zero the tuple in nfnl_cthelper_parse_tuple()

nfnl_cthelper_parse_tuple() is called from nfnl_cthelper_new(),
nfnl_cthelper_get() and nfnl_cthelper_del().  In each case they pass
a pointer to an nf_conntrack_tuple data structure local variable:

    struct nf_conntrack_tuple tuple;
    ...
    ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]);

The problem is that this local variable is not initialized, and
nfnl_cthelper_parse_tuple() only initializes two fields: src.l3num and
dst.protonum.  This leaves all other fields with undefined values
based on whatever is on the stack:

    tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
    tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);

The symptom observed was that when the rpc and tns helpers were added
then traffic to port 1536 was being sent to user-space.

Signed-off-by: Ian Wilson <iwilson@brocade.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index a5599fc51a6f..54330fb5efaf 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
 	if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
 		return -EINVAL;
 
+	/* Not all fields are initialized so first zero the tuple */
+	memset(tuple, 0, sizeof(struct nf_conntrack_tuple));
+
 	tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
 	tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
 
-- 
cgit v1.2.3


From 8051a2a518fcf3827a143470083ad6008697ff17 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 12 Mar 2015 11:53:41 +1030
Subject: 9p/trans_virtio: fix hot-unplug

On device hot-unplug, 9p/virtio currently will kfree channel while
it might still be in use.

Of course, it might stay used forever, so it's an extremely ugly hack,
but it seems better than use-after-free that we have now.

[ Unused variable removed, whitespace cleanup, msg single-lined --RR ]
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 net/9p/trans_virtio.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index d8e376a5f0f1..36a1a739ad68 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -658,14 +658,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 static void p9_virtio_remove(struct virtio_device *vdev)
 {
 	struct virtio_chan *chan = vdev->priv;
-
-	if (chan->inuse)
-		p9_virtio_close(chan->client);
-	vdev->config->del_vqs(vdev);
+	unsigned long warning_time;
 
 	mutex_lock(&virtio_9p_lock);
+
+	/* Remove self from list so we don't get new users. */
 	list_del(&chan->chan_list);
+	warning_time = jiffies;
+
+	/* Wait for existing users to close. */
+	while (chan->inuse) {
+		mutex_unlock(&virtio_9p_lock);
+		msleep(250);
+		if (time_after(jiffies, warning_time + 10 * HZ)) {
+			dev_emerg(&vdev->dev,
+				  "p9_virtio_remove: waiting for device in use.\n");
+			warning_time = jiffies;
+		}
+		mutex_lock(&virtio_9p_lock);
+	}
+
 	mutex_unlock(&virtio_9p_lock);
+
+	vdev->config->del_vqs(vdev);
+
 	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
 	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
 	kfree(chan->tag);
-- 
cgit v1.2.3


From d8bdff59cea141d2e5f7e98c1b11d3e0271640bd Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 13 Mar 2015 10:52:14 +1100
Subject: netfilter: Fix potential crash in nft_hash walker

When we get back an EAGAIN from rhashtable_walk_next we were
treating it as a valid object which obviously doesn't work too
well.

Luckily this is hard to trigger so it seems nobody has run into
it yet.

This patch fixes it by redoing the next call when we get an EAGAIN.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_hash.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c82df0a48fcd..37c15e674884 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -153,6 +153,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 				iter->err = err;
 				goto out;
 			}
+
+			continue;
 		}
 
 		if (iter->count < iter->skip)
-- 
cgit v1.2.3


From c8e2c80d7ec00d020320f905822bf49c5ad85250 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 13 Mar 2015 09:49:59 -0700
Subject: inet_diag: fix possible overflow in inet_diag_dump_one_icsk()

inet_diag_dump_one_icsk() allocates too small skb.

Add inet_sk_attr_size() helper right before inet_sk_diag_fill()
so that it can be updated if/when new attributes are added.

iproute2/ss currently does not use this dump_one() interface,
this might explain nobody noticed this problem yet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 81751f12645f..592aff37366b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler(
 	mutex_unlock(&inet_diag_table_mutex);
 }
 
+static size_t inet_sk_attr_size(void)
+{
+	return	  nla_total_size(sizeof(struct tcp_info))
+		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+		+ nla_total_size(1) /* INET_DIAG_TOS */
+		+ nla_total_size(1) /* INET_DIAG_TCLASS */
+		+ nla_total_size(sizeof(struct inet_diag_meminfo))
+		+ nla_total_size(sizeof(struct inet_diag_msg))
+		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+		+ nla_total_size(TCP_CA_NAME_MAX)
+		+ nla_total_size(sizeof(struct tcpvegas_info))
+		+ 64;
+}
+
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      struct user_namespace *user_ns,		      	
@@ -326,9 +340,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
 	if (err)
 		goto out;
 
-	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
-			sizeof(struct inet_diag_meminfo) +
-			sizeof(struct tcp_info) + 64, GFP_KERNEL);
+	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 	if (!rep) {
 		err = -ENOMEM;
 		goto out;
-- 
cgit v1.2.3


From 4c906c279886550d2aaac6facf71d709158e4e3c Mon Sep 17 00:00:00 2001
From: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Date: Fri, 13 Mar 2015 07:08:22 -0700
Subject: bridge: reset bridge mtu after deleting an interface

On adding an interface br_add_if() sets the MTU to the min of
all the interfaces. Do the same thing on removing an interface too
in br_del_if.

Signed-off-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index b087d278c679..1849d96b3c91 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -563,6 +563,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	 */
 	del_nbp(p);
 
+	dev_set_mtu(br->dev, br_min_mtu(br));
+
 	spin_lock_bh(&br->lock);
 	changed_addr = br_stp_recalculate_bridge_id(br);
 	spin_unlock_bh(&br->lock);
-- 
cgit v1.2.3


From 3eeff778e00c956875c70b145c52638c313dfb23 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sat, 14 Mar 2015 05:22:21 +0000
Subject: caif: fix MSG_OOB test in caif_seqpkt_recvmsg()

It should be checking flags, not msg->msg_flags.  It's ->sendmsg()
instances that need to look for that in ->msg_flags, ->recvmsg() ones
(including the other ->recvmsg() instance in that file, as well as
unix_dgram_recvmsg() this one claims to be imitating) check in flags.
Braino had been introduced in commit dcda13 ("caif: Bugfix - use MSG_TRUNC
in receive") back in 2010, so it goes quite a while back.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 769b185fefbd..a6e2da0bc718 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -281,7 +281,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int copylen;
 
 	ret = -EOPNOTSUPP;
-	if (m->msg_flags&MSG_OOB)
+	if (flags & MSG_OOB)
 		goto read_error;
 
 	skb = skb_recv_datagram(sk, flags, 0 , &ret);
-- 
cgit v1.2.3


From 7d985ed1dca5c90535d67ce92ef6ca520302340a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sat, 14 Mar 2015 05:34:56 +0000
Subject: rxrpc: bogus MSG_PEEK test in rxrpc_recvmsg()

[I would really like an ACK on that one from dhowells; it appears to be
quite straightforward, but...]

MSG_PEEK isn't passed to ->recvmsg() via msg->msg_flags; as the matter of
fact, neither the kernel users of rxrpc, nor the syscalls ever set that bit
in there.  It gets passed via flags; in fact, another such check in the same
function is done correctly - as flags & MSG_PEEK.

It had been that way (effectively disabled) for 8 years, though, so the patch
needs beating up - that case had never been tested.  If it is correct, it's
-stable fodder.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-recvmsg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 4575485ad1b4..19a560626dc4 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -87,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 		if (!skb) {
 			/* nothing remains on the queue */
 			if (copied &&
-			    (msg->msg_flags & MSG_PEEK || timeo == 0))
+			    (flags & MSG_PEEK || timeo == 0))
 				goto out;
 
 			/* wait for a message to turn up */
-- 
cgit v1.2.3


From 0f611d28fc2e13cfec64e1c544c16a086886805a Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Thu, 12 Mar 2015 08:53:30 +0200
Subject: mac80211: count interfaces correctly for combination checks

Since moving the interface combination checks to mac80211, it's
broken because it now only considers interfaces with an assigned
channel context, so for example any interface that isn't active
can still be up, which is clearly an issue; also, in particular
P2P-Device wdevs are an issue since they never have a chanctx.

Fix this by counting running interfaces instead the ones with a
channel context assigned.

Cc: stable@vger.kernel.org [3.16+]
Fixes: 73de86a38962b ("cfg80211/mac80211: move interface counting for combination check to mac80211")
Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
[rewrite commit message, dig out the commit it fixes]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 8428f4a95479..747bdcf72e92 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3178,7 +3178,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
 		wdev_iter = &sdata_iter->wdev;
 
 		if (sdata_iter == sdata ||
-		    rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL ||
+		    !ieee80211_sdata_running(sdata_iter) ||
 		    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
 			continue;
 
-- 
cgit v1.2.3


From 70a3fd6c61c46c07c63cab935dca9a17d8de1709 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 12 Mar 2015 08:53:29 +0200
Subject: mac80211: ask for ECSA IE to be considered for beacon parse CRC

When a beacon from the AP contains only the ECSA IE, and not a CSA IE
as well, this ECSA IE is not considered for calculating the CRC and
the beacon might be dropped as not being interesting. This is clearly
wrong, it should be handled and the channel switch should be executed.

Fix this by including the ECSA IE ID in the bitmap of interesting IEs.

Reported-by: Gil Tribush <gil.tribush@intel.com>
Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 10ac6324c1d0..cde8cd3d6595 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3204,7 +3204,8 @@ static const u64 care_about_ies =
 	(1ULL << WLAN_EID_CHANNEL_SWITCH) |
 	(1ULL << WLAN_EID_PWR_CONSTRAINT) |
 	(1ULL << WLAN_EID_HT_CAPABILITY) |
-	(1ULL << WLAN_EID_HT_OPERATION);
+	(1ULL << WLAN_EID_HT_OPERATION) |
+	(1ULL << WLAN_EID_EXT_CHANSWITCH_ANN);
 
 static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_mgmt *mgmt, size_t len,
-- 
cgit v1.2.3


From 496fcc294daab18799e190c0264863d653588d1f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 12 Mar 2015 08:53:27 +0200
Subject: nl80211: ignore HT/VHT capabilities without QoS/WMM

As HT/VHT depend heavily on QoS/WMM, it's not a good idea to
let userspace add clients that have HT/VHT but not QoS/WMM.
Since it does so in certain cases we've observed (client is
using HT IEs but not QoS/WMM) just ignore the HT/VHT info at
this point and don't pass it down to the drivers which might
unconditionally use it.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index be2501538011..b6f84f6a2a09 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4400,6 +4400,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
+	/* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT
+	 * as userspace might just pass through the capabilities from the IEs
+	 * directly, rather than enforcing this restriction and returning an
+	 * error in this case.
+	 */
+	if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
+		params.ht_capa = NULL;
+		params.vht_capa = NULL;
+	}
+
 	/* When you run into this, adjust the code below for the new flag */
 	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
 
-- 
cgit v1.2.3


From f84eaa1068315409ffbef57e6fea312180787db3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 12 Mar 2015 08:53:26 +0200
Subject: mac80211: ignore CSA to same channel

If the AP is confused and starts doing a CSA to the same channel,
just ignore that request instead of trying to act it out since it
was likely sent in error anyway.

In the case of the bug I was investigating the GO was misbehaving
and sending out a beacon with CSA IEs still included after having
actually done the channel switch.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mlme.c        | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c0e089c194f1..8d53d65bd2ab 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -464,6 +464,7 @@ struct ieee80211_if_managed {
 	unsigned int flags;
 
 	bool csa_waiting_bcn;
+	bool csa_ignored_same_chan;
 
 	bool beacon_crc_valid;
 	u32 beacon_crc;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cde8cd3d6595..142f66aece18 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1150,6 +1150,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
+	if (cfg80211_chandef_identical(&csa_ie.chandef,
+				       &sdata->vif.bss_conf.chandef)) {
+		if (ifmgd->csa_ignored_same_chan)
+			return;
+		sdata_info(sdata,
+			   "AP %pM tries to chanswitch to same channel, ignore\n",
+			   ifmgd->associated->bssid);
+		ifmgd->csa_ignored_same_chan = true;
+		return;
+	}
+
 	mutex_lock(&local->mtx);
 	mutex_lock(&local->chanctx_mtx);
 	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
@@ -1210,6 +1221,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	sdata->vif.csa_active = true;
 	sdata->csa_chandef = csa_ie.chandef;
 	sdata->csa_block_tx = csa_ie.mode;
+	ifmgd->csa_ignored_same_chan = false;
 
 	if (sdata->csa_block_tx)
 		ieee80211_stop_vif_queues(local, sdata,
@@ -2090,6 +2102,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	sdata->vif.csa_active = false;
 	ifmgd->csa_waiting_bcn = false;
+	ifmgd->csa_ignored_same_chan = false;
 	if (sdata->csa_block_tx) {
 		ieee80211_wake_vif_queues(local, sdata,
 					  IEEE80211_QUEUE_STOP_REASON_CSA);
-- 
cgit v1.2.3


From d6b6cb1d3e6f78d55c2d4043d77d0d8def3f3b99 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 17 Mar 2015 13:21:42 +0100
Subject: netfilter: nf_tables: allow to change chain policy without hook if it
 exists

If there's an existing base chain, we have to allow to change the
default policy without indicating the hook information.

However, if the chain doesn't exists, we have to enforce the presence of
the hook attribute.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6ab777912237..ac1a9528dbf2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1225,7 +1225,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 
 	if (nla[NFTA_CHAIN_POLICY]) {
 		if ((chain != NULL &&
-		    !(chain->flags & NFT_BASE_CHAIN)) ||
+		    !(chain->flags & NFT_BASE_CHAIN)))
+			return -EOPNOTSUPP;
+
+		if (chain == NULL &&
 		    nla[NFTA_CHAIN_HOOK] == NULL)
 			return -EOPNOTSUPP;
 
-- 
cgit v1.2.3


From 37355565ba57fd45f78f0934305be2761b641f8f Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 16 Mar 2015 15:56:05 +0100
Subject: ip6_tunnel: fix error code when tunnel exists

After commit 2b0bb01b6edb, the kernel returns -ENOBUFS when user tries to add
an existing tunnel with ioctl API:
$ ip -6 tunnel add ip6tnl1 mode ip6ip6 dev eth1
add tunnel "ip6tnl0" failed: No buffer space available

It's confusing, the right error is EEXIST.

This patch also change a bit the code returned:
 - ENOBUFS -> ENOMEM
 - ENOENT -> ENODEV

Fixes: 2b0bb01b6edb ("ip6_tunnel: Return an error when adding an existing tunnel.")
CC: Steffen Klassert <steffen.klassert@secunet.com>
Reported-by: Pierre Cheynier <me@pierre-cheynier.net>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 266a264ec212..ddd94eca19b3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -314,7 +314,7 @@ out:
  *   Create tunnel matching given parameters.
  *
  * Return:
- *   created tunnel or NULL
+ *   created tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
@@ -322,7 +322,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 	struct net_device *dev;
 	struct ip6_tnl *t;
 	char name[IFNAMSIZ];
-	int err;
+	int err = -ENOMEM;
 
 	if (p->name[0])
 		strlcpy(name, p->name, IFNAMSIZ);
@@ -348,7 +348,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 failed_free:
 	ip6_dev_free(dev);
 failed:
-	return NULL;
+	return ERR_PTR(err);
 }
 
 /**
@@ -362,7 +362,7 @@ failed:
  *   tunnel device is created and registered for use.
  *
  * Return:
- *   matching tunnel or NULL
+ *   matching tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_locate(struct net *net,
@@ -380,13 +380,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 		    ipv6_addr_equal(remote, &t->parms.raddr)) {
 			if (create)
-				return NULL;
+				return ERR_PTR(-EEXIST);
 
 			return t;
 		}
 	}
 	if (!create)
-		return NULL;
+		return ERR_PTR(-ENODEV);
 	return ip6_tnl_create(net, p);
 }
 
@@ -1420,7 +1420,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			}
 			ip6_tnl_parm_from_user(&p1, &p);
 			t = ip6_tnl_locate(net, &p1, 0);
-			if (t == NULL)
+			if (IS_ERR(t))
 				t = netdev_priv(dev);
 		} else {
 			memset(&p, 0, sizeof(p));
@@ -1445,7 +1445,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		ip6_tnl_parm_from_user(&p1, &p);
 		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
 		if (cmd == SIOCCHGTUNNEL) {
-			if (t != NULL) {
+			if (!IS_ERR(t)) {
 				if (t->dev != dev) {
 					err = -EEXIST;
 					break;
@@ -1457,14 +1457,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			else
 				err = ip6_tnl_update(t, &p1);
 		}
-		if (t) {
+		if (!IS_ERR(t)) {
 			err = 0;
 			ip6_tnl_parm_to_user(&p, &t->parms);
 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 				err = -EFAULT;
 
-		} else
-			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		} else {
+			err = PTR_ERR(t);
+		}
 		break;
 	case SIOCDELTUNNEL:
 		err = -EPERM;
@@ -1478,7 +1479,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			err = -ENOENT;
 			ip6_tnl_parm_from_user(&p1, &p);
 			t = ip6_tnl_locate(net, &p1, 0);
-			if (t == NULL)
+			if (IS_ERR(t))
 				break;
 			err = -EPERM;
 			if (t->dev == ip6n->fb_tnl_dev)
@@ -1672,12 +1673,13 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net *net = dev_net(dev);
-	struct ip6_tnl *nt;
+	struct ip6_tnl *nt, *t;
 
 	nt = netdev_priv(dev);
 	ip6_tnl_netlink_parms(data, &nt->parms);
 
-	if (ip6_tnl_locate(net, &nt->parms, 0))
+	t = ip6_tnl_locate(net, &nt->parms, 0);
+	if (!IS_ERR(t))
 		return -EEXIST;
 
 	return ip6_tnl_create2(dev);
@@ -1697,8 +1699,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
 	ip6_tnl_netlink_parms(data, &p);
 
 	t = ip6_tnl_locate(net, &p, 0);
-
-	if (t) {
+	if (!IS_ERR(t)) {
 		if (t->dev != dev)
 			return -EEXIST;
 	} else
-- 
cgit v1.2.3


From cb7cf8a33ff73cf638481d1edf883d8968f934f8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 16 Mar 2015 12:19:24 -0700
Subject: inet: Clean up inet_csk_wait_for_connect() vs. might_sleep()

I got the following trace with current net-next kernel :

[14723.885290] WARNING: CPU: 26 PID: 22658 at kernel/sched/core.c:7285 __might_sleep+0x89/0xa0()
[14723.885325] do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffff810e8734>] prepare_to_wait_exclusive+0x34/0xa0
[14723.885355] CPU: 26 PID: 22658 Comm: netserver Not tainted 4.0.0-dbg-DEV #1379
[14723.885359]  ffffffff81a223a8 ffff881fae9e7ca8 ffffffff81650b5d 0000000000000001
[14723.885364]  ffff881fae9e7cf8 ffff881fae9e7ce8 ffffffff810a72e7 0000000000000000
[14723.885367]  ffffffff81a57620 000000000000093a 0000000000000000 ffff881fae9e7e64
[14723.885371] Call Trace:
[14723.885377]  [<ffffffff81650b5d>] dump_stack+0x4c/0x65
[14723.885382]  [<ffffffff810a72e7>] warn_slowpath_common+0x97/0xe0
[14723.885386]  [<ffffffff810a73e6>] warn_slowpath_fmt+0x46/0x50
[14723.885390]  [<ffffffff810f4c5d>] ? trace_hardirqs_on_caller+0x10d/0x1d0
[14723.885393]  [<ffffffff810e8734>] ? prepare_to_wait_exclusive+0x34/0xa0
[14723.885396]  [<ffffffff810e8734>] ? prepare_to_wait_exclusive+0x34/0xa0
[14723.885399]  [<ffffffff810ccdc9>] __might_sleep+0x89/0xa0
[14723.885403]  [<ffffffff81581846>] lock_sock_nested+0x36/0xb0
[14723.885406]  [<ffffffff815829a3>] ? release_sock+0x173/0x1c0
[14723.885411]  [<ffffffff815ea1f7>] inet_csk_accept+0x157/0x2a0
[14723.885415]  [<ffffffff810e8900>] ? abort_exclusive_wait+0xc0/0xc0
[14723.885419]  [<ffffffff8161b96d>] inet_accept+0x2d/0x150
[14723.885424]  [<ffffffff8157db6f>] SYSC_accept4+0xff/0x210
[14723.885428]  [<ffffffff8165a451>] ? retint_swapgs+0xe/0x44
[14723.885431]  [<ffffffff810f4c5d>] ? trace_hardirqs_on_caller+0x10d/0x1d0
[14723.885437]  [<ffffffff81369c0e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[14723.885441]  [<ffffffff8157ef40>] SyS_accept+0x10/0x20
[14723.885444]  [<ffffffff81659872>] system_call_fastpath+0x12/0x17
[14723.885447] ---[ end trace ff74cd83355b1873 ]---

In commit 26cabd31259ba43f68026ce3f62b78094124333f
Peter added a sched_annotate_sleep() in sk_wait_event()

Is the following patch needed as well ?

Alternative would be to use sk_wait_event() from inet_csk_wait_for_connect()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 14d02ea905b6..3e44b9b0b78e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -268,6 +268,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 		release_sock(sk);
 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
 			timeo = schedule_timeout(timeo);
+		sched_annotate_sleep();
 		lock_sock(sk);
 		err = 0;
 		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
-- 
cgit v1.2.3


From ced585c83b27deca427c606a34dd3eaa6b96d82b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 17 Mar 2015 20:25:57 +0100
Subject: act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome

Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().

Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.

In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).

That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.

Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.

Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.

All that will allow us to transparently use tcf_bpf() for both BPF
flavours.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_bpf.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 82c5d7fc1988..5f6288fa3f12 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -25,21 +25,41 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
 		   struct tcf_result *res)
 {
 	struct tcf_bpf *b = a->priv;
-	int action;
-	int filter_res;
+	int action, filter_res;
 
 	spin_lock(&b->tcf_lock);
+
 	b->tcf_tm.lastuse = jiffies;
 	bstats_update(&b->tcf_bstats, skb);
-	action = b->tcf_action;
 
 	filter_res = BPF_PROG_RUN(b->filter, skb);
-	if (filter_res == 0) {
-		/* Return code 0 from the BPF program
-		 * is being interpreted as a drop here.
-		 */
-		action = TC_ACT_SHOT;
+
+	/* A BPF program may overwrite the default action opcode.
+	 * Similarly as in cls_bpf, if filter_res == -1 we use the
+	 * default action specified from tc.
+	 *
+	 * In case a different well-known TC_ACT opcode has been
+	 * returned, it will overwrite the default one.
+	 *
+	 * For everything else that is unkown, TC_ACT_UNSPEC is
+	 * returned.
+	 */
+	switch (filter_res) {
+	case TC_ACT_PIPE:
+	case TC_ACT_RECLASSIFY:
+	case TC_ACT_OK:
+		action = filter_res;
+		break;
+	case TC_ACT_SHOT:
+		action = filter_res;
 		b->tcf_qstats.drops++;
+		break;
+	case TC_ACT_UNSPEC:
+		action = b->tcf_action;
+		break;
+	default:
+		action = TC_ACT_UNSPEC;
+		break;
 	}
 
 	spin_unlock(&b->tcf_lock);
-- 
cgit v1.2.3


From 4017a7ee693d1cae6735c0dac21594a7c6416c4c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 2 Mar 2015 01:10:28 +0100
Subject: netfilter: restore rule tracing via nfnetlink_log
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since fab4085 ("netfilter: log: nf_log_packet() as real unified
interface"), the loginfo structure that is passed to nf_log_packet() is
used to explicitly indicate the logger type you want to use.

This is a problem for people tracing rules through nfnetlink_log since
packets are always routed to the NF_LOG_TYPE logger after the
aforementioned patch.

We can fix this by removing the trace loginfo structures, but that still
changes the log level from 4 to 5 for tracing messages and there may be
someone relying on this outthere. So let's just introduce a new
nf_log_trace() function that restores the former behaviour.

Reported-by: Markus Kötter <koetter@rrzn.uni-hannover.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_log.h  | 10 ++++++++++
 net/ipv4/netfilter/ip_tables.c  |  6 +++---
 net/ipv6/netfilter/ip6_tables.c |  6 +++---
 net/netfilter/nf_log.c          | 24 ++++++++++++++++++++++++
 net/netfilter/nf_tables_core.c  |  8 ++++----
 5 files changed, 44 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 534e1f2ac4fc..57639fca223a 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -79,6 +79,16 @@ void nf_log_packet(struct net *net,
 		   const struct nf_loginfo *li,
 		   const char *fmt, ...);
 
+__printf(8, 9)
+void nf_log_trace(struct net *net,
+		  u_int8_t pf,
+		  unsigned int hooknum,
+		  const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct nf_loginfo *li,
+		  const char *fmt, ...);
+
 struct nf_log_buf;
 
 struct nf_log_buf *nf_log_buf_open(void);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 99e810f84671..cf5e82f39d3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb,
 		    &chainname, &comment, &rulenum) != 0)
 			break;
 
-	nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
-		      "TRACE: %s:%s:%s:%u ",
-		      tablename, chainname, comment, rulenum);
+	nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
+		     "TRACE: %s:%s:%s:%u ",
+		     tablename, chainname, comment, rulenum);
 }
 #endif
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e080fbbbc0e5..bb00c6f2a885 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -298,9 +298,9 @@ static void trace_packet(const struct sk_buff *skb,
 		    &chainname, &comment, &rulenum) != 0)
 			break;
 
-	nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
-		      "TRACE: %s:%s:%s:%u ",
-		      tablename, chainname, comment, rulenum);
+	nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
+		     "TRACE: %s:%s:%s:%u ",
+		     tablename, chainname, comment, rulenum);
 }
 #endif
 
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 0d8448f19dfe..675d12c69e32 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -212,6 +212,30 @@ void nf_log_packet(struct net *net,
 }
 EXPORT_SYMBOL(nf_log_packet);
 
+void nf_log_trace(struct net *net,
+		  u_int8_t pf,
+		  unsigned int hooknum,
+		  const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct nf_loginfo *loginfo, const char *fmt, ...)
+{
+	va_list args;
+	char prefix[NF_LOG_PREFIXLEN];
+	const struct nf_logger *logger;
+
+	rcu_read_lock();
+	logger = rcu_dereference(net->nf.nf_loggers[pf]);
+	if (logger) {
+		va_start(args, fmt);
+		vsnprintf(prefix, sizeof(prefix), fmt, args);
+		va_end(args);
+		logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_trace);
+
 #define S_SIZE (1024 - (sizeof(unsigned int) + 1))
 
 struct nf_log_buf {
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3b90eb2b2c55..2d298dccb6dd 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -94,10 +94,10 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt,
 {
 	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
 
-	nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
-		      pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
-		      chain->table->name, chain->name, comments[type],
-		      rulenum);
+	nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+		     pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+		     chain->table->name, chain->name, comments[type],
+		     rulenum);
 }
 
 unsigned int
-- 
cgit v1.2.3


From 3d8c6dce53a349df8878d078e56bf429bad572f9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Mar 2015 13:56:06 +0100
Subject: netfilter: xt_TPROXY: fix invflags check in tproxy_tg6_check()

We have to check for IP6T_INV_PROTO in invflags, instead of flags.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Balazs Scheidler <bazsi@balabit.hu>
---
 net/netfilter/xt_TPROXY.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index ef8a926752a9..50e1e5aaf4ce 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -513,8 +513,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct ip6t_ip6 *i = par->entryinfo;
 
-	if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
-	    && !(i->flags & IP6T_INV_PROTO))
+	if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&
+	    !(i->invflags & IP6T_INV_PROTO))
 		return 0;
 
 	pr_info("Can be used only in combination with "
-- 
cgit v1.2.3


From 8e199dfd82ee097b522b00344af6448715d8ee0c Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 19 Mar 2015 11:22:32 +0100
Subject: ipv6: call ipv6_proxy_select_ident instead of ipv6_select_ident in
 udp6_ufo_fragment

Matt Grant reported frequent crashes in ipv6_select_ident when
udp6_ufo_fragment is called from openvswitch on a skb that doesn't
have a dst_entry set.

ipv6_proxy_select_ident generates the frag_id without using the dst
associated with the skb.  This approach was suggested by Vladislav
Yasevich.

Fixes: 0508c07f5e0c ("ipv6: Select fragment id during UFO segmentation if not set.")
Cc: Vladislav Yasevich <vyasevic@redhat.com>
Reported-by: Matt Grant <matt@mattgrant.net.nz>
Tested-by: Matt Grant <matt@mattgrant.net.nz>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp_offload.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ab889bb16b3c..be2c0ba82c85 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
 		fptr->nexthdr = nexthdr;
 		fptr->reserved = 0;
-		if (skb_shinfo(skb)->ip6_frag_id)
-			fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-		else
-			ipv6_select_ident(fptr,
-					  (struct rt6_info *)skb_dst(skb));
+		if (!skb_shinfo(skb)->ip6_frag_id)
+			ipv6_proxy_select_ident(skb);
+		fptr->identification = skb_shinfo(skb)->ip6_frag_id;
 
 		/* Fragment the skb. ipv6 header and the remaining fields of the
 		 * fragment header are updated in ipv6_gso_segment()
-- 
cgit v1.2.3


From 73ba57bfae4a1914f6a6dac71e3168dd900e00af Mon Sep 17 00:00:00 2001
From: Steven Barth <cyrus@openwrt.org>
Date: Thu, 19 Mar 2015 16:16:04 +0100
Subject: ipv6: fix backtracking for throw routes

for throw routes to trigger evaluation of other policy rules
EAGAIN needs to be propagated up to fib_rules_lookup
similar to how its done for IPv4

A simple testcase for verification is:

ip -6 rule add lookup 33333 priority 33333
ip -6 route add throw 2001:db8::1
ip -6 route add 2001:db8::1 via fe80::1 dev wlan0 table 33333
ip route get 2001:db8::1

Signed-off-by: Steven Barth <cyrus@openwrt.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b4d5e1d97c1b..27ca79682efb 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 				goto again;
 			flp6->saddr = saddr;
 		}
+		err = rt->dst.error;
 		goto out;
 	}
 again:
-- 
cgit v1.2.3


From d22e1537181188e5dc8cbc51451832625035bdc2 Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Thu, 19 Mar 2015 19:19:30 -0400
Subject: tcp: fix tcp fin memory accounting

tcp_send_fin() does not account for the memory it allocates properly, so
sk_forward_alloc can be negative in cases where we've sent a FIN:

ss example output (ss -amn | grep -B1 f4294):
tcp    FIN-WAIT-1 0      1            192.168.0.1:45520         192.0.2.1:8080
	skmem:(r0,rb87380,t0,tb87380,f4294966016,w1280,o0,bl0)
Acked-by: Eric Dumazet <edumazet@google.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..1db253e36045 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)
 	} else {
 		/* Socket is locked, keep trying until memory is available. */
 		for (;;) {
-			skb = alloc_skb_fclone(MAX_TCP_HEADER,
-					       sk->sk_allocation);
+			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
 			if (skb)
 				break;
 			yield();
 		}
-
-		/* Reserve space for headers and prepare control bits. */
-		skb_reserve(skb, MAX_TCP_HEADER);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
 				     TCPHDR_ACK | TCPHDR_FIN);
-- 
cgit v1.2.3


From 91edd096e224941131f896b86838b1e59553696a Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 20 Mar 2015 16:48:13 +0000
Subject: net: compat: Update get_compat_msghdr() to match
 copy_msghdr_from_user() behaviour

Commit db31c55a6fb2 (net: clamp ->msg_namelen instead of returning an
error) introduced the clamping of msg_namelen when the unsigned value
was larger than sizeof(struct sockaddr_storage). This caused a
msg_namelen of -1 to be valid. The native code was subsequently fixed by
commit dbb490b96584 (net: socket: error on a negative msg_namelen).

In addition, the native code sets msg_namelen to 0 when msg_name is
NULL. This was done in commit (6a2a2b3ae075 net:socket: set msg_namelen
to 0 if msg_name is passed as NULL in msghdr struct from userland) and
subsequently updated by 08adb7dabd48 (fold verify_iovec() into
copy_msghdr_from_user()).

This patch brings the get_compat_msghdr() in line with
copy_msghdr_from_user().

Fixes: db31c55a6fb2 (net: clamp ->msg_namelen instead of returning an error)
Cc: David S. Miller <davem@davemloft.net>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/compat.c b/net/compat.c
index 94d3d5e97883..f7bd286a8280 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
 	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
 	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
 		return -EFAULT;
+
+	if (!uaddr)
+		kmsg->msg_namelen = 0;
+
+	if (kmsg->msg_namelen < 0)
+		return -EINVAL;
+
 	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
 		kmsg->msg_namelen = sizeof(struct sockaddr_storage);
 	kmsg->msg_control = compat_ptr(tmp3);
-- 
cgit v1.2.3


From 4de930efc23b92ddf88ce91c405ee645fe6e27ea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Fri, 20 Mar 2015 17:41:43 +0000
Subject: net: validate the range we feed to iov_iter_init() in
 sys_sendto/sys_recvfrom

Cc: stable@vger.kernel.org # v3.19
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index bbedbfcb42c2..245330ca0015 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1702,6 +1702,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
 
 	if (len > INT_MAX)
 		len = INT_MAX;
+	if (unlikely(!access_ok(VERIFY_READ, buff, len)))
+		return -EFAULT;
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1760,6 +1762,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
 
 	if (size > INT_MAX)
 		size = INT_MAX;
+	if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
+		return -EFAULT;
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
-- 
cgit v1.2.3


From 749177ccc74f9c6d0f51bd78a15c652a2134aa11 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 21 Mar 2015 19:25:05 +0100
Subject: netfilter: nft_compat: set IP6T_F_PROTO flag if protocol is set

ip6tables extensions check for this flag to restrict match/target to a
given protocol. Without this flag set, SYNPROXY6 returns an error.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nft_compat.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 213584cf04b3..65f3e2b6be44 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -133,6 +133,9 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
 		entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
 		break;
 	case AF_INET6:
+		if (proto)
+			entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
 		entry->e6.ipv6.proto = proto;
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
@@ -344,6 +347,9 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
 		entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
 		break;
 	case AF_INET6:
+		if (proto)
+			entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
 		entry->e6.ipv6.proto = proto;
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
-- 
cgit v1.2.3


From d0c294c53a771ae7e84506dfbd8c18c30f078735 Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Mon, 23 Mar 2015 15:14:00 +0100
Subject: tcp: prevent fetching dst twice in early demux code

On s390x, gcc 4.8 compiles this part of tcp_v6_early_demux()

        struct dst_entry *dst = sk->sk_rx_dst;

        if (dst)
                dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);

to code reading sk->sk_rx_dst twice, once for the test and once for
the argument of ip6_dst_check() (dst_check() is inline). This allows
ip6_dst_check() to be called with null first argument, causing a crash.

Protect sk->sk_rx_dst access by READ_ONCE() both in IPv4 and IPv6
TCP early demux code.

Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.")
Fixes: c7109986db3c ("ipv6: Early TCP socket demux")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 net/ipv6/tcp_ipv6.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a2dfed4783b..f1756ee02207 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 		skb->sk = sk;
 		skb->destructor = sock_edemux;
 		if (sk->sk_state != TCP_TIME_WAIT) {
-			struct dst_entry *dst = sk->sk_rx_dst;
+			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
 			if (dst)
 				dst = dst_check(dst, 0);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5d46832c6f72..b283a498f7a4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1585,7 +1585,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
 		skb->sk = sk;
 		skb->destructor = sock_edemux;
 		if (sk->sk_state != TCP_TIME_WAIT) {
-			struct dst_entry *dst = sk->sk_rx_dst;
+			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
 			if (dst)
 				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
-- 
cgit v1.2.3


From d079535d5e1bf5e2e7c856bae2483414ea21e137 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 23 Mar 2015 16:31:09 -0700
Subject: net: use for_each_netdev_safe() in rtnl_group_changelink()

In case we move the whole dev group to another netns,
we should call for_each_netdev_safe(), otherwise we get
a soft lockup:

 NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ip:798]
 irq event stamp: 255424
 hardirqs last  enabled at (255423): [<ffffffff81a2aa95>] restore_args+0x0/0x30
 hardirqs last disabled at (255424): [<ffffffff81a2ad5a>] apic_timer_interrupt+0x6a/0x80
 softirqs last  enabled at (255422): [<ffffffff81079ebc>] __do_softirq+0x2c1/0x3a9
 softirqs last disabled at (255417): [<ffffffff8107a190>] irq_exit+0x41/0x95
 CPU: 0 PID: 798 Comm: ip Not tainted 4.0.0-rc4+ #881
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 task: ffff8800d1b88000 ti: ffff880119530000 task.ti: ffff880119530000
 RIP: 0010:[<ffffffff810cad11>]  [<ffffffff810cad11>] debug_lockdep_rcu_enabled+0x28/0x30
 RSP: 0018:ffff880119533778  EFLAGS: 00000246
 RAX: ffff8800d1b88000 RBX: 0000000000000002 RCX: 0000000000000038
 RDX: 0000000000000000 RSI: ffff8800d1b888c8 RDI: ffff8800d1b888c8
 RBP: ffff880119533778 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000000 R11: 000000000000b5c2 R12: 0000000000000246
 R13: ffff880119533708 R14: 00000000001d5a40 R15: ffff88011a7d5a40
 FS:  00007fc01315f740(0000) GS:ffff88011a600000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: 00007f367a120988 CR3: 000000011849c000 CR4: 00000000000007f0
 Stack:
  ffff880119533798 ffffffff811ac868 ffffffff811ac831 ffffffff811ac828
  ffff8801195337c8 ffffffff811ac8c9 ffff8801195339b0 ffff8801197633e0
  0000000000000000 ffff8801195339b0 ffff8801195337d8 ffffffff811ad2d7
 Call Trace:
  [<ffffffff811ac868>] rcu_read_lock+0x37/0x6e
  [<ffffffff811ac831>] ? rcu_read_unlock+0x5f/0x5f
  [<ffffffff811ac828>] ? rcu_read_unlock+0x56/0x5f
  [<ffffffff811ac8c9>] __fget+0x2a/0x7a
  [<ffffffff811ad2d7>] fget+0x13/0x15
  [<ffffffff811be732>] proc_ns_fget+0xe/0x38
  [<ffffffff817c7714>] get_net_ns_by_fd+0x11/0x59
  [<ffffffff817df359>] rtnl_link_get_net+0x33/0x3e
  [<ffffffff817df3d7>] do_setlink+0x73/0x87b
  [<ffffffff810b28ce>] ? trace_hardirqs_off+0xd/0xf
  [<ffffffff81a2aa95>] ? retint_restore_args+0xe/0xe
  [<ffffffff817e0301>] rtnl_newlink+0x40c/0x699
  [<ffffffff817dffe0>] ? rtnl_newlink+0xeb/0x699
  [<ffffffff81a29246>] ? _raw_spin_unlock+0x28/0x33
  [<ffffffff8143ed1e>] ? security_capable+0x18/0x1a
  [<ffffffff8107da51>] ? ns_capable+0x4d/0x65
  [<ffffffff817de5ce>] rtnetlink_rcv_msg+0x181/0x194
  [<ffffffff817de407>] ? rtnl_lock+0x17/0x19
  [<ffffffff817de407>] ? rtnl_lock+0x17/0x19
  [<ffffffff817de44d>] ? __rtnl_unlock+0x17/0x17
  [<ffffffff818327c6>] netlink_rcv_skb+0x4d/0x93
  [<ffffffff817de42f>] rtnetlink_rcv+0x26/0x2d
  [<ffffffff81830f18>] netlink_unicast+0xcb/0x150
  [<ffffffff8183198e>] netlink_sendmsg+0x501/0x523
  [<ffffffff8115cba9>] ? might_fault+0x59/0xa9
  [<ffffffff817b5398>] ? copy_from_user+0x2a/0x2c
  [<ffffffff817b7b74>] sock_sendmsg+0x34/0x3c
  [<ffffffff817b7f6d>] ___sys_sendmsg+0x1b8/0x255
  [<ffffffff8115c5eb>] ? handle_pte_fault+0xbd5/0xd4a
  [<ffffffff8100a2b0>] ? native_sched_clock+0x35/0x37
  [<ffffffff8109e94b>] ? sched_clock_local+0x12/0x72
  [<ffffffff8109eb9c>] ? sched_clock_cpu+0x9e/0xb7
  [<ffffffff810cadbf>] ? rcu_read_lock_held+0x3b/0x3d
  [<ffffffff811ac1d8>] ? __fcheck_files+0x4c/0x58
  [<ffffffff811ac946>] ? __fget_light+0x2d/0x52
  [<ffffffff817b8adc>] __sys_sendmsg+0x42/0x60
  [<ffffffff817b8b0c>] SyS_sendmsg+0x12/0x1c
  [<ffffffff81a29e32>] system_call_fastpath+0x12/0x17

Fixes: e7ed828f10bd8 ("netlink: support setting devgroup parameters")
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ee0608bb3bc0..7ebed55b5f7d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1932,10 +1932,10 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
 		struct ifinfomsg *ifm,
 		struct nlattr **tb)
 {
-	struct net_device *dev;
+	struct net_device *dev, *aux;
 	int err;
 
-	for_each_netdev(net, dev) {
+	for_each_netdev_safe(net, dev, aux) {
 		if (dev->group == group) {
 			err = do_setlink(skb, dev, ifm, tb, NULL, 0);
 			if (err < 0)
-- 
cgit v1.2.3


From 6fd99094de2b83d1d4c8457f2c83483b2828e75a Mon Sep 17 00:00:00 2001
From: "D.S. Ljungmark" <ljungmark@modio.se>
Date: Wed, 25 Mar 2015 09:28:15 +0100
Subject: ipv6: Don't reduce hop limit for an interface

A local route may have a lower hop_limit set than global routes do.

RFC 3756, Section 4.2.7, "Parameter Spoofing"

>   1.  The attacker includes a Current Hop Limit of one or another small
>       number which the attacker knows will cause legitimate packets to
>       be dropped before they reach their destination.

>   As an example, one possible approach to mitigate this threat is to
>   ignore very small hop limits.  The nodes could implement a
>   configurable minimum hop limit, and ignore attempts to set it below
>   said limit.

Signed-off-by: D.S. Ljungmark <ljungmark@modio.se>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 471ed24aabae..14ecdaf06bf7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1218,7 +1218,14 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	if (rt)
 		rt6_set_expires(rt, jiffies + (HZ * lifetime));
 	if (ra_msg->icmph.icmp6_hop_limit) {
-		in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+		/* Only set hop_limit on the interface if it is higher than
+		 * the current hop_limit.
+		 */
+		if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) {
+			in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+		} else {
+			ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n");
+		}
 		if (rt)
 			dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
 				       ra_msg->icmph.icmp6_hop_limit);
-- 
cgit v1.2.3


From f243e5a7859a24d10975afb9a1708cac624ba6f1 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 25 Mar 2015 14:45:03 -0700
Subject: ipmr,ip6mr: call ip6mr_free_table() on failure path

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c  | 2 +-
 net/ipv6/ip6mr.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d78427652d2..92825443fad6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -268,7 +268,7 @@ static int __net_init ipmr_rules_init(struct net *net)
 	return 0;
 
 err2:
-	kfree(mrt);
+	ipmr_free_table(mrt);
 err1:
 	fib_rules_unregister(ops);
 	return err;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 34b682617f50..52028f449a89 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -252,7 +252,7 @@ static int __net_init ip6mr_rules_init(struct net *net)
 	return 0;
 
 err2:
-	kfree(mrt);
+	ip6mr_free_table(mrt);
 err1:
 	fib_rules_unregister(ops);
 	return err;
-- 
cgit v1.2.3


From 4217291e592da0e4258b652e82e5428639d29acc Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 26 Mar 2015 17:56:38 +0100
Subject: netns: don't clear nsid too early on removal

With the current code, ids are removed too early.
Suppose you have an ipip interface that stands in the netns foo and its link
part in the netns bar (so the netns bar has an nsid into the netns foo).
Now, you remove the netns bar:
 - the bar nsid into the netns foo is removed
 - the netns exit method of ipip is called, thus our ipip iface is removed:
   => a netlink message is sent in the netns foo to advertise this deletion
   => this netlink message requests an nsid for bar, thus a new nsid is
      allocated for bar and never removed.

We must remove nsids when we are sure that nobody will refer to netns currently
cleaned.

Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cb5290b8c428..5221f975a4cc 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -349,7 +349,7 @@ static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 static void cleanup_net(struct work_struct *work)
 {
 	const struct pernet_operations *ops;
-	struct net *net, *tmp;
+	struct net *net, *tmp, *peer;
 	struct list_head net_kill_list;
 	LIST_HEAD(net_exit_list);
 
@@ -365,14 +365,6 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry(net, &net_kill_list, cleanup_list) {
 		list_del_rcu(&net->list);
 		list_add_tail(&net->exit_list, &net_exit_list);
-		for_each_net(tmp) {
-			int id = __peernet2id(tmp, net, false);
-
-			if (id >= 0)
-				idr_remove(&tmp->netns_ids, id);
-		}
-		idr_destroy(&net->netns_ids);
-
 	}
 	rtnl_unlock();
 
@@ -398,12 +390,26 @@ static void cleanup_net(struct work_struct *work)
 	 */
 	rcu_barrier();
 
+	rtnl_lock();
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
+		/* Unreference net from all peers (no need to loop over
+		 * net_exit_list because idr_destroy() will be called for each
+		 * element of this list.
+		 */
+		for_each_net(peer) {
+			int id = __peernet2id(peer, net, false);
+
+			if (id >= 0)
+				idr_remove(&peer->netns_ids, id);
+		}
+		idr_destroy(&net->netns_ids);
+
 		list_del_init(&net->exit_list);
 		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
+	rtnl_unlock();
 }
 static DECLARE_WORK(net_cleanup_work, cleanup_net);
 
-- 
cgit v1.2.3


From 4ad19de8774e2a7b075b3e8ea48db85adcf33fa6 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Fri, 27 Mar 2015 12:24:22 +0300
Subject: net: tcp6: fix double call of tcp_v6_fill_cb()

tcp_v6_fill_cb() will be called twice if socket's state changes from
TCP_TIME_WAIT to TCP_LISTEN. That can result in control buffer data
corruption because in the second tcp_v6_fill_cb() call it's not copying
IP6CB(skb) anymore, but 'seq', 'end_seq', etc., so we can get weird and
unpredictable results. Performance loss of up to 1200% has been observed
in LTP/vxlan03 test.

This can be fixed by copying inet6_skb_parm to the beginning of 'cb'
only if xfrm6_policy_check() and tcp_v6_fill_cb() are going to be
called again.

Fixes: 2dc49d1680b53 ("tcp6: don't move IP6CB before xfrm6_policy_check()")

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b283a498f7a4..1f5e62229aaa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1411,6 +1411,15 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 	TCP_SKB_CB(skb)->sacked = 0;
 }
 
+static void tcp_v6_restore_cb(struct sk_buff *skb)
+{
+	/* We need to move header back to the beginning if xfrm6_policy_check()
+	 * and tcp_v6_fill_cb() are going to be called again.
+	 */
+	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
+		sizeof(struct inet6_skb_parm));
+}
+
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
 	const struct tcphdr *th;
@@ -1543,6 +1552,7 @@ do_time_wait:
 			inet_twsk_deschedule(tw, &tcp_death_row);
 			inet_twsk_put(tw);
 			sk = sk2;
+			tcp_v6_restore_cb(skb);
 			goto process;
 		}
 		/* Fall through to ACK */
@@ -1551,6 +1561,7 @@ do_time_wait:
 		tcp_v6_timewait_ack(sk, skb);
 		break;
 	case TCP_TW_RST:
+		tcp_v6_restore_cb(skb);
 		goto no_tcp_socket;
 	case TCP_TW_SUCCESS:
 		;
-- 
cgit v1.2.3


From f9c72d10d6fbf949558cd088389a42213ed7b12d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Tue, 31 Mar 2015 12:03:28 -0400
Subject: sunrpc: make debugfs file creation failure non-fatal

We currently have a problem that SELinux policy is being enforced when
creating debugfs files. If a debugfs file is created as a side effect of
doing some syscall, then that creation can fail if the SELinux policy
for that process prevents it.

This seems wrong. We don't do that for files under /proc, for instance,
so Bruce has proposed a patch to fix that.

While discussing that patch however, Greg K.H. stated:

    "No kernel code should care / fail if a debugfs function fails, so
     please fix up the sunrpc code first."

This patch converts all of the sunrpc debugfs setup code to be void
return functins, and the callers to not look for errors from those
functions.

This should allow rpc_clnt and rpc_xprt creation to work, even if the
kernel fails to create debugfs files for some reason.

Symptoms were failing krb5 mounts on systems using gss-proxy and
selinux.

Fixes: 388f0c776781 "sunrpc: add a debugfs rpc_xprt directory..."
Cc: stable@vger.kernel.org
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/debug.h | 18 +++++++--------
 net/sunrpc/clnt.c            |  4 +---
 net/sunrpc/debugfs.c         | 52 ++++++++++++++++++++++++--------------------
 net/sunrpc/sunrpc_syms.c     |  7 +-----
 net/sunrpc/xprt.c            |  7 +-----
 5 files changed, 41 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index c57d8ea0716c..59a7889e15db 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -60,17 +60,17 @@ struct rpc_xprt;
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 void		rpc_register_sysctl(void);
 void		rpc_unregister_sysctl(void);
-int		sunrpc_debugfs_init(void);
+void		sunrpc_debugfs_init(void);
 void		sunrpc_debugfs_exit(void);
-int		rpc_clnt_debugfs_register(struct rpc_clnt *);
+void		rpc_clnt_debugfs_register(struct rpc_clnt *);
 void		rpc_clnt_debugfs_unregister(struct rpc_clnt *);
-int		rpc_xprt_debugfs_register(struct rpc_xprt *);
+void		rpc_xprt_debugfs_register(struct rpc_xprt *);
 void		rpc_xprt_debugfs_unregister(struct rpc_xprt *);
 #else
-static inline int
+static inline void
 sunrpc_debugfs_init(void)
 {
-	return 0;
+	return;
 }
 
 static inline void
@@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void)
 	return;
 }
 
-static inline int
+static inline void
 rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 {
-	return 0;
+	return;
 }
 
 static inline void
@@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt)
 	return;
 }
 
-static inline int
+static inline void
 rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 {
-	return 0;
+	return;
 }
 
 static inline void
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 612aa73bbc60..e6ce1517367f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -303,9 +303,7 @@ static int rpc_client_register(struct rpc_clnt *clnt,
 	struct super_block *pipefs_sb;
 	int err;
 
-	err = rpc_clnt_debugfs_register(clnt);
-	if (err)
-		return err;
+	rpc_clnt_debugfs_register(clnt);
 
 	pipefs_sb = rpc_get_sb_net(net);
 	if (pipefs_sb) {
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e811f390f9f6..82962f7e6e88 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -129,48 +129,52 @@ static const struct file_operations tasks_fops = {
 	.release	= tasks_release,
 };
 
-int
+void
 rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 {
-	int len, err;
+	int len;
 	char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
+	struct rpc_xprt *xprt;
 
 	/* Already registered? */
-	if (clnt->cl_debugfs)
-		return 0;
+	if (clnt->cl_debugfs || !rpc_clnt_dir)
+		return;
 
 	len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
 	if (len >= sizeof(name))
-		return -EINVAL;
+		return;
 
 	/* make the per-client dir */
 	clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
 	if (!clnt->cl_debugfs)
-		return -ENOMEM;
+		return;
 
 	/* make tasks file */
-	err = -ENOMEM;
 	if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
 				 clnt, &tasks_fops))
 		goto out_err;
 
-	err = -EINVAL;
 	rcu_read_lock();
+	xprt = rcu_dereference(clnt->cl_xprt);
+	/* no "debugfs" dentry? Don't bother with the symlink. */
+	if (!xprt->debugfs) {
+		rcu_read_unlock();
+		return;
+	}
 	len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
-			rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name);
+			xprt->debugfs->d_name.name);
 	rcu_read_unlock();
+
 	if (len >= sizeof(name))
 		goto out_err;
 
-	err = -ENOMEM;
 	if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
 		goto out_err;
 
-	return 0;
+	return;
 out_err:
 	debugfs_remove_recursive(clnt->cl_debugfs);
 	clnt->cl_debugfs = NULL;
-	return err;
 }
 
 void
@@ -226,33 +230,33 @@ static const struct file_operations xprt_info_fops = {
 	.release	= xprt_info_release,
 };
 
-int
+void
 rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 {
 	int len, id;
 	static atomic_t	cur_id;
 	char		name[9]; /* 8 hex digits + NULL term */
 
+	if (!rpc_xprt_dir)
+		return;
+
 	id = (unsigned int)atomic_inc_return(&cur_id);
 
 	len = snprintf(name, sizeof(name), "%x", id);
 	if (len >= sizeof(name))
-		return -EINVAL;
+		return;
 
 	/* make the per-client dir */
 	xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir);
 	if (!xprt->debugfs)
-		return -ENOMEM;
+		return;
 
 	/* make tasks file */
 	if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
 				 xprt, &xprt_info_fops)) {
 		debugfs_remove_recursive(xprt->debugfs);
 		xprt->debugfs = NULL;
-		return -ENOMEM;
 	}
-
-	return 0;
 }
 
 void
@@ -266,14 +270,17 @@ void __exit
 sunrpc_debugfs_exit(void)
 {
 	debugfs_remove_recursive(topdir);
+	topdir = NULL;
+	rpc_clnt_dir = NULL;
+	rpc_xprt_dir = NULL;
 }
 
-int __init
+void __init
 sunrpc_debugfs_init(void)
 {
 	topdir = debugfs_create_dir("sunrpc", NULL);
 	if (!topdir)
-		goto out;
+		return;
 
 	rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
 	if (!rpc_clnt_dir)
@@ -283,10 +290,9 @@ sunrpc_debugfs_init(void)
 	if (!rpc_xprt_dir)
 		goto out_remove;
 
-	return 0;
+	return;
 out_remove:
 	debugfs_remove_recursive(topdir);
 	topdir = NULL;
-out:
-	return -ENOMEM;
+	rpc_clnt_dir = NULL;
 }
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index e37fbed87956..ee5d3d253102 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -98,10 +98,7 @@ init_sunrpc(void)
 	if (err)
 		goto out4;
 
-	err = sunrpc_debugfs_init();
-	if (err)
-		goto out5;
-
+	sunrpc_debugfs_init();
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	rpc_register_sysctl();
 #endif
@@ -109,8 +106,6 @@ init_sunrpc(void)
 	init_socket_xprt();	/* clnt sock transport */
 	return 0;
 
-out5:
-	unregister_rpc_pipefs();
 out4:
 	unregister_pernet_subsys(&sunrpc_net_ops);
 out3:
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e3015aede0d9..9949722d99ce 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1331,7 +1331,6 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
  */
 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
-	int err;
 	struct rpc_xprt	*xprt;
 	struct xprt_class *t;
 
@@ -1372,11 +1371,7 @@ found:
 		return ERR_PTR(-ENOMEM);
 	}
 
-	err = rpc_xprt_debugfs_register(xprt);
-	if (err) {
-		xprt_destroy(xprt);
-		return ERR_PTR(err);
-	}
+	rpc_xprt_debugfs_register(xprt);
 
 	dprintk("RPC:       created transport %p with %u slots\n", xprt,
 			xprt->max_reqs);
-- 
cgit v1.2.3


From fa2d8ff4e3522b4e05f590575d3eb8087f3a8cdc Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 30 Mar 2015 13:57:41 +0200
Subject: openvswitch: Return vport module ref before destruction

Return module reference before invoking the respective vport
->destroy() function. This is needed as ovs_vport_del() is not
invoked inside an RCU read side critical section so the kfree
can occur immediately before returning to ovs_vport_del().

Returning the module reference before ->destroy() is safe because
the module unregistration is blocked on ovs_lock which we hold
while destroying the datapath.

Fixes: 62b9c8d0372d ("ovs: Turn vports with dependencies into separate modules")
Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/vport.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index ec2954ffc690..067a3fff1d2c 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -274,10 +274,8 @@ void ovs_vport_del(struct vport *vport)
 	ASSERT_OVSL();
 
 	hlist_del_rcu(&vport->hash_node);
-
-	vport->ops->destroy(vport);
-
 	module_put(vport->ops->owner);
+	vport->ops->destroy(vport);
 }
 
 /**
-- 
cgit v1.2.3


From ed4ac4221776a5103faf71a4032ca00178d6e66b Mon Sep 17 00:00:00 2001
From: Eugene Crosser <Eugene.Crosser@ru.ibm.com>
Date: Mon, 30 Mar 2015 15:40:42 +0200
Subject: af_iucv: fix AF_IUCV sendmsg() errno

When sending over AF_IUCV socket, errno was incorrectly set to
ENOMEM even when other values where appropriate, notably EAGAIN.
With this patch, error indicator returned by sock_alloc_send_skb()
is passed to the caller, rather than being overwritten with ENOMEM.

Signed-off-by: Eugene Crosser <Eugene.Crosser@ru.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 2e9953b2db84..53d931172088 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1114,10 +1114,8 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 			noblock, &err);
 	else
 		skb = sock_alloc_send_skb(sk, len, noblock, &err);
-	if (!skb) {
-		err = -ENOMEM;
+	if (!skb)
 		goto out;
-	}
 	if (iucv->transport == AF_IUCV_TRANS_HIPER)
 		skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
 	if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
-- 
cgit v1.2.3


From 7e436905780659d6dc12d0581944934bf91a9919 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 1 Apr 2015 09:42:50 +0800
Subject: tipc: fix a slab object leak

When remove TIPC module, there is a warning to remind us that a slab
object is leaked like:

root@localhost:~# rmmod tipc
[   19.056226] =============================================================================
[   19.057549] BUG TIPC (Not tainted): Objects remaining in TIPC on kmem_cache_close()
[   19.058736] -----------------------------------------------------------------------------
[   19.058736]
[   19.060287] INFO: Slab 0xffffea0000519a00 objects=23 used=1 fp=0xffff880014668b00 flags=0x100000000004080
[   19.061915] INFO: Object 0xffff880014668000 @offset=0
[   19.062717] kmem_cache_destroy TIPC: Slab cache still has objects

This is because the listening socket of TIPC topology server is not
closed before TIPC proto handler is unregistered with proto_unregister().
However, as the socket is closed in tipc_exit_net() which is called by
unregister_pernet_subsys() during unregistering TIPC namespace operation,
the warning can be eliminated if calling unregister_pernet_subsys() is
moved before calling proto_unregister().

Fixes: e05b31f4bf89 ("tipc: make tipc socket support net namespace")
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 935205e6bcfe..be1c9fa60b09 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -152,11 +152,11 @@ out_netlink:
 static void __exit tipc_exit(void)
 {
 	tipc_bearer_cleanup();
+	unregister_pernet_subsys(&tipc_net_ops);
 	tipc_netlink_stop();
 	tipc_netlink_compat_stop();
 	tipc_socket_stop();
 	tipc_unregister_sysctl();
-	unregister_pernet_subsys(&tipc_net_ops);
 
 	pr_info("Deactivated\n");
 }
-- 
cgit v1.2.3


From 788211d81bfdf9b6a547d0530f206ba6ee76b107 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 1 Apr 2015 14:20:42 +0200
Subject: mac80211: fix RX A-MPDU session reorder timer deletion

There's an issue with the way the RX A-MPDU reorder timer is
deleted that can cause a kernel crash like this:

 * tid_rx is removed - call_rcu(ieee80211_free_tid_rx)
 * station is destroyed
 * reorder timer fires before ieee80211_free_tid_rx() runs,
   accessing the station, thus potentially crashing due to
   the use-after-free

The station deletion is protected by synchronize_net(), but
that isn't enough -- ieee80211_free_tid_rx() need not have
run when that returns (it deletes the timer.) We could use
rcu_barrier() instead of synchronize_net(), but that's much
more expensive.

Instead, to fix this, add a field tracking that the session
is being deleted. In this case, the only re-arming of the
timer happens with the reorder spinlock held, so make that
code not rearm it if the session is being deleted and also
delete the timer after setting that field. This ensures the
timer cannot fire after ___ieee80211_stop_rx_ba_session()
returns, which fixes the problem.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/agg-rx.c   | 8 ++++++--
 net/mac80211/rx.c       | 7 ++++---
 net/mac80211/sta_info.h | 2 ++
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index a48bad468880..7702978a4c99 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -49,8 +49,6 @@ static void ieee80211_free_tid_rx(struct rcu_head *h)
 		container_of(h, struct tid_ampdu_rx, rcu_head);
 	int i;
 
-	del_timer_sync(&tid_rx->reorder_timer);
-
 	for (i = 0; i < tid_rx->buf_size; i++)
 		__skb_queue_purge(&tid_rx->reorder_buf[i]);
 	kfree(tid_rx->reorder_buf);
@@ -93,6 +91,12 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 
 	del_timer_sync(&tid_rx->session_timer);
 
+	/* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */
+	spin_lock_bh(&tid_rx->reorder_lock);
+	tid_rx->removed = true;
+	spin_unlock_bh(&tid_rx->reorder_lock);
+	del_timer_sync(&tid_rx->reorder_timer);
+
 	call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
 }
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 944bdc04e913..1eb730bf8752 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -873,9 +873,10 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
 
  set_release_timer:
 
-		mod_timer(&tid_agg_rx->reorder_timer,
-			  tid_agg_rx->reorder_time[j] + 1 +
-			  HT_RX_REORDER_BUF_TIMEOUT);
+		if (!tid_agg_rx->removed)
+			mod_timer(&tid_agg_rx->reorder_timer,
+				  tid_agg_rx->reorder_time[j] + 1 +
+				  HT_RX_REORDER_BUF_TIMEOUT);
 	} else {
 		del_timer(&tid_agg_rx->reorder_timer);
 	}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 925e68fe64c7..fb0fc1302a58 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -175,6 +175,7 @@ struct tid_ampdu_tx {
  * @reorder_lock: serializes access to reorder buffer, see below.
  * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
  *	and ssn.
+ * @removed: this session is removed (but might have been found due to RCU)
  *
  * This structure's lifetime is managed by RCU, assignments to
  * the array holding it must hold the aggregation mutex.
@@ -199,6 +200,7 @@ struct tid_ampdu_rx {
 	u16 timeout;
 	u8 dialog_token;
 	bool auto_seq;
+	bool removed;
 };
 
 /**
-- 
cgit v1.2.3


From 666b805150efd62f05810ff0db08f44a2370c937 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 1 Apr 2015 20:26:46 -0400
Subject: tcp: fix FRTO undo on cumulative ACK of SACKed range

On processing cumulative ACKs, the FRTO code was not checking the
SACKed bit, meaning that there could be a spurious FRTO undo on a
cumulative ACK of a previously SACKed skb.

The FRTO code should only consider a cumulative ACK to indicate that
an original/unretransmitted skb is newly ACKed if the skb was not yet
SACKed.

The effect of the spurious FRTO undo would typically be to make the
connection think that all previously-sent packets were in flight when
they really weren't, leading to a stall and an RTO.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Fixes: e33099f96d99c ("tcp: implement RFC5682 F-RTO")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fb4cf8b8e121..f501ac048366 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3105,10 +3105,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			if (!first_ackt.v64)
 				first_ackt = last_ackt;
 
-			if (!(sacked & TCPCB_SACKED_ACKED))
+			if (!(sacked & TCPCB_SACKED_ACKED)) {
 				reord = min(pkts_acked, reord);
-			if (!after(scb->end_seq, tp->high_seq))
-				flag |= FLAG_ORIG_SACK_ACKED;
+				if (!after(scb->end_seq, tp->high_seq))
+					flag |= FLAG_ORIG_SACK_ACKED;
+			}
 		}
 
 		if (sacked & TCPCB_SACKED_ACKED)
-- 
cgit v1.2.3


From ed785309c94445dd90e242370e1f7bb034e008fd Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 31 Mar 2015 11:01:45 -0700
Subject: ipv4: take rtnl_lock and mark mrt table as freed on namespace cleanup

This is the IPv4 part for commit 905a6f96a1b1
(ipv6: take rtnl_lock and mark mrt6 table as freed on namespace cleanup).

Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 92825443fad6..bc40115bc394 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -278,10 +278,12 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 {
 	struct mr_table *mrt, *next;
 
+	rtnl_lock();
 	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
 		list_del(&mrt->list);
 		ipmr_free_table(mrt);
 	}
+	rtnl_unlock();
 	fib_rules_unregister(net->ipv4.mr_rules_ops);
 }
 #else
@@ -308,7 +310,10 @@ static int __net_init ipmr_rules_init(struct net *net)
 
 static void __net_exit ipmr_rules_exit(struct net *net)
 {
+	rtnl_lock();
 	ipmr_free_table(net->ipv4.mrt);
+	net->ipv4.mrt = NULL;
+	rtnl_unlock();
 }
 #endif
 
-- 
cgit v1.2.3


From 419df12fb5fa558451319276838c1842f2b11f8f Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 31 Mar 2015 11:01:46 -0700
Subject: net: move fib_rules_unregister() under rtnl lock

We have to hold rtnl lock for fib_rules_unregister()
otherwise the following race could happen:

fib_rules_unregister():	fib_nl_delrule():
...				...
...				ops = lookup_rules_ops();
list_del_rcu(&ops->list);
				list_for_each_entry(ops->rules) {
fib_rules_cleanup_ops(ops);	  ...
  list_del_rcu();		  list_del_rcu();
				}

Note, net->rules_mod_lock is actually not needed at all,
either upper layer netns code or rtnl lock guarantees
we are safe.

Cc: Alexander Duyck <alexander.h.duyck@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c    | 2 +-
 net/decnet/dn_rules.c   | 2 ++
 net/ipv4/fib_frontend.c | 3 +--
 net/ipv4/ipmr.c         | 2 +-
 net/ipv6/fib6_rules.c   | 2 ++
 net/ipv6/ip6mr.c        | 2 +-
 6 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 44706e81b2e0..e4fdc9dfb2c7 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -175,9 +175,9 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 
 	spin_lock(&net->rules_mod_lock);
 	list_del_rcu(&ops->list);
-	fib_rules_cleanup_ops(ops);
 	spin_unlock(&net->rules_mod_lock);
 
+	fib_rules_cleanup_ops(ops);
 	call_rcu(&ops->rcu, fib_rules_put_rcu);
 }
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index faf7cc3483fe..9d66a0f72f90 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -248,7 +248,9 @@ void __init dn_fib_rules_init(void)
 
 void __exit dn_fib_rules_cleanup(void)
 {
+	rtnl_lock();
 	fib_rules_unregister(dn_fib_rules_ops);
+	rtnl_unlock();
 	rcu_barrier();
 }
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 57be71dd6a9e..23b9b3e86f4c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1111,11 +1111,10 @@ static void ip_fib_net_exit(struct net *net)
 {
 	unsigned int i;
 
+	rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	fib4_rules_exit(net);
 #endif
-
-	rtnl_lock();
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
 		struct fib_table *tb;
 		struct hlist_head *head;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index bc40115bc394..fe54eba6d00d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -283,8 +283,8 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 		list_del(&mrt->list);
 		ipmr_free_table(mrt);
 	}
-	rtnl_unlock();
 	fib_rules_unregister(net->ipv4.mr_rules_ops);
+	rtnl_unlock();
 }
 #else
 #define ipmr_for_each_table(mrt, net) \
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 27ca79682efb..70bc6abc0639 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -322,7 +322,9 @@ out_fib6_rules_ops:
 
 static void __net_exit fib6_rules_net_exit(struct net *net)
 {
+	rtnl_lock();
 	fib_rules_unregister(net->ipv6.fib6_rules_ops);
+	rtnl_unlock();
 }
 
 static struct pernet_operations fib6_rules_net_ops = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 52028f449a89..2f1fd9ffcb34 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -267,8 +267,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 		list_del(&mrt->list);
 		ip6mr_free_table(mrt);
 	}
-	rtnl_unlock();
 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
+	rtnl_unlock();
 }
 #else
 #define ip6mr_for_each_table(mrt, net) \
-- 
cgit v1.2.3


From 7ba0c47c34a1ea5bc7a24ca67309996cce0569b5 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 31 Mar 2015 11:01:47 -0700
Subject: ip6mr: call del_timer_sync() in ip6mr_free_table()

We need to wait for the flying timers, since we
are going to free the mrtable right after it.

Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2f1fd9ffcb34..312e0ff47339 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -336,7 +336,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 
 static void ip6mr_free_table(struct mr6_table *mrt)
 {
-	del_timer(&mrt->ipmr_expire_timer);
+	del_timer_sync(&mrt->ipmr_expire_timer);
 	mroute_clean_tables(mrt);
 	kfree(mrt);
 }
-- 
cgit v1.2.3


From 6d458f5b4ece8542a5c2281e40008823fec91814 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 3 Apr 2015 12:02:36 +0200
Subject: Revert "netns: don't clear nsid too early on removal"

This reverts
commit 4217291e592d ("netns: don't clear nsid too early on removal").

This is not the right fix, it introduces races.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 5221f975a4cc..cb5290b8c428 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -349,7 +349,7 @@ static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 static void cleanup_net(struct work_struct *work)
 {
 	const struct pernet_operations *ops;
-	struct net *net, *tmp, *peer;
+	struct net *net, *tmp;
 	struct list_head net_kill_list;
 	LIST_HEAD(net_exit_list);
 
@@ -365,6 +365,14 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry(net, &net_kill_list, cleanup_list) {
 		list_del_rcu(&net->list);
 		list_add_tail(&net->exit_list, &net_exit_list);
+		for_each_net(tmp) {
+			int id = __peernet2id(tmp, net, false);
+
+			if (id >= 0)
+				idr_remove(&tmp->netns_ids, id);
+		}
+		idr_destroy(&net->netns_ids);
+
 	}
 	rtnl_unlock();
 
@@ -390,26 +398,12 @@ static void cleanup_net(struct work_struct *work)
 	 */
 	rcu_barrier();
 
-	rtnl_lock();
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
-		/* Unreference net from all peers (no need to loop over
-		 * net_exit_list because idr_destroy() will be called for each
-		 * element of this list.
-		 */
-		for_each_net(peer) {
-			int id = __peernet2id(peer, net, false);
-
-			if (id >= 0)
-				idr_remove(&peer->netns_ids, id);
-		}
-		idr_destroy(&net->netns_ids);
-
 		list_del_init(&net->exit_list);
 		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
-	rtnl_unlock();
 }
 static DECLARE_WORK(net_cleanup_work, cleanup_net);
 
-- 
cgit v1.2.3


From 576b7cd2f6ff1e90b3fc0a000d2fe74f8a50a4bb Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 3 Apr 2015 12:02:37 +0200
Subject: netns: don't allocate an id for dead netns

First, let's explain the problem.
Suppose you have an ipip interface that stands in the netns foo and its link
part in the netns bar (so the netns bar has an nsid into the netns foo).
Now, you remove the netns bar:
 - the bar nsid into the netns foo is removed
 - the netns exit method of ipip is called, thus our ipip iface is removed:
   => a netlink message is built in the netns foo to advertise this deletion
   => this netlink message requests an nsid for bar, thus a new nsid is
      allocated for bar and never removed.

This patch adds a check in peernet2id() so that an id cannot be allocated for
a netns which is currently destroyed.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cb5290b8c428..70d3450588b2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -198,8 +198,10 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
  */
 int peernet2id(struct net *net, struct net *peer)
 {
-	int id = __peernet2id(net, peer, true);
+	bool alloc = atomic_read(&peer->count) == 0 ? false : true;
+	int id;
 
+	id = __peernet2id(net, peer, alloc);
 	return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
 }
 EXPORT_SYMBOL(peernet2id);
-- 
cgit v1.2.3


From f60e5990d9c1424af9dbca60a23ba2a1c7c1ce90 Mon Sep 17 00:00:00 2001
From: "hannes@stressinduktion.org" <hannes@stressinduktion.org>
Date: Wed, 1 Apr 2015 17:07:44 +0200
Subject: ipv6: protect skb->sk accesses from recursive dereference inside the
 stack

We should not consult skb->sk for output decisions in xmit recursion
levels > 0 in the stack. Otherwise local socket settings could influence
the result of e.g. tunnel encapsulation process.

ipv6 does not conform with this in three places:

1) ip6_fragment: we do consult ipv6_npinfo for frag_size

2) sk_mc_loop in ipv6 uses skb->sk and checks if we should
   loop the packet back to the local socket

3) ip6_skb_dst_mtu could query the settings from the user socket and
   force a wrong MTU

Furthermore:
In sk_mc_loop we could potentially land in WARN_ON(1) if we use a
PF_PACKET socket ontop of an IPv6-backed vxlan device.

Reuse xmit_recursion as we are currently only interested in protecting
tunnel devices.

Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 ++++++
 include/net/ip.h          | 16 ----------------
 include/net/ip6_route.h   |  3 ++-
 include/net/sock.h        |  2 ++
 net/core/dev.c            |  4 +++-
 net/core/sock.c           | 19 +++++++++++++++++++
 net/ipv6/ip6_output.c     |  3 ++-
 7 files changed, 34 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dcf6ec27739b..278738873703 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2185,6 +2185,12 @@ void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
+DECLARE_PER_CPU(int, xmit_recursion);
+static inline int dev_recursion_level(void)
+{
+	return this_cpu_read(xmit_recursion);
+}
+
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
diff --git a/include/net/ip.h b/include/net/ip.h
index 025c61c0dffb..6cc1eafb153a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
 
 #endif
 
-static inline int sk_mc_loop(struct sock *sk)
-{
-	if (!sk)
-		return 1;
-	switch (sk->sk_family) {
-	case AF_INET:
-		return inet_sk(sk)->mc_loop;
-#if IS_ENABLED(CONFIG_IPV6)
-	case AF_INET6:
-		return inet6_sk(sk)->mc_loop;
-#endif
-	}
-	WARN_ON(1);
-	return 1;
-}
-
 bool ip_call_ra_chain(struct sk_buff *skb);
 
 /*
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 1d09b46c1e48..eda131d179d9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 {
-	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+				inet6_sk(skb->sk) : NULL;
 
 	return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ?
 	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
diff --git a/include/net/sock.h b/include/net/sock.h
index ab186b1d31ff..e4079c28e6b8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1762,6 +1762,8 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
 
 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
 
+bool sk_mc_loop(struct sock *sk);
+
 static inline bool sk_can_gso(const struct sock *sk)
 {
 	return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
diff --git a/net/core/dev.c b/net/core/dev.c
index 962ee9d71964..45109b70664e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2848,7 +2848,9 @@ static void skb_update_prio(struct sk_buff *skb)
 #define skb_update_prio(skb)
 #endif
 
-static DEFINE_PER_CPU(int, xmit_recursion);
+DEFINE_PER_CPU(int, xmit_recursion);
+EXPORT_SYMBOL(xmit_recursion);
+
 #define RECURSION_LIMIT 10
 
 /**
diff --git a/net/core/sock.c b/net/core/sock.c
index 78e89eb7eb70..71e3e5f1eaa0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
 		sock_reset_flag(sk, bit);
 }
 
+bool sk_mc_loop(struct sock *sk)
+{
+	if (dev_recursion_level())
+		return false;
+	if (!sk)
+		return true;
+	switch (sk->sk_family) {
+	case AF_INET:
+		return inet_sk(sk)->mc_loop;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		return inet6_sk(sk)->mc_loop;
+#endif
+	}
+	WARN_ON(1);
+	return true;
+}
+EXPORT_SYMBOL(sk_mc_loop);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7e80b61b51ff..36cf0ab685a0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -542,7 +542,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
-	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+				inet6_sk(skb->sk) : NULL;
 	struct ipv6hdr *tmp_hdr;
 	struct frag_hdr *fh;
 	unsigned int mtu, hlen, left, len;
-- 
cgit v1.2.3


From 67e04c29ec0daad9ba29341b4dab4b89526994cf Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Fri, 3 Apr 2015 13:46:09 -0700
Subject: l2tp: unregister l2tp_net_ops on failure path

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 895348e44c7d..a29a504492af 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1871,6 +1871,7 @@ static int __init l2tp_init(void)
 	l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0);
 	if (!l2tp_wq) {
 		pr_err("alloc_workqueue failed\n");
+		unregister_pernet_device(&l2tp_net_ops);
 		rc = -ENOMEM;
 		goto out;
 	}
-- 
cgit v1.2.3


From 303038135afbd0520d1e241c02592be6e4ea7204 Mon Sep 17 00:00:00 2001
From: Pavel Nakonechny <pavel.nakonechny@skitlab.ru>
Date: Sun, 5 Apr 2015 00:46:21 +0300
Subject: net: dsa: fix filling routing table from OF description

According to description in 'include/net/dsa.h', in cascade switches
configurations where there are more than one interconnected devices,
'rtable' array in 'dsa_chip_data' structure is used to indicate which
port on this switch should be used to send packets to that are destined
for corresponding switch.

However, dsa_of_setup_routing_table() fills 'rtable' with port numbers
of the _target_ switch, but not current one.

This commit removes redundant devicetree parsing and adds needed port
number as a function argument. So dsa_of_setup_routing_table() now just
looks for target switch number by parsing parent of 'link' device node.

To remove possible misunderstandings with the way of determining target
switch number, a corresponding comment was added to the source code and
to the DSA device tree bindings documentation file.

This was tested on a custom board with two Marvell 88E6095 switches with
following corresponding routing tables: { -1, 10 } and { 8, -1 }.

Signed-off-by: Pavel Nakonechny <pavel.nakonechny@skitlab.ru>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/dsa.txt |  4 +++-
 net/dsa/dsa.c                                     | 23 +++++++----------------
 2 files changed, 10 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt
index e124847443f8..f0b4cd72411d 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.txt
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt
@@ -19,7 +19,9 @@ the parent DSA node. The maximum number of allowed child nodes is 4
 (DSA_MAX_SWITCHES).
 Each of these switch child nodes should have the following required properties:
 
-- reg			: Describes the switch address on the MII bus
+- reg			: Contains two fields. The first one describes the
+			  address on the MII bus. The second is the switch
+			  number that must be unique in cascaded configurations
 - #address-cells	: Must be 1
 - #size-cells		: Must be 0
 
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2173402d87e0..4dea2e0681d1 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -501,12 +501,10 @@ static struct net_device *dev_to_net_device(struct device *dev)
 #ifdef CONFIG_OF
 static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
 					struct dsa_chip_data *cd,
-					int chip_index,
+					int chip_index, int port_index,
 					struct device_node *link)
 {
-	int ret;
 	const __be32 *reg;
-	int link_port_addr;
 	int link_sw_addr;
 	struct device_node *parent_sw;
 	int len;
@@ -519,6 +517,10 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
 	if (!reg || (len != sizeof(*reg) * 2))
 		return -EINVAL;
 
+	/*
+	 * Get the destination switch number from the second field of its 'reg'
+	 * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
+	 */
 	link_sw_addr = be32_to_cpup(reg + 1);
 
 	if (link_sw_addr >= pd->nr_chips)
@@ -535,20 +537,9 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
 		memset(cd->rtable, -1, pd->nr_chips * sizeof(s8));
 	}
 
-	reg = of_get_property(link, "reg", NULL);
-	if (!reg) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	link_port_addr = be32_to_cpup(reg);
-
-	cd->rtable[link_sw_addr] = link_port_addr;
+	cd->rtable[link_sw_addr] = port_index;
 
 	return 0;
-out:
-	kfree(cd->rtable);
-	return ret;
 }
 
 static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
@@ -658,7 +649,7 @@ static int dsa_of_probe(struct platform_device *pdev)
 			if (!strcmp(port_name, "dsa") && link &&
 					pd->nr_chips > 1) {
 				ret = dsa_of_setup_routing_table(pd, cd,
-						chip_index, link);
+						chip_index, port_index, link);
 				if (ret)
 					goto out_free_chip;
 			}
-- 
cgit v1.2.3