From a39e17b2d842938e19997d2fdc0443fdd4cd8d10 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 27 Nov 2017 12:10:23 -0800
Subject: bpf: offload: add a license header

I forgot to add a license on kernel/bpf/offload.c.  Luckily I'm
still the only author so make it explicitly GPLv2.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/offload.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 68ec884440b7..8455b89d1bbf 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -1,3 +1,18 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
 #include <linux/bpf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bug.h>
-- 
cgit v1.2.3


From 3d7682af228fd78dc46bc6bf40e0268ad04521ec Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 29 Nov 2017 14:25:50 +0000
Subject: rxrpc: Clean up whitespace

Clean up some whitespace from rxrpc.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/call_event.c  | 2 +-
 net/rxrpc/conn_object.c | 2 +-
 net/rxrpc/input.c       | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index bda952ffe6a6..555274ddc514 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -426,7 +426,7 @@ recheck_state:
 	next = call->expect_rx_by;
 
 #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
-	
+
 	set(call->expect_req_by);
 	set(call->expect_term_by);
 	set(call->ack_at);
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 1aad04a32d5e..c628351eb900 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -424,7 +424,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 	if (earliest != now + MAX_JIFFY_OFFSET) {
 		_debug("reschedule reaper %ld", (long)earliest - (long)now);
 		ASSERT(time_after(earliest, now));
-		rxrpc_set_service_reap_timer(rxnet, earliest);		
+		rxrpc_set_service_reap_timer(rxnet, earliest);
 	}
 
 	while (!list_empty(&graveyard)) {
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 23a5e61d8f79..6fc61400337f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -976,7 +976,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
 		rxrpc_reduce_call_timer(call, expect_rx_by, now,
 					rxrpc_timer_set_for_normal);
 	}
-	
+
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_DATA:
 		rxrpc_input_data(call, skb, skew);
@@ -1213,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
 				goto reupgrade;
 			conn->service_id = sp->hdr.serviceId;
 		}
-		
+
 		if (sp->hdr.callNumber == 0) {
 			/* Connection-level packet */
 			_debug("CONN %p {%d}", conn, conn->debug_id);
-- 
cgit v1.2.3


From 5fc62f6a139a7b06b027bf442cd4205619506f59 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 29 Nov 2017 14:40:41 +0000
Subject: rxrpc: Fix ACK generation from the connection event processor

Repeat terminal ACKs and now terminal ACKs are now generated from the
connection event processor rather from call handling as this allows us to
discard client call structures as soon as possible and free up the channel
for a follow on call.

However, in ACKs so generated, the additional information trailer is
malformed because the padding that's meant to be in the middle isn't
included in what's transmitted.

Fix it so that the 3 bytes of padding are included in the transmission.

Further, the trailer is misaligned because of the padding, so assigment to
the u16 and u32 fields inside it might cause problems on some arches, so
fix this by breaking the padding and the trailer out of the packed struct.

(This also deals with potential compiler weirdies where some of the nested
structs are packed and some aren't).

The symptoms can be seen in wireshark as terminal DUPLICATE or IDLE ACK
packets in which the Max MTU, Interface MTU and rwind fields have weird
values and the Max Packets field is apparently missing.

Reported-by: Jeffrey Altman <jaltman@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/conn_event.c | 50 +++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 9e9a8db1bc9c..4ca11be6be3c 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -30,22 +30,18 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
 	struct rxrpc_channel *chan;
 	struct msghdr msg;
-	struct kvec iov;
+	struct kvec iov[3];
 	struct {
 		struct rxrpc_wire_header whdr;
 		union {
-			struct {
-				__be32 code;
-			} abort;
-			struct {
-				struct rxrpc_ackpacket ack;
-				u8 padding[3];
-				struct rxrpc_ackinfo info;
-			};
+			__be32 abort_code;
+			struct rxrpc_ackpacket ack;
 		};
 	} __attribute__((packed)) pkt;
+	struct rxrpc_ackinfo ack_info;
 	size_t len;
-	u32 serial, mtu, call_id;
+	int ioc;
+	u32 serial, mtu, call_id, padding;
 
 	_enter("%d", conn->debug_id);
 
@@ -66,6 +62,13 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	msg.msg_controllen = 0;
 	msg.msg_flags	= 0;
 
+	iov[0].iov_base	= &pkt;
+	iov[0].iov_len	= sizeof(pkt.whdr);
+	iov[1].iov_base	= &padding;
+	iov[1].iov_len	= 3;
+	iov[2].iov_base	= &ack_info;
+	iov[2].iov_len	= sizeof(ack_info);
+
 	pkt.whdr.epoch		= htonl(conn->proto.epoch);
 	pkt.whdr.cid		= htonl(conn->proto.cid);
 	pkt.whdr.callNumber	= htonl(call_id);
@@ -80,8 +83,10 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	len = sizeof(pkt.whdr);
 	switch (chan->last_type) {
 	case RXRPC_PACKET_TYPE_ABORT:
-		pkt.abort.code	= htonl(chan->last_abort);
-		len += sizeof(pkt.abort);
+		pkt.abort_code	= htonl(chan->last_abort);
+		iov[0].iov_len += sizeof(pkt.abort_code);
+		len += sizeof(pkt.abort_code);
+		ioc = 1;
 		break;
 
 	case RXRPC_PACKET_TYPE_ACK:
@@ -94,13 +99,19 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 		pkt.ack.serial		= htonl(skb ? sp->hdr.serial : 0);
 		pkt.ack.reason		= skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
 		pkt.ack.nAcks		= 0;
-		pkt.info.rxMTU		= htonl(rxrpc_rx_mtu);
-		pkt.info.maxMTU		= htonl(mtu);
-		pkt.info.rwind		= htonl(rxrpc_rx_window_size);
-		pkt.info.jumbo_max	= htonl(rxrpc_rx_jumbo_max);
+		ack_info.rxMTU		= htonl(rxrpc_rx_mtu);
+		ack_info.maxMTU		= htonl(mtu);
+		ack_info.rwind		= htonl(rxrpc_rx_window_size);
+		ack_info.jumbo_max	= htonl(rxrpc_rx_jumbo_max);
 		pkt.whdr.flags		|= RXRPC_SLOW_START_OK;
-		len += sizeof(pkt.ack) + sizeof(pkt.info);
+		padding			= 0;
+		iov[0].iov_len += sizeof(pkt.ack);
+		len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+		ioc = 3;
 		break;
+
+	default:
+		return;
 	}
 
 	/* Resync with __rxrpc_disconnect_call() and check that the last call
@@ -110,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	if (READ_ONCE(chan->last_call) != call_id)
 		return;
 
-	iov.iov_base	= &pkt;
-	iov.iov_len	= len;
-
 	serial = atomic_inc_return(&conn->serial);
 	pkt.whdr.serial = htonl(serial);
 
@@ -127,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 		break;
 	}
 
-	kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+	kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
 	_leave("");
 	return;
 }
-- 
cgit v1.2.3


From 282ef4729195c8503f7101d574acfb5e7c8a8209 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Tue, 28 Nov 2017 11:28:52 -0600
Subject: rxrpc: Fix variable overwrite

Values assigned to both variable resend_at and ack_at are overwritten
before they can be used.

The correct fix here is to add 'now' to the previously computed value in
resend_at and ack_at.

Addresses-Coverity-ID: 1462262
Addresses-Coverity-ID: 1462263
Addresses-Coverity-ID: 1462264
Fixes: beb8e5e4f38c ("rxrpc: Express protocol timeouts in terms of RTT")
Link: https://marc.info/?i=17004.1511808959%40warthog.procyon.org.uk
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/call_event.c | 2 +-
 net/rxrpc/sendmsg.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 555274ddc514..ad2ab1103189 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,7 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 		else
 			ack_at = expiry;
 
-		ack_at = jiffies + expiry;
+		ack_at += now;
 		if (time_before(ack_at, call->ack_at)) {
 			WRITE_ONCE(call->ack_at, ack_at);
 			rxrpc_reduce_call_timer(call, ack_at, now,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index a1c53ac066a1..09f2a3e05221 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -233,7 +233,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 		if (resend_at < 1)
 			resend_at = 1;
 
-		resend_at = now + rxrpc_resend_timeout;
+		resend_at += now;
 		WRITE_ONCE(call->resend_at, resend_at);
 		rxrpc_reduce_call_timer(call, resend_at, now,
 					rxrpc_timer_set_for_send);
-- 
cgit v1.2.3


From 9b85c2d4508563f4bb1de0d971ed02fea0d0d757 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 28 Nov 2017 17:44:28 -0800
Subject: tools: bpftool: fix crash on bad parameters with JSON

If bad or unrecognised parameters are specified after JSON output is
requested, `usage()` will try to output null JSON object before the
writer is created.

To prevent this, create the writer as soon as the `--json` option is
parsed.

Fixes: 004b45c0e51a ("tools: bpftool: provide JSON output for all possible commands")
Reported-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d6e4762170a4..14ad54a1c404 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -291,7 +291,15 @@ int main(int argc, char **argv)
 			pretty_output = true;
 			/* fall through */
 		case 'j':
-			json_output = true;
+			if (!json_output) {
+				json_wtr = jsonw_new(stdout);
+				if (!json_wtr) {
+					p_err("failed to create JSON writer");
+					return -1;
+				}
+				json_output = true;
+			}
+			jsonw_pretty(json_wtr, pretty_output);
 			break;
 		case 'f':
 			show_pinned = true;
@@ -306,15 +314,6 @@ int main(int argc, char **argv)
 	if (argc < 0)
 		usage();
 
-	if (json_output) {
-		json_wtr = jsonw_new(stdout);
-		if (!json_wtr) {
-			p_err("failed to create JSON writer");
-			return -1;
-		}
-		jsonw_pretty(json_wtr, pretty_output);
-	}
-
 	bfd_init();
 
 	ret = cmd_select(cmds, argc, argv, do_help);
-- 
cgit v1.2.3


From 7868620a3c15dbc661fb5d849de403ac04624c50 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 28 Nov 2017 17:44:29 -0800
Subject: tools: bpftool: clean up the JSON writer before exiting in usage()

The writer is cleaned at the end of the main function, but not if the
program exits sooner in usage(). Let's keep it clean and destroy the
writer before exiting.

Destruction and actual call to exit() are moved to another function so
that clean exit can also be performed without printing usage() hints.

Fixes: d35efba99d92 ("tools: bpftool: introduce --json and --pretty options")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 10 +++++++++-
 tools/bpf/bpftool/main.h |  3 ++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 14ad54a1c404..d72dd73a4016 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -58,11 +58,19 @@ bool show_pinned;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 
+static void __noreturn clean_and_exit(int i)
+{
+	if (json_output)
+		jsonw_destroy(&json_wtr);
+
+	exit(i);
+}
+
 void usage(void)
 {
 	last_do_help(last_argc - 1, last_argv + 1);
 
-	exit(-1);
+	clean_and_exit(-1);
 }
 
 static int do_help(int argc, char **argv)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 9c191e222d6f..0b60ddfb2b93 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -41,6 +41,7 @@
 #include <stdbool.h>
 #include <stdio.h>
 #include <linux/bpf.h>
+#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/hashtable.h>
 
@@ -80,7 +81,7 @@ void p_info(const char *fmt, ...);
 
 bool is_prefix(const char *pfx, const char *str);
 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
-void usage(void) __attribute__((noreturn));
+void usage(void) __noreturn;
 
 struct pinned_obj_table {
 	DECLARE_HASHTABLE(table, 16);
-- 
cgit v1.2.3


From 146882a37da7aa566c7ec088b42c6495d769f2ba Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 28 Nov 2017 17:44:30 -0800
Subject: tools: bpftool: make error message from getopt_long() JSON-friendly

If `getopt_long()` meets an unknown option, it prints its own error
message to standard error output. While this does not strictly break
JSON output, it is the only case bpftool prints something to standard
error output if JSON output is required. All other errors are printed on
standard output as JSON objects, so that an external program does not
have to parse stderr.

This is changed by setting the global variable `opterr` to 0.
Furthermore, p_err() is used to reproduce the error message in a more
JSON-friendly way, so that users still get to know what the erroneous
option is.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d72dd73a4016..d294bc8168be 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -288,6 +288,7 @@ int main(int argc, char **argv)
 	hash_init(prog_table.table);
 	hash_init(map_table.table);
 
+	opterr = 0;
 	while ((opt = getopt_long(argc, argv, "Vhpjf",
 				  options, NULL)) >= 0) {
 		switch (opt) {
@@ -313,7 +314,11 @@ int main(int argc, char **argv)
 			show_pinned = true;
 			break;
 		default:
-			usage();
+			p_err("unrecognized option '%s'", argv[optind - 1]);
+			if (json_output)
+				clean_and_exit(-1);
+			else
+				usage();
 		}
 	}
 
-- 
cgit v1.2.3


From 0d954eeb99eee63964a07a68fcac61e1df8488e7 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 28 Nov 2017 17:44:31 -0800
Subject: tools: bpftool: remove spurious line break from error message

The end-of-line character inside the string would break JSON compliance.
Remove it, `p_err()` already adds a '\n' character for plain output
anyway.

Fixes: 9a5ab8bf1d6d ("tools: bpftool: turn err() and info() macros into functions")
Reported-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 0b60ddfb2b93..bff330b49791 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -51,7 +51,7 @@
 
 #define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
 #define NEXT_ARGP()	({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
-#define BAD_ARG()	({ p_err("what is '%s'?\n", *argv); -1; })
+#define BAD_ARG()	({ p_err("what is '%s'?", *argv); -1; })
 
 #define ERR_MAX_LEN	1024
 
-- 
cgit v1.2.3


From 507e590da398a0e3438d563b5e736c3f2a7749d7 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 28 Nov 2017 17:44:32 -0800
Subject: tools: bpftool: unify installation directories

Programs and documentation not managed by package manager are generally
installed under /usr/local/, instead of the user's home directory. In
particular, `man` is generally able to find manual pages under
`/usr/local/share/man`.

bpftool generally follows perf's example, and perf installs to home
directory. However bpftool requires root credentials, so it seems
sensible to follow the more common convention of installing files under
/usr/local instead. So, make /usr/local the default prefix for
installing the binary with `make install`, and the documentation with
`make doc-install`. Also, create /usr/local/sbin if it does not exist.

Note that the bash-completion file, however, is still installed under
/usr/share/bash-completion/completions, as the default setup for bash
does not attempt to load completion files under /usr/local/.

Reported-by: David Beckett <david.beckett@netronome.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/Makefile | 2 +-
 tools/bpf/bpftool/Makefile               | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index bde77d7c4390..37292bb5ce60 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -6,7 +6,7 @@ RM ?= rm -f
 
 # Make the path relative to DESTDIR, not prefix
 ifndef DESTDIR
-prefix?=$(HOME)
+prefix ?= /usr/local
 endif
 mandir ?= $(prefix)/share/man
 man8dir = $(mandir)/man8
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 813826c50936..c5b21f2cbca5 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -45,8 +45,8 @@ $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
 
-prefix = /usr
-bash_compdir ?= $(prefix)/share/bash-completion/completions
+prefix = /usr/local
+bash_compdir ?= /usr/share/bash-completion/completions
 
 CC = gcc
 
@@ -76,6 +76,7 @@ clean: $(LIBBPF)-clean
 	$(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
 
 install:
+	install -m 0755 -d $(prefix)/sbin
 	install $(OUTPUT)bpftool $(prefix)/sbin/bpftool
 	install -m 0755 -d $(bash_compdir)
 	install -m 0644 bash-completion/bpftool $(bash_compdir)
-- 
cgit v1.2.3


From ad3cda064402b69148faf5f7cd1ac8c2eee52645 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 28 Nov 2017 17:44:33 -0800
Subject: tools: bpftool: declare phony targets as such

In the Makefile, targets install, doc and doc-install should be added to
.PHONY. Let's fix this.

Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index c5b21f2cbca5..ec3052c0b004 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -89,5 +89,5 @@ doc-install:
 
 FORCE:
 
-.PHONY: all clean FORCE
+.PHONY: all clean FORCE install doc doc-install
 .DEFAULT_GOAL := all
-- 
cgit v1.2.3


From 23721a755f98ac846897a013c92cccb281c1bcc8 Mon Sep 17 00:00:00 2001
From: Xie XiuQi <xiexiuqi@huawei.com>
Date: Thu, 30 Nov 2017 09:41:29 +0800
Subject: trace/xdp: fix compile warning: 'struct bpf_map' declared inside
 parameter list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We meet this compile warning, which caused by missing bpf.h in xdp.h.

In file included from ./include/trace/events/xdp.h:10:0,
                 from ./include/linux/bpf_trace.h:6,
                 from drivers/net/ethernet/intel/i40e/i40e_txrx.c:29:
./include/trace/events/xdp.h:93:17: warning: ‘struct bpf_map’ declared inside parameter list will not be visible outside of this definition or declaration
    const struct bpf_map *map, u32 map_index),
                 ^
./include/linux/tracepoint.h:187:34: note: in definition of macro ‘__DECLARE_TRACE’
  static inline void trace_##name(proto)    \
                                  ^~~~~
./include/linux/tracepoint.h:352:24: note: in expansion of macro ‘PARAMS’
  __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),  \
                        ^~~~~~
./include/linux/tracepoint.h:477:2: note: in expansion of macro ‘DECLARE_TRACE’
  DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
  ^~~~~~~~~~~~~
./include/linux/tracepoint.h:477:22: note: in expansion of macro ‘PARAMS’
  DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
                      ^~~~~~
./include/trace/events/xdp.h:89:1: note: in expansion of macro ‘DEFINE_EVENT’
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
 ^~~~~~~~~~~~
./include/trace/events/xdp.h:90:2: note: in expansion of macro ‘TP_PROTO’
  TP_PROTO(const struct net_device *dev,
  ^~~~~~~~
./include/trace/events/xdp.h:93:17: warning: ‘struct bpf_map’ declared inside parameter list will not be visible outside of this definition or declaration
    const struct bpf_map *map, u32 map_index),
                 ^
./include/linux/tracepoint.h:203:38: note: in definition of macro ‘__DECLARE_TRACE’
  register_trace_##name(void (*probe)(data_proto), void *data) \
                                      ^~~~~~~~~~
./include/linux/tracepoint.h:354:4: note: in expansion of macro ‘PARAMS’
    PARAMS(void *__data, proto),   \
    ^~~~~~

Reported-by: Huang Daode <huangdaode@hisilicon.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Fixes: 8d3b778ff544 ("xdp: tracepoint xdp_redirect also need a map argument")
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/trace/events/xdp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 4cd0f05d0113..8989a92c571a 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -8,6 +8,7 @@
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/tracepoint.h>
+#include <linux/bpf.h>
 
 #define __XDP_ACT_MAP(FN)	\
 	FN(ABORTED)		\
-- 
cgit v1.2.3


From a154f8e399a063137fc42b961f437248d55ece29 Mon Sep 17 00:00:00 2001
From: Yan Markman <ymarkman@marvell.com>
Date: Thu, 30 Nov 2017 10:49:46 +0100
Subject: net: mvpp2: allocate zeroed tx descriptors

Reserved and unused fields in the Tx descriptors should be 0. The PPv2
driver doesn't clear them at run-time (for performance reasons) but
these descriptors aren't zeroed when allocated, which can lead to
unpredictable behaviors. This patch fixes this by using
dma_zalloc_coherent instead of dma_alloc_coherent.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Signed-off-by: Yan Markman <ymarkman@marvell.com>
[Antoine: commit message]
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index d83a78be98a2..fed2b2f909fc 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5598,7 +5598,7 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
 	u32 txq_dma;
 
 	/* Allocate memory for TX descriptors */
-	aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
+	aggr_txq->descs = dma_zalloc_coherent(&pdev->dev,
 				MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
 				&aggr_txq->descs_dma, GFP_KERNEL);
 	if (!aggr_txq->descs)
-- 
cgit v1.2.3


From f8821f96ae97260d68228fe53f81848b2ede44d7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 30 Nov 2017 14:33:56 +0100
Subject: skbuff: Grammar s/are can/can/, s/change/changes/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bc486ef23f20..a38c80e9f91e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1406,8 +1406,7 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb)
 }
 
 /*
- * If users == 1, we are the only owner and are can avoid redundant
- * atomic change.
+ * If users == 1, we are the only owner and can avoid redundant atomic changes.
  */
 
 /**
-- 
cgit v1.2.3


From 1c08ac0c4bd8e9d66c4dde29bc496c3b430dd028 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Tue, 28 Nov 2017 17:48:22 +0100
Subject: net: stmmac: dwmac-sun8i: fix allwinner,leds-active-low handling

The driver expect "allwinner,leds-active-low" to be in PHY node, but
the binding doc expect it to be in MAC node.

Since all board DT use it also in MAC node, the driver need to search
allwinner,leds-active-low in MAC node.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index e5ff734d4f9b..9eb7f65d8000 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -808,8 +808,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 			 val, reg);
 
 	if (gmac->variant->soc_has_internal_phy) {
-		if (of_property_read_bool(priv->plat->phy_node,
-					  "allwinner,leds-active-low"))
+		if (of_property_read_bool(node, "allwinner,leds-active-low"))
 			reg |= H3_EPHY_LED_POL;
 		else
 			reg &= ~H3_EPHY_LED_POL;
-- 
cgit v1.2.3


From 90a6ec85351b31449c2c6b5406b5396ac96f191d Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 29 Nov 2017 16:07:51 -0800
Subject: act_sample: get rid of tcf_sample_cleanup_rcu()

Similar to commit d7fb60b9cafb ("net_sched: get rid of tcfa_rcu"),
TC actions don't need to respect RCU grace period, because it
is either just detached from tc filter (standalone case) or
it is removed together with tc filter (bound case) in which case
RCU grace period is already respected at filter layer.

Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Yotam Gigi <yotamg@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_sample.h |  1 -
 net/sched/act_sample.c         | 14 +++-----------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index 524cee4f4c81..01dbfea32672 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -14,7 +14,6 @@ struct tcf_sample {
 	struct psample_group __rcu *psample_group;
 	u32 psample_group_num;
 	struct list_head tcfm_list;
-	struct rcu_head rcu;
 };
 #define to_sample(a) ((struct tcf_sample *)a)
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 8b5abcd2f32f..9438969290a6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	return ret;
 }
 
-static void tcf_sample_cleanup_rcu(struct rcu_head *rcu)
+static void tcf_sample_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu);
+	struct tcf_sample *s = to_sample(a);
 	struct psample_group *psample_group;
 
-	psample_group = rcu_dereference_protected(s->psample_group, 1);
+	psample_group = rtnl_dereference(s->psample_group);
 	RCU_INIT_POINTER(s->psample_group, NULL);
 	psample_group_put(psample_group);
 }
 
-static void tcf_sample_cleanup(struct tc_action *a, int bind)
-{
-	struct tcf_sample *s = to_sample(a);
-
-	call_rcu(&s->rcu, tcf_sample_cleanup_rcu);
-}
-
 static bool tcf_sample_dev_ok_push(struct net_device *dev)
 {
 	switch (dev->type) {
@@ -264,7 +257,6 @@ static int __init sample_init_module(void)
 
 static void __exit sample_cleanup_module(void)
 {
-	rcu_barrier();
 	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
 }
 
-- 
cgit v1.2.3


From 3016dad75b48279e579117ee3ed566ba90a3b023 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 29 Nov 2017 17:43:57 -0800
Subject: tcp: remove buggy call to tcp_v6_restore_cb()

tcp_v6_send_reset() expects to receive an skb with skb->cb[] layout as
used in TCP stack.
MD5 lookup uses tcp_v6_iif() and tcp_v6_sdif() and thus
TCP_SKB_CB(skb)->header.h6

This patch probably fixes RST packets sent on behalf of a timewait md5
ipv6 socket.

Before Florian patch, tcp_v6_restore_cb() was needed before jumping to
no_tcp_socket label.

Fixes: 271c3b9b7bda ("tcp: honour SO_BINDTODEVICE for TW_RST case too")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6bb98c93edfe..be11dc13aa70 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1590,7 +1590,6 @@ do_time_wait:
 		tcp_v6_timewait_ack(sk, skb);
 		break;
 	case TCP_TW_RST:
-		tcp_v6_restore_cb(skb);
 		tcp_v6_send_reset(sk, skb);
 		inet_twsk_deschedule_put(inet_twsk(sk));
 		goto discard_it;
-- 
cgit v1.2.3


From f859b4af1c52493ec21173ccc73d0b60029b5b88 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 30 Nov 2017 10:41:14 +0800
Subject: sit: update frag_off info

After parsing the sit netlink change info, we forget to update frag_off in
ipip6_tunnel_update(). Fix it by assigning frag_off with new value.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d60ddcb0bfe2..d7dc23c1b2ca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
 	ipip6_tunnel_link(sitn, t);
 	t->parms.iph.ttl = p->iph.ttl;
 	t->parms.iph.tos = p->iph.tos;
+	t->parms.iph.frag_off = p->iph.frag_off;
 	if (t->parms.link != p->link || t->fwmark != fwmark) {
 		t->parms.link = p->link;
 		t->fwmark = fwmark;
-- 
cgit v1.2.3


From 2b279419567105d63f1e524bb1ac34ae8f918e5d Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 30 Nov 2017 08:52:42 -0800
Subject: tools/bpf: adjust rlimit RLIMIT_MEMLOCK for test_verifier_log

The default rlimit RLIMIT_MEMLOCK is 64KB. In certain cases,
e.g. in a test machine mimicking our production system, this test may
fail due to unable to charge the required memory for prog load:
  # ./test_verifier_log
  Test log_level 0...
  ERROR: Program load returned: ret:-1/errno:1, expected ret:-1/errno:22

Changing the default rlimit RLIMIT_MEMLOCK to unlimited makes
the test always pass.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier_log.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index 3cc0b561489e..e9626cf5607a 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -3,6 +3,8 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -131,11 +133,16 @@ static void test_log_bad(char *log, size_t log_len, int log_level)
 
 int main(int argc, char **argv)
 {
+	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
 	char full_log[LOG_SIZE];
 	char log[LOG_SIZE];
 	size_t want_len;
 	int i;
 
+	/* allow unlimited locked memory to have more consistent error code */
+	if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
+		perror("Unable to lift memlock rlimit");
+
 	memset(log, 1, LOG_SIZE);
 
 	/* Test incorrect attr */
-- 
cgit v1.2.3


From 6fef90c6b3f6a2b52018e66c0886944ea0c03fcc Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 30 Nov 2017 10:45:26 -0800
Subject: net: dsa: bcm_sf2: Set correct CHAIN_ID and slice number mask

When configuring an IPv6 address mask, we should use SLICE_NUM_MASK as
the mask in order to make sure all bits are masked by the hardware.
Also, we want matching entries to have a CHAIN_ID value set to the same
value as the rule index we return to user-space for convenience, so fix
that too.

Fixes: ba0696c22e7c ("net: dsa: bcm_sf2: Add support for IPv6 CFP rules")
Fixes: dd8eff68343d ("net: dsa: bcm_sf2: Allow matching arbitrary IPv6 masks/lengths")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2_cfp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index b721a2009b50..23b45da784cb 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -625,7 +625,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
 	bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc,
 				slice_num, false);
 	bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6src, v6_m_spec->psrc,
-				slice_num, true);
+				SLICE_NUM_MASK, true);
 
 	/* Insert into TCAM now because we need to insert a second rule */
 	bcm_sf2_cfp_rule_addr_set(priv, rule_index[0]);
@@ -699,7 +699,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
 	/* Insert into Action and policer RAMs now, set chain ID to
 	 * the one we are chained to
 	 */
-	ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num,
+	ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num,
 				      queue_num, true);
 	if (ret)
 		goto out_err;
-- 
cgit v1.2.3


From c8c088ba0edf65044c254b96fc438c91914aaab0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 30 Nov 2017 13:47:54 -0800
Subject: bpf: set maximum number of attached progs to 64 for a single perf tp

cgropu+bpf prog array has a maximum number of 64 programs.
Let us apply the same limit here.

Fixes: e87c6bc3852b ("bpf: permit multiple bpf attachments for a single perf event")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/core.c        | 3 ++-
 kernel/trace/bpf_trace.c | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b9f8686a84cf..86b50aa26ee8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1447,7 +1447,8 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
 	rcu_read_lock();
 	prog = rcu_dereference(progs)->progs;
 	for (; *prog; prog++)
-		cnt++;
+		if (*prog != &dummy_bpf_prog.prog)
+			cnt++;
 	rcu_read_unlock();
 	return cnt;
 }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 27d1f4ffa3de..0ce99c379c30 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -759,6 +759,8 @@ const struct bpf_prog_ops perf_event_prog_ops = {
 
 static DEFINE_MUTEX(bpf_event_mutex);
 
+#define BPF_TRACE_MAX_PROGS 64
+
 int perf_event_attach_bpf_prog(struct perf_event *event,
 			       struct bpf_prog *prog)
 {
@@ -772,6 +774,12 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
 		goto unlock;
 
 	old_array = event->tp_event->prog_array;
+	if (old_array &&
+	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
+		ret = -E2BIG;
+		goto unlock;
+	}
+
 	ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
 	if (ret < 0)
 		goto unlock;
-- 
cgit v1.2.3


From 0ec9552b43b98deb882bf48efd347be4bd7afc9f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 30 Nov 2017 13:47:55 -0800
Subject: samples/bpf: add error checking for perf ioctl calls in bpf loader

load_bpf_file() should fail if ioctl with command
PERF_EVENT_IOC_ENABLE and PERF_EVENT_IOC_SET_BPF fails.
When they do fail, proper error messages are printed.

With this change, the below "syscall_tp" run shows that
the maximum number of bpf progs attaching to the same
perf tracepoint is indeed enforced.
  $ ./syscall_tp -i 64
  prog #0: map ids 4 5
  ...
  prog #63: map ids 382 383
  $ ./syscall_tp -i 65
  prog #0: map ids 4 5
  ...
  prog #64: map ids 388 389
  ioctl PERF_EVENT_IOC_SET_BPF failed err Argument list too long

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/bpf_load.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 522ca9252d6c..242631aa4ea2 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -193,8 +193,18 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		return -1;
 	}
 	event_fd[prog_cnt - 1] = efd;
-	ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
-	ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+	err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+	if (err < 0) {
+		printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
+		       strerror(errno));
+		return -1;
+	}
+	err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+	if (err < 0) {
+		printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n",
+		       strerror(errno));
+		return -1;
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 435019b48033138581a6171093b181fc6b4d3d30 Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <jimmyassarsson@gmail.com>
Date: Tue, 21 Nov 2017 08:22:26 +0100
Subject: can: kvaser_usb: free buf in error paths

The allocated buffer was not freed if usb_submit_urb() failed.

Signed-off-by: Jimmy Assarsson <jimmyassarsson@gmail.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 9b18d96ef526..075644591498 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -813,6 +813,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv,
 	if (err) {
 		netdev_err(netdev, "Error transmitting URB\n");
 		usb_unanchor_urb(urb);
+		kfree(buf);
 		usb_free_urb(urb);
 		return err;
 	}
@@ -1768,6 +1769,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
 		spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
 
 		usb_unanchor_urb(urb);
+		kfree(buf);
 
 		stats->tx_dropped++;
 
-- 
cgit v1.2.3


From e84f44eb5523401faeb9cc1c97895b68e3cfb78d Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <jimmyassarsson@gmail.com>
Date: Tue, 21 Nov 2017 08:22:27 +0100
Subject: can: kvaser_usb: Fix comparison bug in
 kvaser_usb_read_bulk_callback()

The conditon in the while-loop becomes true when actual_length is less than
2 (MSG_HEADER_LEN). In best case we end up with a former, already
dispatched msg, that got msg->len greater than actual_length. This will
result in a "Format error" error printout.

Problem seen when unplugging a Kvaser USB device connected to a vbox guest.

warning: comparison between signed and unsigned integer expressions
[-Wsign-compare]

Signed-off-by: Jimmy Assarsson <jimmyassarsson@gmail.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 075644591498..d87e330a20b3 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -1334,7 +1334,7 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
 		goto resubmit_urb;
 	}
 
-	while (pos <= urb->actual_length - MSG_HEADER_LEN) {
+	while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) {
 		msg = urb->transfer_buffer + pos;
 
 		/* The Kvaser firmware can only read and write messages that
-- 
cgit v1.2.3


From 8bd13bd522ff7dfa0eb371921aeb417155f7a3be Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <jimmyassarsson@gmail.com>
Date: Tue, 21 Nov 2017 08:22:28 +0100
Subject: can: kvaser_usb: ratelimit errors if incomplete messages are received

Avoid flooding the kernel log with "Formate error", if incomplete message
are received.

Signed-off-by: Jimmy Assarsson <jimmyassarsson@gmail.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index d87e330a20b3..f95945915d20 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -609,8 +609,8 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id,
 			}
 
 			if (pos + tmp->len > actual_len) {
-				dev_err(dev->udev->dev.parent,
-					"Format error\n");
+				dev_err_ratelimited(dev->udev->dev.parent,
+						    "Format error\n");
 				break;
 			}
 
@@ -1353,7 +1353,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
 		}
 
 		if (pos + msg->len > urb->actual_length) {
-			dev_err(dev->udev->dev.parent, "Format error\n");
+			dev_err_ratelimited(dev->udev->dev.parent,
+					    "Format error\n");
 			break;
 		}
 
-- 
cgit v1.2.3


From f6c23b174c3c96616514827407769cbcfc8005cf Mon Sep 17 00:00:00 2001
From: Oliver Stäbler <oliver.staebler@bytesatwork.ch>
Date: Mon, 20 Nov 2017 14:45:15 +0100
Subject: can: ti_hecc: Fix napi poll return value for repoll
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit d75b1ade567f ("net: less interrupt masking in NAPI") napi
repoll is done only when work_done == budget.
So we need to return budget if there are still packets to receive.

Signed-off-by: Oliver Stäbler <oliver.staebler@bytesatwork.ch>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/ti_hecc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 4d4941469cfc..db6ea936dc3f 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -637,6 +637,9 @@ static int ti_hecc_rx_poll(struct napi_struct *napi, int quota)
 		mbx_mask = hecc_read(priv, HECC_CANMIM);
 		mbx_mask |= HECC_TX_MBOX_MASK;
 		hecc_write(priv, HECC_CANMIM, mbx_mask);
+	} else {
+		/* repoll is done only if whole budget is used */
+		num_pkts = quota;
 	}
 
 	return num_pkts;
-- 
cgit v1.2.3


From 5c2cb02edf79ad79d9b8d07c6d52243a948c4c9f Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Thu, 23 Nov 2017 15:44:35 +0100
Subject: can: peak/pci: fix potential bug when probe() fails

PCI/PCIe drivers for PEAK-System CAN/CAN-FD interfaces do some access to the
PCI config during probing. In case one of these accesses fails, a POSITIVE
PCIBIOS_xxx error code is returned back. This POSITIVE error code MUST be
converted into a NEGATIVE errno for the probe() function to indicate it
failed. Using the pcibios_err_to_errno() function, we make sure that the
return code will always be negative.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/peak_canfd/peak_pciefd_main.c | 5 ++++-
 drivers/net/can/sja1000/peak_pci.c            | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index b4efd711f824..788c3464a3b0 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -825,7 +825,10 @@ err_release_regions:
 err_disable_pci:
 	pci_disable_device(pdev);
 
-	return err;
+	/* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while
+	 * the probe() function must return a negative errno in case of failure
+	 * (err is unchanged if negative) */
+	return pcibios_err_to_errno(err);
 }
 
 /* free the board structure object, as well as its resources: */
diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c
index 131026fbc2d7..5adc95c922ee 100644
--- a/drivers/net/can/sja1000/peak_pci.c
+++ b/drivers/net/can/sja1000/peak_pci.c
@@ -717,7 +717,10 @@ failure_release_regions:
 failure_disable_pci:
 	pci_disable_device(pdev);
 
-	return err;
+	/* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while
+	 * the probe() function must return a negative errno in case of failure
+	 * (err is unchanged if negative) */
+	return pcibios_err_to_errno(err);
 }
 
 static void peak_pci_remove(struct pci_dev *pdev)
-- 
cgit v1.2.3


From 658f534c036352a06584c8b2e71a547915173ba9 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 22 Nov 2017 13:01:08 +0100
Subject: can: flexcan: Update IRQ Err Passive information

The flexcan IP cores used on MX25 and MX35 do not generate Error Passive
IRQs. Update the IP core overview table in the driver accordingly.

Suggested-by: ZHU Yi (ST-FIR/ENG1-Zhu) <Yi.Zhu5@cn.bosch.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index a13a4896a8bd..eefddae2e99a 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -184,9 +184,9 @@
  * Below is some version info we got:
  *    SOC   Version   IP-Version  Glitch- [TR]WRN_INT IRQ Err Memory err RTR re-
  *                                Filter? connected?  Passive detection  ception in MB
- *   MX25  FlexCAN2  03.00.00.00     no        no         ?       no        no
+ *   MX25  FlexCAN2  03.00.00.00     no        no        no       no        no
  *   MX28  FlexCAN2  03.00.04.00    yes       yes        no       no        no
- *   MX35  FlexCAN2  03.00.00.00     no        no         ?       no        no
+ *   MX35  FlexCAN2  03.00.00.00     no        no        no       no        no
  *   MX53  FlexCAN2  03.00.00.00    yes        no        no       no        no
  *   MX6s  FlexCAN3  10.00.12.00    yes       yes        no       no       yes
  *   VF610 FlexCAN3  ?               no       yes         ?      yes       yes?
-- 
cgit v1.2.3


From 29c64b17a0bc72232acc45e9533221d88a262efb Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 27 Nov 2017 09:18:21 +0100
Subject: can: flexcan: fix VF610 state transition issue

Enable FLEXCAN_QUIRK_BROKEN_PERR_STATE for VF610 to report correct state
transitions.

Tested-by: Mirza Krak <mirza.krak@gmail.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v4.11
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index eefddae2e99a..0626dcfd1f3d 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -189,7 +189,7 @@
  *   MX35  FlexCAN2  03.00.00.00     no        no        no       no        no
  *   MX53  FlexCAN2  03.00.00.00    yes        no        no       no        no
  *   MX6s  FlexCAN3  10.00.12.00    yes       yes        no       no       yes
- *   VF610 FlexCAN3  ?               no       yes         ?      yes       yes?
+ *   VF610 FlexCAN3  ?               no       yes        no      yes       yes?
  *
  * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
  */
@@ -297,7 +297,8 @@ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
 
 static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
 	.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
-		FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP,
+		FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
+		FLEXCAN_QUIRK_BROKEN_PERR_STATE,
 };
 
 static const struct can_bittiming_const flexcan_bittiming_const = {
-- 
cgit v1.2.3


From 3fed8dbbc42c0639ac03d2361ab5f0606dd49e28 Mon Sep 17 00:00:00 2001
From: Martin Kelly <mkelly@xevo.com>
Date: Mon, 27 Nov 2017 15:49:15 -0800
Subject: can: mcba_usb: fix typo

Fix typo "analizer" --> "Analyzer".

Signed-off-by: Martin Kelly <mkelly@xevo.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/mcba_usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 7f0272558bef..c4355f0a20d5 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -862,7 +862,7 @@ static int mcba_usb_probe(struct usb_interface *intf,
 		goto cleanup_unregister_candev;
 	}
 
-	dev_info(&intf->dev, "Microchip CAN BUS analizer connected\n");
+	dev_info(&intf->dev, "Microchip CAN BUS Analyzer connected\n");
 
 	return 0;
 
-- 
cgit v1.2.3


From 1cb35a33a28394fd711bb26ddf3a564f4e9d9125 Mon Sep 17 00:00:00 2001
From: Martin Kelly <mkelly@xevo.com>
Date: Mon, 27 Nov 2017 15:49:16 -0800
Subject: can: mcba_usb: fix device disconnect bug

Currently, when you disconnect the device, the driver infinitely
resubmits all URBs, so you see:

Rx URB aborted (-32)

in an infinite loop.

Fix this by catching -EPIPE (what we get in urb->status when the device
disconnects) and not resubmitting.

With this patch, I can plug and unplug many times and the driver
recovers correctly.

Signed-off-by: Martin Kelly <mkelly@xevo.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/mcba_usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index c4355f0a20d5..ef417dcddbf7 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -592,6 +592,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb)
 		break;
 
 	case -ENOENT:
+	case -EPIPE:
 	case -ESHUTDOWN:
 		return;
 
-- 
cgit v1.2.3


From d30fc5126efb0c33b7adf5966d3051db2c3d7721 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 25 Nov 2017 21:18:34 +0800
Subject: sctp: only update outstanding_bytes for transmitted queue when doing
 prsctp_prune

Now outstanding_bytes is only increased when appending chunks into one
packet and sending it at 1st time, while decreased when it is about to
move into retransmit queue. It means outstanding_bytes value is already
decreased for all chunks in retransmit queue.

However sctp_prsctp_prune_sent is a common function to check the chunks
in both transmitted and retransmit queue, it decrease outstanding_bytes
when moving a chunk into abandoned queue from either of them.

It could cause outstanding_bytes underflow, as it also decreases it's
value for the chunks in retransmit queue.

This patch fixes it by only updating outstanding_bytes for transmitted
queue when pruning queues for prsctp prio policy, the same fix is also
needed in sctp_check_transmitted.

Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/outqueue.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4db012aa25f7..7029f8b99063 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -377,7 +377,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 		asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 		streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 
-		if (!chk->tsn_gap_acked) {
+		if (queue != &asoc->outqueue.retransmit &&
+		    !chk->tsn_gap_acked) {
 			if (chk->transport)
 				chk->transport->flight_size -=
 						sctp_data_size(chk);
@@ -1434,7 +1435,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 			/* If this chunk has not been acked, stop
 			 * considering it as 'outstanding'.
 			 */
-			if (!tchunk->tsn_gap_acked) {
+			if (transmitted_queue != &q->retransmit &&
+			    !tchunk->tsn_gap_acked) {
 				if (tchunk->transport)
 					tchunk->transport->flight_size -=
 							sctp_data_size(tchunk);
-- 
cgit v1.2.3


From e5f612969c6f965e3bd1158598e0a3b1c4f389b9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 25 Nov 2017 21:18:35 +0800
Subject: sctp: abandon the whole msg if one part of a fragmented message is
 abandoned

As rfc3758#section-3.1 demands:

   A3) When a TSN is "abandoned", if it is part of a fragmented message,
       all other TSN's within that fragmented message MUST be abandoned
       at the same time.

Besides, if it couldn't handle this, the rest frags would never get
assembled in peer side.

This patch supports it by adding abandoned flag in sctp_datamsg, when
one chunk is being abandoned, set chunk->msg->abandoned as well. Next
time when checking for abandoned, go checking chunk->msg->abandoned
first.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  3 ++-
 net/sctp/chunk.c           |  7 +++++++
 net/sctp/outqueue.c        | 12 ++++++++----
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 16f949eef52f..2f8f93da5dc2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -503,7 +503,8 @@ struct sctp_datamsg {
 	/* Did the messenge fail to send? */
 	int send_error;
 	u8 send_failed:1,
-	   can_delay;	    /* should this message be Nagle delayed */
+	   can_delay:1,	/* should this message be Nagle delayed */
+	   abandoned:1;	/* should this message be abandoned */
 };
 
 struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7b261afc47b9..9213805b558d 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
 	msg->send_failed = 0;
 	msg->send_error = 0;
 	msg->can_delay = 1;
+	msg->abandoned = 0;
 	msg->expires_at = 0;
 	INIT_LIST_HEAD(&msg->chunks);
 }
@@ -304,6 +305,9 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 	if (!chunk->asoc->peer.prsctp_capable)
 		return 0;
 
+	if (chunk->msg->abandoned)
+		return 1;
+
 	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
 	    time_after(jiffies, chunk->msg->expires_at)) {
 		struct sctp_stream_out *streamout =
@@ -316,6 +320,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 			chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
 			streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
 		}
+		chunk->msg->abandoned = 1;
 		return 1;
 	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
 		   chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
@@ -324,10 +329,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 
 		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
 		streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+		chunk->msg->abandoned = 1;
 		return 1;
 	} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
 		   chunk->msg->expires_at &&
 		   time_after(jiffies, chunk->msg->expires_at)) {
+		chunk->msg->abandoned = 1;
 		return 1;
 	}
 	/* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 7029f8b99063..4ab164b5aad0 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 	list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
 		struct sctp_stream_out *streamout;
 
-		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+		if (!chk->msg->abandoned &&
+		    (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
 			continue;
 
+		chk->msg->abandoned = 1;
 		list_del_init(&chk->transmitted_list);
 		sctp_insert_list(&asoc->outqueue.abandoned,
 				 &chk->transmitted_list);
@@ -404,10 +406,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
 	q->sched->unsched_all(&asoc->stream);
 
 	list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
-		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+		if (!chk->msg->abandoned &&
+		    (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
 			continue;
 
+		chk->msg->abandoned = 1;
 		sctp_sched_dequeue_common(q, chk);
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
-- 
cgit v1.2.3


From 779edd7348878a7376c0e3d0f96485c30b5f1b7d Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 25 Nov 2017 21:18:36 +0800
Subject: sctp: do not abandon the other frags in unsent outq if one msg has
 outstanding frags

Now for the abandoned chunks in unsent outq, it would just free the chunks.
Because no tsn is assigned to them yet, there's no need to send fwd tsn to
peer, unlike for the abandoned chunks in sent outq.

The problem is when parts of the msg have been sent and the other frags
are still in unsent outq, if they are abandoned/dropped, the peer would
never get this msg reassembled.

So these frags in unsent outq can't be dropped if this msg already has
outstanding frags.

This patch does the check in sctp_chunk_abandoned and
sctp_prsctp_prune_unsent.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/chunk.c    | 4 ++++
 net/sctp/outqueue.c | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 9213805b558d..7f8baa48e7c2 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -308,6 +308,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 	if (chunk->msg->abandoned)
 		return 1;
 
+	if (!chunk->has_tsn &&
+	    !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG))
+		return 0;
+
 	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
 	    time_after(jiffies, chunk->msg->expires_at)) {
 		struct sctp_stream_out *streamout =
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4ab164b5aad0..7d67feeeffc1 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -407,7 +407,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
 
 	list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
 		if (!chk->msg->abandoned &&
-		    (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+		    (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) ||
+		     !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
 		     chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
 			continue;
 
-- 
cgit v1.2.3


From cfac7f836a715b91f08c851df915d401a4d52783 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 1 Dec 2017 10:06:56 -0800
Subject: tcp/dccp: block bh before arming time_wait timer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maciej Żenczykowski reported some panics in tcp_twsk_destructor()
that might be caused by the following bug.

timewait timer is pinned to the cpu, because we want to transition
timwewait refcount from 0 to 4 in one go, once everything has been
initialized.

At the time commit ed2e92394589 ("tcp/dccp: fix timewait races in timer
handling") was merged, TCP was always running from BH habdler.

After commit 5413d1babe8f ("net: do not block BH while processing
socket backlog") we definitely can run tcp_time_wait() from process
context.

We need to block BH in the critical section so that the pinned timer
has still its purpose.

This bug is more likely to happen under stress and when very small RTO
are used in datacenter flows.

Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Acked-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/minisocks.c     | 6 ++++++
 net/ipv4/tcp_minisocks.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index abd07a443219..178bb9833311 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 		if (state == DCCP_TIME_WAIT)
 			timeo = DCCP_TIMEWAIT_LEN;
 
+		/* tw_timer is pinned, so we need to make sure BH are disabled
+		 * in following section, otherwise timer handler could run before
+		 * we complete the initialization.
+		 */
+		local_bh_disable();
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
 		inet_twsk_put(tw);
+		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e36eff0403f4..b079b619b60c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		if (state == TCP_TIME_WAIT)
 			timeo = TCP_TIMEWAIT_LEN;
 
+		/* tw_timer is pinned, so we need to make sure BH are disabled
+		 * in following section, otherwise timer handler could run before
+		 * we complete the initialization.
+		 */
+		local_bh_disable();
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 		inet_twsk_put(tw);
+		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
-- 
cgit v1.2.3


From c7799c067c2ae33e348508c8afec354f3257ff25 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Wed, 29 Nov 2017 12:48:42 +0200
Subject: tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()

Remove the second tipc_rcv() call in tipc_udp_recv(). We have just
checked that the bearer is not up, and calling tipc_rcv() with a bearer
that is not up leads to a TIPC div-by-zero crash in
tipc_node_calculate_timer(). The crash is rare in practice, but can
happen like this:

  We're enabling a bearer, but it's not yet up and fully initialized.
  At the same time we receive a discovery packet, and in tipc_udp_recv()
  we end up calling tipc_rcv() with the not-yet-initialized bearer,
  causing later the div-by-zero crash in tipc_node_calculate_timer().

Jon Maloy explains the impact of removing the second tipc_rcv() call:
  "link setup in the worst case will be delayed until the next arriving
   discovery messages, 1 sec later, and this is an acceptable delay."

As the tipc_rcv() call is removed, just leave the function via the
rcu_out label, so that we will kfree_skb().

[   12.590450] Own node address <1.1.1>, network identity 1
[   12.668088] divide error: 0000 [#1] SMP
[   12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1
[   12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014
[   12.682095] task: ffff8c2a761edb80 task.stack: ffffa41cc0cac000
[   12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc]
[   12.686486] RSP: 0018:ffff8c2a7fc838a0 EFLAGS: 00010246
[   12.688451] RAX: 0000000000000000 RBX: ffff8c2a5b382600 RCX: 0000000000000000
[   12.691197] RDX: 0000000000000000 RSI: ffff8c2a5b382600 RDI: ffff8c2a5b382600
[   12.693945] RBP: ffff8c2a7fc838b0 R08: 0000000000000001 R09: 0000000000000001
[   12.696632] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2a5d8949d8
[   12.699491] R13: ffffffff95ede400 R14: 0000000000000000 R15: ffff8c2a5d894800
[   12.702338] FS:  0000000000000000(0000) GS:ffff8c2a7fc80000(0000) knlGS:0000000000000000
[   12.705099] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   12.706776] CR2: 0000000001bb9440 CR3: 00000000bd009001 CR4: 00000000003606e0
[   12.708847] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   12.711016] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   12.712627] Call Trace:
[   12.713390]  <IRQ>
[   12.714011]  tipc_node_check_dest+0x2e8/0x350 [tipc]
[   12.715286]  tipc_disc_rcv+0x14d/0x1d0 [tipc]
[   12.716370]  tipc_rcv+0x8b0/0xd40 [tipc]
[   12.717396]  ? minmax_running_min+0x2f/0x60
[   12.718248]  ? dst_alloc+0x4c/0xa0
[   12.718964]  ? tcp_ack+0xaf1/0x10b0
[   12.719658]  ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc]
[   12.720634]  tipc_udp_recv+0x71/0x1d0 [tipc]
[   12.721459]  ? dst_alloc+0x4c/0xa0
[   12.722130]  udp_queue_rcv_skb+0x264/0x490
[   12.722924]  __udp4_lib_rcv+0x21e/0x990
[   12.723670]  ? ip_route_input_rcu+0x2dd/0xbf0
[   12.724442]  ? tcp_v4_rcv+0x958/0xa40
[   12.725039]  udp_rcv+0x1a/0x20
[   12.725587]  ip_local_deliver_finish+0x97/0x1d0
[   12.726323]  ip_local_deliver+0xaf/0xc0
[   12.726959]  ? ip_route_input_noref+0x19/0x20
[   12.727689]  ip_rcv_finish+0xdd/0x3b0
[   12.728307]  ip_rcv+0x2ac/0x360
[   12.728839]  __netif_receive_skb_core+0x6fb/0xa90
[   12.729580]  ? udp4_gro_receive+0x1a7/0x2c0
[   12.730274]  __netif_receive_skb+0x1d/0x60
[   12.730953]  ? __netif_receive_skb+0x1d/0x60
[   12.731637]  netif_receive_skb_internal+0x37/0xd0
[   12.732371]  napi_gro_receive+0xc7/0xf0
[   12.732920]  receive_buf+0x3c3/0xd40
[   12.733441]  virtnet_poll+0xb1/0x250
[   12.733944]  net_rx_action+0x23e/0x370
[   12.734476]  __do_softirq+0xc5/0x2f8
[   12.734922]  irq_exit+0xfa/0x100
[   12.735315]  do_IRQ+0x4f/0xd0
[   12.735680]  common_interrupt+0xa2/0xa2
[   12.736126]  </IRQ>
[   12.736416] RIP: 0010:native_safe_halt+0x6/0x10
[   12.736925] RSP: 0018:ffffa41cc0cafe90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff4d
[   12.737756] RAX: 0000000000000000 RBX: ffff8c2a761edb80 RCX: 0000000000000000
[   12.738504] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[   12.739258] RBP: ffffa41cc0cafe90 R08: 0000014b5b9795e5 R09: ffffa41cc12c7e88
[   12.740118] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002
[   12.740964] R13: ffff8c2a761edb80 R14: 0000000000000000 R15: 0000000000000000
[   12.741831]  default_idle+0x2a/0x100
[   12.742323]  arch_cpu_idle+0xf/0x20
[   12.742796]  default_idle_call+0x28/0x40
[   12.743312]  do_idle+0x179/0x1f0
[   12.743761]  cpu_startup_entry+0x1d/0x20
[   12.744291]  start_secondary+0x112/0x120
[   12.744816]  secondary_startup_64+0xa5/0xa5
[   12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00
00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48
89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f
[   12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: ffff8c2a7fc838a0
[   12.748555] ---[ end trace 1399ab83390650fd ]---
[   12.749296] Kernel panic - not syncing: Fatal exception in interrupt
[   12.750123] Kernel Offset: 0x13200000 from 0xffffffff82000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[   12.751215] Rebooting in 60 seconds..

Fixes: c9b64d492b1f ("tipc: add replicast peer discovery")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/udp_media.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..3deabcab4882 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
 			goto rcu_out;
 	}
 
-	tipc_rcv(sock_net(sk), skb, b);
-	rcu_read_unlock();
-	return 0;
-
 rcu_out:
 	rcu_read_unlock();
 out:
-- 
cgit v1.2.3


From a0da456bbf95d2a9294799bb05c61bfb24736bb7 Mon Sep 17 00:00:00 2001
From: Max Uvarov <muvarov@gmail.com>
Date: Thu, 30 Nov 2017 13:08:29 +0300
Subject: net: phy-micrel: check return code in flp center function

Fix obvious typo that first return value is set but not checked.

Signed-off-by: Max Uvarov <muvarov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index fdb43dd9b5cd..ab4614113403 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -496,16 +496,18 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev,
 	return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval);
 }
 
+/* Center KSZ9031RNX FLP timing at 16ms. */
 static int ksz9031_center_flp_timing(struct phy_device *phydev)
 {
 	int result;
 
-	/* Center KSZ9031RNX FLP timing at 16ms. */
 	result = ksz9031_extended_write(phydev, OP_DATA, 0,
 					MII_KSZ9031RN_FLP_BURST_TX_HI, 0x0006);
+	if (result)
+		return result;
+
 	result = ksz9031_extended_write(phydev, OP_DATA, 0,
 					MII_KSZ9031RN_FLP_BURST_TX_LO, 0x1A80);
-
 	if (result)
 		return result;
 
-- 
cgit v1.2.3


From acf1c02f023926b8b04672a9e81b1711ae681619 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 30 Nov 2017 13:59:11 +0000
Subject: sfp: fix RX_LOS signal handling

The options word is a be16 quantity, so we need to test the flags
having converted the endian-ness.  Convert the flag bits to be16,
which can be optimised by the compiler, rather than converting a
variable at runtime.

Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index e381811e5f11..3355141688a6 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -355,7 +355,7 @@ static void sfp_sm_link_check_los(struct sfp *sfp)
 	 * SFP_OPTIONS_LOS_NORMAL are set?  For now, we assume
 	 * the same as SFP_OPTIONS_LOS_NORMAL set.
 	 */
-	if (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED)
+	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED))
 		los ^= SFP_F_LOS;
 
 	if (los)
@@ -582,7 +582,8 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 		if (event == SFP_E_TX_FAULT)
 			sfp_sm_fault(sfp, true);
 		else if (event ==
-			 (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
+			 (sfp->id.ext.options &
+			  cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ?
 			  SFP_E_LOS_HIGH : SFP_E_LOS_LOW))
 			sfp_sm_link_up(sfp);
 		break;
@@ -592,7 +593,8 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 			sfp_sm_link_down(sfp);
 			sfp_sm_fault(sfp, true);
 		} else if (event ==
-			   (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
+			   (sfp->id.ext.options &
+			    cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ?
 			    SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) {
 			sfp_sm_link_down(sfp);
 			sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
-- 
cgit v1.2.3


From 710dfbb01a8ac0fc8d0cc191131bd84dc3796497 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 30 Nov 2017 13:59:16 +0000
Subject: sfp: improve RX_LOS handling

There are two bits in the option word for the RX_LOS signal.  One
reports that the RX_LOS signal is active high, the other reports that
it is active low.  When both or neither are set, the result is not
well defined in the specification.

Rather than assuming that neither set means normal RX_LOS, take this
as meaning no RX_LOS signal available, thereby ignoring the signal.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 3355141688a6..c1aab6a81ce9 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -351,12 +351,13 @@ static void sfp_sm_link_check_los(struct sfp *sfp)
 {
 	unsigned int los = sfp->state & SFP_F_LOS;
 
-	/* FIXME: what if neither SFP_OPTIONS_LOS_INVERTED nor
-	 * SFP_OPTIONS_LOS_NORMAL are set?  For now, we assume
-	 * the same as SFP_OPTIONS_LOS_NORMAL set.
+	/* If neither SFP_OPTIONS_LOS_INVERTED nor SFP_OPTIONS_LOS_NORMAL
+	 * are set, we assume that no LOS signal is available.
 	 */
 	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED))
 		los ^= SFP_F_LOS;
+	else if (!(sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL)))
+		los = 0;
 
 	if (los)
 		sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
@@ -364,6 +365,22 @@ static void sfp_sm_link_check_los(struct sfp *sfp)
 		sfp_sm_link_up(sfp);
 }
 
+static bool sfp_los_event_active(struct sfp *sfp, unsigned int event)
+{
+	return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) &&
+		event == SFP_E_LOS_LOW) ||
+	       (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) &&
+		event == SFP_E_LOS_HIGH);
+}
+
+static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event)
+{
+	return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) &&
+		event == SFP_E_LOS_HIGH) ||
+	       (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) &&
+		event == SFP_E_LOS_LOW);
+}
+
 static void sfp_sm_fault(struct sfp *sfp, bool warn)
 {
 	if (sfp->sm_retries && !--sfp->sm_retries) {
@@ -581,10 +598,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 	case SFP_S_WAIT_LOS:
 		if (event == SFP_E_TX_FAULT)
 			sfp_sm_fault(sfp, true);
-		else if (event ==
-			 (sfp->id.ext.options &
-			  cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ?
-			  SFP_E_LOS_HIGH : SFP_E_LOS_LOW))
+		else if (sfp_los_event_inactive(sfp, event))
 			sfp_sm_link_up(sfp);
 		break;
 
@@ -592,10 +606,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 		if (event == SFP_E_TX_FAULT) {
 			sfp_sm_link_down(sfp);
 			sfp_sm_fault(sfp, true);
-		} else if (event ==
-			   (sfp->id.ext.options &
-			    cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ?
-			    SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) {
+		} else if (sfp_los_event_active(sfp, event)) {
 			sfp_sm_link_down(sfp);
 			sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
 		}
-- 
cgit v1.2.3


From ec7681bde6d0e53ea5ef564477c8656fc318023a Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 30 Nov 2017 13:59:21 +0000
Subject: sfp: warn about modules requiring address change sequence

We do not support SFP modules which require the address change sequence
as detailed by SFF 8472 revision 1.22 section 8.9.  Warn when these
modules are inserted, and treat them as SFF8079 modules for ethtool.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index c1aab6a81ce9..9dfc1c4c954f 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -487,6 +487,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 		return -EINVAL;
 	}
 
+	/* If the module requires address swap mode, warn about it */
+	if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
+		dev_warn(sfp->dev,
+			 "module address swap to access page 0xA2 is not supported.\n");
+
 	return sfp_module_insert(sfp->sfp_bus, &sfp->id);
 }
 
@@ -652,7 +657,8 @@ static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo)
 {
 	/* locking... and check module is present */
 
-	if (sfp->id.ext.sff8472_compliance) {
+	if (sfp->id.ext.sff8472_compliance &&
+	    !(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)) {
 		modinfo->type = ETH_MODULE_SFF_8472;
 		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
 	} else {
-- 
cgit v1.2.3


From 2012b7d6b2868c532f22d9172c8b24611637eb48 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 30 Nov 2017 13:59:26 +0000
Subject: phylink: ensure we take the link down when phylink_stop() is called

Ensure that we tell the MAC to take the link down when phylink_stop()
is called, and that this completes prior to phylink_stop() returns.

Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index e3bbc70372d3..5dc9668dde34 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -773,6 +773,7 @@ void phylink_stop(struct phylink *pl)
 		sfp_upstream_stop(pl->sfp_bus);
 
 	set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+	queue_work(system_power_efficient_wq, &pl->resolve);
 	flush_work(&pl->resolve);
 }
 EXPORT_SYMBOL_GPL(phylink_stop);
-- 
cgit v1.2.3


From a7f3f939dd7d8398acebecd1ceb2e9e7ffbe91d2 Mon Sep 17 00:00:00 2001
From: Ray Jui <ray.jui@broadcom.com>
Date: Fri, 1 Dec 2017 03:13:02 -0500
Subject: bnxt_en: Need to unconditionally shut down RoCE in bnxt_shutdown

The current 'bnxt_shutdown' implementation only invokes
'bnxt_ulp_shutdown' to shut down RoCE in the case when the system is in
the path of power off (SYSTEM_POWER_OFF). While this may work in most
cases, it does not work in the smart NIC case, when Linux 'reboot'
command is initiated from the Linux that runs on the ARM cores of the
NIC card. In this particular case, Linux 'reboot' results in a system
'L3' level reset where the entire ARM and associated subsystems are
being reset, but at the same time, Nitro core is being kept in sane state
(to allow external PCIe connected servers to continue to work). Without
properly shutting down RoCE and freeing all associated resources, it
results in the ARM core to hang immediately after the 'reboot'

By always invoking 'bnxt_ulp_shutdown' in 'bnxt_shutdown', it fixes the
above issue

Fixes: 0efd2fc65c92 ("bnxt_en: Add a callback to inform RDMA driver during PCI shutdown.")

Signed-off-by: Ray Jui <ray.jui@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c5c38d4b7d1c..7f173eb42aa2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8263,8 +8263,9 @@ static void bnxt_shutdown(struct pci_dev *pdev)
 	if (netif_running(dev))
 		dev_close(dev);
 
+	bnxt_ulp_shutdown(bp);
+
 	if (system_state == SYSTEM_POWER_OFF) {
-		bnxt_ulp_shutdown(bp);
 		bnxt_clear_int_mode(bp);
 		pci_wake_from_d3(pdev, bp->wol);
 		pci_set_power_state(pdev, PCI_D3hot);
-- 
cgit v1.2.3


From c8fb7b8259c67b86cd93a71c85e78b34d2c96fdc Mon Sep 17 00:00:00 2001
From: Sunil Challa <sunilkumar.challa@broadcom.com>
Date: Fri, 1 Dec 2017 03:13:03 -0500
Subject: bnxt_en: wildcard smac while creating tunnel decap filter

While creating a decap filter the tunnel smac need not (and must not) be
specified as we cannot ascertain the neighbor in the recv path. 'ttl'
match is also not needed for the decap filter and must be wild-carded.

Fixes: f484f6782e01 ("bnxt_en: add hwrm FW cmds for cfa_encap_record and decap_filter")
Signed-off-by: Sunil Challa <sunilkumar.challa@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index d5031f436f83..96bff48af971 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -532,10 +532,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
 	}
 
 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
-		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR |
-			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR;
+		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
 		ether_addr_copy(req.dst_macaddr, l2_info->dmac);
-		ether_addr_copy(req.src_macaddr, l2_info->smac);
 	}
 	if (l2_info->num_vlans) {
 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
@@ -1012,10 +1010,9 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	if (rc)
 		goto put_decap;
 
-	decap_key->ttl = tun_key.ttl;
 	decap_l2_info = &decap_node->l2_info;
+	/* decap smac is wildcarded */
 	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
-	ether_addr_copy(decap_l2_info->smac, l2_info.dmac);
 	if (l2_info.num_vlans) {
 		decap_l2_info->num_vlans = l2_info.num_vlans;
 		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
-- 
cgit v1.2.3


From e9ecc731a87912d209d6e9b4ed20ed70451c08cb Mon Sep 17 00:00:00 2001
From: Sathya Perla <sathya.perla@broadcom.com>
Date: Fri, 1 Dec 2017 03:13:04 -0500
Subject: bnxt_en: fix dst/src fid for vxlan encap/decap actions

For flows that involve a vxlan encap action, the vxlan sock
interface may be specified as the outgoing interface. The driver
must resolve the outgoing PF interface used by this socket and
use the dst_fid of the PF in the hwrm_cfa_encap_record_alloc cmd.

Similarily for flows that have a vxlan decap action, the
fid of the incoming PF interface must be used as the src_fid in
the hwrm_cfa_decap_filter_alloc cmd.

Fixes: 8c95f773b4a3 ("bnxt_en: add support for Flower based vxlan encap/decap offload")
Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 48 +++++++++++++++-------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 96bff48af971..3d201d7324bd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -56,7 +56,6 @@ static int bnxt_tc_parse_redir(struct bnxt *bp,
 {
 	int ifindex = tcf_mirred_ifindex(tc_act);
 	struct net_device *dev;
-	u16 dst_fid;
 
 	dev = __dev_get_by_index(dev_net(bp->dev), ifindex);
 	if (!dev) {
@@ -64,15 +63,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp,
 		return -EINVAL;
 	}
 
-	/* find the FID from dev */
-	dst_fid = bnxt_flow_get_dst_fid(bp, dev);
-	if (dst_fid == BNXT_FID_INVALID) {
-		netdev_info(bp->dev, "can't get fid for ifindex=%d", ifindex);
-		return -EINVAL;
-	}
-
 	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
-	actions->dst_fid = dst_fid;
 	actions->dst_dev = dev;
 	return 0;
 }
@@ -160,13 +151,17 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
 	if (rc)
 		return rc;
 
-	/* Tunnel encap/decap action must be accompanied by a redirect action */
-	if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP ||
-	     actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) &&
-	    !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) {
-		netdev_info(bp->dev,
-			    "error: no redir action along with encap/decap");
-		return -EINVAL;
+	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
+		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
+			/* dst_fid is PF's fid */
+			actions->dst_fid = bp->pf.fw_fid;
+		} else {
+			/* find the FID from dst_dev */
+			actions->dst_fid =
+				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
+			if (actions->dst_fid == BNXT_FID_INVALID)
+				return -EINVAL;
+		}
 	}
 
 	return rc;
@@ -899,10 +894,10 @@ static void bnxt_tc_put_decap_handle(struct bnxt *bp,
 
 static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
 				       struct ip_tunnel_key *tun_key,
-				       struct bnxt_tc_l2_key *l2_info,
-				       struct net_device *real_dst_dev)
+				       struct bnxt_tc_l2_key *l2_info)
 {
 #ifdef CONFIG_INET
+	struct net_device *real_dst_dev = bp->dev;
 	struct flowi4 flow = { {0} };
 	struct net_device *dst_dev;
 	struct neighbour *nbr;
@@ -1006,7 +1001,7 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	 */
 	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
 	tun_key.tp_dst = flow->tun_key.tp_dst;
-	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev);
+	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
 	if (rc)
 		goto put_decap;
 
@@ -1092,8 +1087,7 @@ static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
 		goto done;
 
-	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info,
-					 flow->actions.dst_dev);
+	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
 	if (rc)
 		goto put_encap;
 
@@ -1166,6 +1160,15 @@ static int __bnxt_tc_del_flow(struct bnxt *bp,
 	return 0;
 }
 
+static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
+				u16 src_fid)
+{
+	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
+		flow->src_fid = bp->pf.fw_fid;
+	else
+		flow->src_fid = src_fid;
+}
+
 /* Add a new flow or replace an existing flow.
  * Notes on locking:
  * There are essentially two critical sections here.
@@ -1201,7 +1204,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
 	if (rc)
 		goto free_node;
-	flow->src_fid = src_fid;
+
+	bnxt_tc_set_src_fid(bp, flow, src_fid);
 
 	if (!bnxt_tc_can_offload(bp, flow)) {
 		rc = -ENOSPC;
-- 
cgit v1.2.3


From ebd5818cc5d4847897d7fe872e2d9799d7b7fcbb Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Fri, 1 Dec 2017 03:13:05 -0500
Subject: bnxt_en: Fix a variable scoping in bnxt_hwrm_do_send_msg()

short_input variable is assigned to another data pointer which is
referred out of its scope. Fix it by moving short_input definition
to the beginning of bnxt_hwrm_do_send_msg() function.

No failure has been reported so far due to this issue.

Fixes: e605db801bde ("bnxt_en: Support for Short Firmware Message")
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7f173eb42aa2..28f5e94274ee 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3368,6 +3368,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	u16 cp_ring_id, len = 0;
 	struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
 	u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
+	struct hwrm_short_input short_input = {0};
 
 	req->seq_id = cpu_to_le16(bp->hwrm_cmd_seq++);
 	memset(resp, 0, PAGE_SIZE);
@@ -3376,7 +3377,6 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 
 	if (bp->flags & BNXT_FLAG_SHORT_CMD) {
 		void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
-		struct hwrm_short_input short_input = {0};
 
 		memcpy(short_cmd_req, req, msg_len);
 		memset(short_cmd_req + msg_len, 0, BNXT_HWRM_MAX_REQ_LEN -
-- 
cgit v1.2.3


From 6e474083f3daf3a3546737f5d7d502ad12eb257c Mon Sep 17 00:00:00 2001
From: Wei Xu <wexu@redhat.com>
Date: Fri, 1 Dec 2017 05:10:36 -0500
Subject: vhost: fix skb leak in handle_rx()

Matthew found a roughly 40% tcp throughput regression with commit
c67df11f(vhost_net: try batch dequing from skb array) as discussed
in the following thread:
https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html

Eventually we figured out that it was a skb leak in handle_rx()
when sending packets to the VM. This usually happens when a guest
can not drain out vq as fast as vhost fills in, afterwards it sets
off the traffic jam and leaks skb(s) which occurs as no headcount
to send on the vq from vhost side.

This can be avoided by making sure we have got enough headcount
before actually consuming a skb from the batched rx array while
transmitting, which is simply done by moving checking the zero
headcount a bit ahead.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/net.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 8d626d7c2e7e..c7bdeb655646 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -778,16 +778,6 @@ static void handle_rx(struct vhost_net *net)
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			goto out;
-		if (nvq->rx_array)
-			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
-		/* On overrun, truncate and discard */
-		if (unlikely(headcount > UIO_MAXIOV)) {
-			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
-			err = sock->ops->recvmsg(sock, &msg,
-						 1, MSG_DONTWAIT | MSG_TRUNC);
-			pr_debug("Discarded rx packet: len %zd\n", sock_len);
-			continue;
-		}
 		/* OK, now we need to know about added descriptors. */
 		if (!headcount) {
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -800,6 +790,16 @@ static void handle_rx(struct vhost_net *net)
 			 * they refilled. */
 			goto out;
 		}
+		if (nvq->rx_array)
+			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+		/* On overrun, truncate and discard */
+		if (unlikely(headcount > UIO_MAXIOV)) {
+			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+			err = sock->ops->recvmsg(sock, &msg,
+						 1, MSG_DONTWAIT | MSG_TRUNC);
+			pr_debug("Discarded rx packet: len %zd\n", sock_len);
+			continue;
+		}
 		/* We don't need to be notified again. */
 		iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
 		fixup = msg.msg_iter;
-- 
cgit v1.2.3


From c33ee15b3820a03cf8229ba9415084197b827f8c Mon Sep 17 00:00:00 2001
From: Wei Xu <wexu@redhat.com>
Date: Fri, 1 Dec 2017 05:10:37 -0500
Subject: tun: free skb in early errors

tun_recvmsg() supports accepting skb by msg_control after
commit ac77cfd4258f ("tun: support receiving skb through msg_control"),
the skb if presented should be freed no matter how far it can go
along, otherwise it would be leaked.

This patch fixes several missed cases.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 95749006d687..4f4a842a1c9c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1952,8 +1952,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
-	if (!iov_iter_count(to))
+	if (!iov_iter_count(to)) {
+		if (skb)
+			kfree_skb(skb);
 		return 0;
+	}
 
 	if (!skb) {
 		/* Read frames from ring */
@@ -2069,22 +2072,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
+	struct sk_buff *skb = m->msg_control;
 	int ret;
 
-	if (!tun)
-		return -EBADFD;
+	if (!tun) {
+		ret = -EBADFD;
+		goto out_free_skb;
+	}
 
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
 		ret = -EINVAL;
-		goto out;
+		goto out_put_tun;
 	}
 	if (flags & MSG_ERRQUEUE) {
 		ret = sock_recv_errqueue(sock->sk, m, total_len,
 					 SOL_PACKET, TUN_TX_TIMESTAMP);
 		goto out;
 	}
-	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
-			  m->msg_control);
+	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
 	if (ret > (ssize_t)total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
@@ -2092,6 +2097,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 out:
 	tun_put(tun);
 	return ret;
+
+out_put_tun:
+	tun_put(tun);
+out_free_skb:
+	if (skb)
+		kfree_skb(skb);
+	return ret;
 }
 
 static int tun_peek_len(struct socket *sock)
-- 
cgit v1.2.3


From 61d78537843e676e7f56ac6db333db0c0529b892 Mon Sep 17 00:00:00 2001
From: Wei Xu <wexu@redhat.com>
Date: Fri, 1 Dec 2017 05:10:38 -0500
Subject: tap: free skb if flags error

tap_recvmsg() supports accepting skb by msg_control after
commit 3b4ba04acca8 ("tap: support receiving skb from msg_control"),
the skb if presented should be freed within the function, otherwise
it would be leaked.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tap.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index e9489b88407c..0a886fda0129 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q,
 	DEFINE_WAIT(wait);
 	ssize_t ret = 0;
 
-	if (!iov_iter_count(to))
+	if (!iov_iter_count(to)) {
+		if (skb)
+			kfree_skb(skb);
 		return 0;
+	}
 
 	if (skb)
 		goto put;
@@ -1154,11 +1157,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m,
 		       size_t total_len, int flags)
 {
 	struct tap_queue *q = container_of(sock, struct tap_queue, sock);
+	struct sk_buff *skb = m->msg_control;
 	int ret;
-	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
+		if (skb)
+			kfree_skb(skb);
 		return -EINVAL;
-	ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
-			  m->msg_control);
+	}
+	ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
 	if (ret > total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
-- 
cgit v1.2.3


From bc3ab70584696cb798b9e1e0ac8e6ced5fd4c3b8 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 1 Dec 2017 10:14:49 +0100
Subject: s390/qeth: fix thinko in IPv4 multicast address tracking

Commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
reworked how secondary addresses are managed for qeth devices.
Instead of dropping & subsequently re-adding all addresses on every
ndo_set_rx_mode() call, qeth now keeps track of the addresses that are
currently registered with the HW.
On a ndo_set_rx_mode(), we thus only need to do (de-)registration
requests for the addresses that have actually changed.

On L3 devices, the lookup for IPv4 Multicast addresses checks the wrong
hashtable - and thus never finds a match. As a result, we first delete
*all* such addresses, and then re-add them again. So each set_rx_mode()
causes a short period where the IPv4 Multicast addresses are not
registered, and the card stops forwarding inbound traffic for them.

Fix this by setting the ->is_multicast flag on the lookup object, thus
enabling qeth_l3_ip_from_hash() to search the correct hashtable and
find a match there.

Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index aadd384316a3..e79936b50698 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1376,6 +1376,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev)
 
 		tmp->u.a4.addr = be32_to_cpu(im4->multiaddr);
 		memcpy(tmp->mac, buf, sizeof(tmp->mac));
+		tmp->is_multicast = 1;
 
 		ipm = qeth_l3_ip_from_hash(card, tmp);
 		if (ipm) {
-- 
cgit v1.2.3


From 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 1 Dec 2017 10:14:50 +0100
Subject: s390/qeth: fix GSO throughput regression

Using GSO with small MTUs currently results in a substantial throughput
regression - which is caused by how qeth needs to map non-linear skbs
into its IO buffer elements:
compared to a linear skb, each GSO-segmented skb effectively consumes
twice as many buffer elements (ie two instead of one) due to the
additional header-only part. This causes the Output Queue to be
congested with low-utilized IO buffers.

Fix this as follows:
If the MSS is low enough so that a non-SG GSO segmentation produces
order-0 skbs (currently ~3500 byte), opt out from NETIF_F_SG. This is
where we anticipate the biggest savings, since an SG-enabled
GSO segmentation produces skbs that always consume at least two
buffer elements.

Larger MSS values continue to get a SG-enabled GSO segmentation, since
1) the relative overhead of the additional header-only buffer element
becomes less noticeable, and
2) the linearization overhead increases.

With the throughput regression fixed, re-enable NETIF_F_SG by default to
reap the significant CPU savings of GSO.

Fixes: 5722963a8e83 ("qeth: do not turn on SG per default")
Reported-by: Nils Hoppmann <niho@de.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  3 +++
 drivers/s390/net/qeth_core_main.c | 31 +++++++++++++++++++++++++++++++
 drivers/s390/net/qeth_l2_main.c   |  2 ++
 drivers/s390/net/qeth_l3_main.c   |  2 ++
 4 files changed, 38 insertions(+)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 9cd569ef43ec..15015a24f8ad 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -987,6 +987,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
 int qeth_set_features(struct net_device *, netdev_features_t);
 void qeth_recover_features(struct net_device *dev);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features);
 int qeth_vm_request_mac(struct qeth_card *card);
 int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
 
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 49b9efeba1bd..d9b0e07d4fa7 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -19,6 +19,11 @@
 #include <linux/mii.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
+#include <linux/netdev_features.h>
+#include <linux/skbuff.h>
+
 #include <net/iucv/af_iucv.h>
 #include <net/dsfield.h>
 
@@ -6438,6 +6443,32 @@ netdev_features_t qeth_fix_features(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(qeth_fix_features);
 
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features)
+{
+	/* GSO segmentation builds skbs with
+	 *	a (small) linear part for the headers, and
+	 *	page frags for the data.
+	 * Compared to a linear skb, the header-only part consumes an
+	 * additional buffer element. This reduces buffer utilization, and
+	 * hurts throughput. So compress small segments into one element.
+	 */
+	if (netif_needs_gso(skb, features)) {
+		/* match skb_segment(): */
+		unsigned int doffset = skb->data - skb_mac_header(skb);
+		unsigned int hsize = skb_shinfo(skb)->gso_size;
+		unsigned int hroom = skb_headroom(skb);
+
+		/* linearize only if resulting skb allocations are order-0: */
+		if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0))
+			features &= ~NETIF_F_SG;
+	}
+
+	return vlan_features_check(skb, features);
+}
+EXPORT_SYMBOL_GPL(qeth_features_check);
+
 static int __init qeth_core_init(void)
 {
 	int rc;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index d2537c09126d..85162712d207 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -960,6 +960,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
 	.ndo_stop		= qeth_l2_stop,
 	.ndo_get_stats		= qeth_get_stats,
 	.ndo_start_xmit		= qeth_l2_hard_start_xmit,
+	.ndo_features_check	= qeth_features_check,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= qeth_l2_set_rx_mode,
 	.ndo_do_ioctl		= qeth_do_ioctl,
@@ -1010,6 +1011,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
 		card->dev->hw_features = NETIF_F_SG;
 		card->dev->vlan_features = NETIF_F_SG;
+		card->dev->features |= NETIF_F_SG;
 		/* OSA 3S and earlier has no RX/TX support */
 		if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
 			card->dev->hw_features |= NETIF_F_IP_CSUM;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e79936b50698..46a841258fc8 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2918,6 +2918,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
 	.ndo_stop		= qeth_l3_stop,
 	.ndo_get_stats		= qeth_get_stats,
 	.ndo_start_xmit		= qeth_l3_hard_start_xmit,
+	.ndo_features_check	= qeth_features_check,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= qeth_l3_set_multicast_list,
 	.ndo_do_ioctl		= qeth_do_ioctl,
@@ -2958,6 +2959,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 				card->dev->vlan_features = NETIF_F_SG |
 					NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
 					NETIF_F_TSO;
+				card->dev->features |= NETIF_F_SG;
 			}
 		}
 	} else if (card->info.type == QETH_CARD_TYPE_IQD) {
-- 
cgit v1.2.3


From 0cbff6d4546613330a1c5f139f5c368e4ce33ca1 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 1 Dec 2017 10:14:51 +0100
Subject: s390/qeth: build max size GSO skbs on L2 devices

The current GSO skb size limit was copy&pasted over from the L3 path,
where it is needed due to a TSO limitation.
As L2 devices don't offer TSO support (and thus all GSO skbs are
segmented before they reach the driver), there's no reason to restrict
the stack in how large it may build the GSO skbs.

Fixes: d52aec97e5bc ("qeth: enable scatter/gather in layer 2 mode")
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 2 --
 drivers/s390/net/qeth_l3_main.c | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 85162712d207..f21c94810373 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1030,8 +1030,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 
 	card->info.broadcast_capable = 1;
 	qeth_l2_request_initial_mac(card);
-	card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
-				  PAGE_SIZE;
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
 	netif_carrier_off(card->dev);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 46a841258fc8..2a25f20566d8 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2987,8 +2987,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
 	netif_keep_dst(card->dev);
-	card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
-				  PAGE_SIZE;
+	netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
+					  PAGE_SIZE);
 
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
-- 
cgit v1.2.3


From a98a4ebc8c61d20f0150d6be66e0e65223a347af Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Fri, 1 Dec 2017 09:58:42 +0800
Subject: ipvlan: Add the skb->mark as flow4's member to lookup route

Current codes don't use skb->mark to assign flowi4_mark, it would
make the policy route rule with fwmark doesn't work as expected.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 11c1e7950fe5..77cc4fbaeace 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -393,6 +393,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 		.flowi4_oif = dev->ifindex,
 		.flowi4_tos = RT_TOS(ip4h->tos),
 		.flowi4_flags = FLOWI_FLAG_ANYSRC,
+		.flowi4_mark = skb->mark,
 		.daddr = ip4h->daddr,
 		.saddr = ip4h->saddr,
 	};
-- 
cgit v1.2.3


From 45ab4b13e46325d00f4acdb365d406e941a15f81 Mon Sep 17 00:00:00 2001
From: Lars Persson <lars.persson@axis.com>
Date: Fri, 1 Dec 2017 11:12:44 +0100
Subject: stmmac: reset last TSO segment size after device open

The mss variable tracks the last max segment size sent to the TSO
engine. We do not update the hardware as long as we receive skb:s with
the same value in gso_size.

During a network device down/up cycle (mapped to stmmac_release() and
stmmac_open() callbacks) we issue a reset to the hardware and it
forgets the setting for mss. However we did not zero out our mss
variable so the next transmission of a gso packet happens with an
undefined hardware setting.

This triggers a hang in the TSO engine and eventuelly the netdev
watchdog will bark.

Fixes: f748be531d70 ("stmmac: support new GMAC4")
Signed-off-by: Lars Persson <larper@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f63c2ddced3c..d7250539d0bd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2588,6 +2588,7 @@ static int stmmac_open(struct net_device *dev)
 
 	priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
 	priv->rx_copybreak = STMMAC_RX_COPYBREAK;
+	priv->mss = 0;
 
 	ret = alloc_dma_desc_resources(priv);
 	if (ret < 0) {
-- 
cgit v1.2.3


From 886afc1dc489436bf2c4fadf0f3aecacd7269234 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 1 Dec 2017 12:38:11 +0000
Subject: liquidio: fix incorrect indentation of assignment statement

Remove one extraneous level of indentation on assignment statement.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 6aa0eee88ea5..a5eecd895a82 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1113,7 +1113,7 @@ static int liquidio_watchdog(void *param)
 				dev_err(&oct->pci_dev->dev,
 					"ERROR: Octeon core %d crashed or got stuck!  See oct-fwdump for details.\n",
 					core);
-					err_msg_was_printed[core] = true;
+				err_msg_was_printed[core] = true;
 			}
 		}
 
-- 
cgit v1.2.3


From c501256406fb19c306504ee1fe41a4ea208d4245 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 1 Dec 2017 11:09:53 +0000
Subject: rxrpc: Use correct netns source in rxrpc_release_sock()

In rxrpc_release_sock() there may be no rx->local value to access, so we
can't unconditionally follow it to the rxrpc network namespace information
to poke the connection reapers.

Instead, use the socket's namespace pointer to find the namespace.

This unfixed code causes the following static checker warning:

	net/rxrpc/af_rxrpc.c:898 rxrpc_release_sock()
	error: we previously assumed 'rx->local' could be null (see line 887)

Fixes: 3d18cbb7fd0c ("rxrpc: Fix conn expiry timers")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/af_rxrpc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 8f7cf4c042be..dcd818fa837e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -860,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
 static int rxrpc_release_sock(struct sock *sk)
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 
 	_enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
 
@@ -895,8 +896,8 @@ static int rxrpc_release_sock(struct sock *sk)
 	rxrpc_release_calls_on_socket(rx);
 	flush_workqueue(rxrpc_workqueue);
 	rxrpc_purge_queue(&sk->sk_receive_queue);
-	rxrpc_queue_work(&rx->local->rxnet->service_conn_reaper);
-	rxrpc_queue_work(&rx->local->rxnet->client_conn_reaper);
+	rxrpc_queue_work(&rxnet->service_conn_reaper);
+	rxrpc_queue_work(&rxnet->client_conn_reaper);
 
 	rxrpc_put_local(rx->local);
 	rx->local = NULL;
-- 
cgit v1.2.3


From bcd1d601e5cc760bf5743a59e4716603490e281c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 1 Dec 2017 11:10:37 +0000
Subject: rxrpc: Fix the MAINTAINERS record

Fix the MAINTAINERS record so that it's more obvious who the maintainer for
AF_RXRPC is.

Reported-by: Joe Perches <joe@perches.com>
Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 77d819b458a9..511b858405bc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -554,13 +554,13 @@ S:	Orphan
 F:	Documentation/filesystems/affs.txt
 F:	fs/affs/
 
-AFS FILESYSTEM & AF_RXRPC SOCKET DOMAIN
+AFS FILESYSTEM
 M:	David Howells <dhowells@redhat.com>
 L:	linux-afs@lists.infradead.org
 S:	Supported
 F:	fs/afs/
-F:	include/net/af_rxrpc.h
-F:	net/rxrpc/af_rxrpc.c
+F:	include/trace/events/afs.h
+F:	Documentation/filesystems/afs.txt
 W:	https://www.infradead.org/~dhowells/kafs/
 
 AGPGART DRIVER
@@ -11777,6 +11777,18 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-deve
 S:	Maintained
 F:	drivers/net/wireless/realtek/rtl8xxxu/
 
+RXRPC SOCKETS (AF_RXRPC)
+M:	David Howells <dhowells@redhat.com>
+L:	linux-afs@lists.infradead.org
+S:	Supported
+F:	net/rxrpc/
+F:	include/keys/rxrpc-type.h
+F:	include/net/af_rxrpc.h
+F:	include/trace/events/rxrpc.h
+F:	include/uapi/linux/rxrpc.h
+F:	Documentation/networking/rxrpc.txt
+W:	https://www.infradead.org/~dhowells/kafs/
+
 S3 SAVAGE FRAMEBUFFER DRIVER
 M:	Antonino Daplas <adaplas@gmail.com>
 L:	linux-fbdev@vger.kernel.org
-- 
cgit v1.2.3


From eeea10b83a139451130df1594f26710c8fa390c8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 3 Dec 2017 09:32:59 -0800
Subject: tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()

James Morris reported kernel stack corruption bug [1] while
running the SELinux testsuite, and bisected to a recent
commit bffa72cf7f9d ("net: sk_buff rbnode reorg")

We believe this commit is fine, but exposes an older bug.

SELinux code runs from tcp_filter() and might send an ICMP,
expecting IP options to be found in skb->cb[] using regular IPCB placement.

We need to defer TCP mangling of skb->cb[] after tcp_filter() calls.

This patch adds tcp_v4_fill_cb()/tcp_v4_restore_cb() in a very
similar way we added them for IPv6.

[1]
[  339.806024] SELinux: failure in selinux_parse_skb(), unable to parse packet
[  339.822505] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff81745af5
[  339.822505]
[  339.852250] CPU: 4 PID: 3642 Comm: client Not tainted 4.15.0-rc1-test #15
[  339.868498] Hardware name: LENOVO 10FGS0VA1L/30BC, BIOS FWKT68A   01/19/2017
[  339.885060] Call Trace:
[  339.896875]  <IRQ>
[  339.908103]  dump_stack+0x63/0x87
[  339.920645]  panic+0xe8/0x248
[  339.932668]  ? ip_push_pending_frames+0x33/0x40
[  339.946328]  ? icmp_send+0x525/0x530
[  339.958861]  ? kfree_skbmem+0x60/0x70
[  339.971431]  __stack_chk_fail+0x1b/0x20
[  339.984049]  icmp_send+0x525/0x530
[  339.996205]  ? netlbl_skbuff_err+0x36/0x40
[  340.008997]  ? selinux_netlbl_err+0x11/0x20
[  340.021816]  ? selinux_socket_sock_rcv_skb+0x211/0x230
[  340.035529]  ? security_sock_rcv_skb+0x3b/0x50
[  340.048471]  ? sk_filter_trim_cap+0x44/0x1c0
[  340.061246]  ? tcp_v4_inbound_md5_hash+0x69/0x1b0
[  340.074562]  ? tcp_filter+0x2c/0x40
[  340.086400]  ? tcp_v4_rcv+0x820/0xa20
[  340.098329]  ? ip_local_deliver_finish+0x71/0x1a0
[  340.111279]  ? ip_local_deliver+0x6f/0xe0
[  340.123535]  ? ip_rcv_finish+0x3a0/0x3a0
[  340.135523]  ? ip_rcv_finish+0xdb/0x3a0
[  340.147442]  ? ip_rcv+0x27c/0x3c0
[  340.158668]  ? inet_del_offload+0x40/0x40
[  340.170580]  ? __netif_receive_skb_core+0x4ac/0x900
[  340.183285]  ? rcu_accelerate_cbs+0x5b/0x80
[  340.195282]  ? __netif_receive_skb+0x18/0x60
[  340.207288]  ? process_backlog+0x95/0x140
[  340.218948]  ? net_rx_action+0x26c/0x3b0
[  340.230416]  ? __do_softirq+0xc9/0x26a
[  340.241625]  ? do_softirq_own_stack+0x2a/0x40
[  340.253368]  </IRQ>
[  340.262673]  ? do_softirq+0x50/0x60
[  340.273450]  ? __local_bh_enable_ip+0x57/0x60
[  340.285045]  ? ip_finish_output2+0x175/0x350
[  340.296403]  ? ip_finish_output+0x127/0x1d0
[  340.307665]  ? nf_hook_slow+0x3c/0xb0
[  340.318230]  ? ip_output+0x72/0xe0
[  340.328524]  ? ip_fragment.constprop.54+0x80/0x80
[  340.340070]  ? ip_local_out+0x35/0x40
[  340.350497]  ? ip_queue_xmit+0x15c/0x3f0
[  340.361060]  ? __kmalloc_reserve.isra.40+0x31/0x90
[  340.372484]  ? __skb_clone+0x2e/0x130
[  340.382633]  ? tcp_transmit_skb+0x558/0xa10
[  340.393262]  ? tcp_connect+0x938/0xad0
[  340.403370]  ? ktime_get_with_offset+0x4c/0xb0
[  340.414206]  ? tcp_v4_connect+0x457/0x4e0
[  340.424471]  ? __inet_stream_connect+0xb3/0x300
[  340.435195]  ? inet_stream_connect+0x3b/0x60
[  340.445607]  ? SYSC_connect+0xd9/0x110
[  340.455455]  ? __audit_syscall_entry+0xaf/0x100
[  340.466112]  ? syscall_trace_enter+0x1d0/0x2b0
[  340.476636]  ? __audit_syscall_exit+0x209/0x290
[  340.487151]  ? SyS_connect+0xe/0x10
[  340.496453]  ? do_syscall_64+0x67/0x1b0
[  340.506078]  ? entry_SYSCALL64_slow_path+0x25/0x25

Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: James Morris <james.l.morris@oracle.com>
Tested-by: James Morris <james.l.morris@oracle.com>
Tested-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 59 ++++++++++++++++++++++++++++++++++++-----------------
 net/ipv6/tcp_ipv6.c | 10 +++++----
 2 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6bc0c4d19c6..77ea45da0fe9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_filter);
 
+static void tcp_v4_restore_cb(struct sk_buff *skb)
+{
+	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
+		sizeof(struct inet_skb_parm));
+}
+
+static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
+			   const struct tcphdr *th)
+{
+	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+	 * barrier() makes sure compiler wont play fool^Waliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+		sizeof(struct inet_skb_parm));
+	barrier();
+
+	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+				    skb->len - th->doff * 4);
+	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
+	TCP_SKB_CB(skb)->sacked	 = 0;
+	TCP_SKB_CB(skb)->has_rxtstamp =
+			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+}
+
 /*
  *	From tcp_input.c
  */
@@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 	th = (const struct tcphdr *)skb->data;
 	iph = ip_hdr(skb);
-	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
-	 * barrier() makes sure compiler wont play fool^Waliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
-		sizeof(struct inet_skb_parm));
-	barrier();
-
-	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
-	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
-				    skb->len - th->doff * 4);
-	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
-	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
-	TCP_SKB_CB(skb)->sacked	 = 0;
-	TCP_SKB_CB(skb)->has_rxtstamp =
-			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
-
 lookup:
 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
 			       th->dest, sdif, &refcounted);
@@ -1679,14 +1689,19 @@ process:
 		sock_hold(sk);
 		refcounted = true;
 		nsk = NULL;
-		if (!tcp_filter(sk, skb))
+		if (!tcp_filter(sk, skb)) {
+			th = (const struct tcphdr *)skb->data;
+			iph = ip_hdr(skb);
+			tcp_v4_fill_cb(skb, iph, th);
 			nsk = tcp_check_req(sk, skb, req, false);
+		}
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
 		}
 		if (nsk == sk) {
 			reqsk_put(req);
+			tcp_v4_restore_cb(skb);
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v4_send_reset(nsk, skb);
 			goto discard_and_relse;
@@ -1712,6 +1727,7 @@ process:
 		goto discard_and_relse;
 	th = (const struct tcphdr *)skb->data;
 	iph = ip_hdr(skb);
+	tcp_v4_fill_cb(skb, iph, th);
 
 	skb->dev = NULL;
 
@@ -1742,6 +1758,8 @@ no_tcp_socket:
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	tcp_v4_fill_cb(skb, iph, th);
+
 	if (tcp_checksum_complete(skb)) {
 csum_error:
 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@@ -1768,6 +1786,8 @@ do_time_wait:
 		goto discard_it;
 	}
 
+	tcp_v4_fill_cb(skb, iph, th);
+
 	if (tcp_checksum_complete(skb)) {
 		inet_twsk_put(inet_twsk(sk));
 		goto csum_error;
@@ -1784,6 +1804,7 @@ do_time_wait:
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
+			tcp_v4_restore_cb(skb);
 			refcounted = false;
 			goto process;
 		}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index be11dc13aa70..1f04ec0e4a7a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1454,7 +1454,6 @@ process:
 		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		tcp_v6_fill_cb(skb, hdr, th);
 		if (tcp_v6_inbound_md5_hash(sk, skb)) {
 			sk_drops_add(sk, skb);
 			reqsk_put(req);
@@ -1467,8 +1466,12 @@ process:
 		sock_hold(sk);
 		refcounted = true;
 		nsk = NULL;
-		if (!tcp_filter(sk, skb))
+		if (!tcp_filter(sk, skb)) {
+			th = (const struct tcphdr *)skb->data;
+			hdr = ipv6_hdr(skb);
+			tcp_v6_fill_cb(skb, hdr, th);
 			nsk = tcp_check_req(sk, skb, req, false);
+		}
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
@@ -1492,8 +1495,6 @@ process:
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	tcp_v6_fill_cb(skb, hdr, th);
-
 	if (tcp_v6_inbound_md5_hash(sk, skb))
 		goto discard_and_relse;
 
@@ -1501,6 +1502,7 @@ process:
 		goto discard_and_relse;
 	th = (const struct tcphdr *)skb->data;
 	hdr = ipv6_hdr(skb);
+	tcp_v6_fill_cb(skb, hdr, th);
 
 	skb->dev = NULL;
 
-- 
cgit v1.2.3


From b4d1605a8ea608fd7dc45b926a05d75d340bde4b Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 3 Dec 2017 09:33:00 -0800
Subject: tcp: use IPCB instead of TCP_SKB_CB in inet_exact_dif_match()

After this fix : ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"),
socket lookups happen while skb->cb[] has not been mangled yet by TCP.

Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4e09398009c1..6998707e81f3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -844,12 +844,11 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb)
 }
 #endif
 
-/* TCP_SKB_CB reference means this can not be used from early demux */
 static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
 	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
-	    skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
 		return true;
 #endif
 	return false;
-- 
cgit v1.2.3